mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Rollup merge of #152418 - asder8215:btreemap_merge_optimized, r=Mark-Simulacrum
`BTreeMap::merge` optimized This is an optimized version of https://github.com/rust-lang/rust/pull/151981. See [ACP](https://github.com/rust-lang/libs-team/issues/739#issuecomment-3873487320) for more information on `BTreeMap::merge` does. CC @programmerjake. Let me know what you think of how I'm using `CursorMut` and `IntoIter` here and whether the unsafe code here looks good. I decided to use `ptr::read()` and `ptr::write()` to grab the value from `CursorMut` as `V` than `&mut V`, use it within the `conflict` function, and overwrite the content of conflicting key afterward. I know this needs some polishing, especially with refactoring some redundant looking code in a nicer way, some of which could probably just be public API methods for `CursorMut`. It does pass all the tests that I currently have for `BTreeMap::merge` (inspired from `BTreeMap::append`) though, so that's good.
This commit is contained in:
@@ -1240,6 +1240,162 @@ pub fn append(&mut self, other: &mut Self)
|
||||
)
|
||||
}
|
||||
|
||||
/// Moves all elements from `other` into `self`, leaving `other` empty.
|
||||
///
|
||||
/// If a key from `other` is already present in `self`, then the `conflict`
|
||||
/// closure is used to return a value to `self`. The `conflict`
|
||||
/// closure takes in a borrow of `self`'s key, `self`'s value, and `other`'s value
|
||||
/// in that order.
|
||||
///
|
||||
/// An example of why one might use this method over [`append`]
|
||||
/// is to combine `self`'s value with `other`'s value when their keys conflict.
|
||||
///
|
||||
/// Similar to [`insert`], though, the key is not overwritten,
|
||||
/// which matters for types that can be `==` without being identical.
|
||||
///
|
||||
/// [`insert`]: BTreeMap::insert
|
||||
/// [`append`]: BTreeMap::append
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(btree_merge)]
|
||||
/// use std::collections::BTreeMap;
|
||||
///
|
||||
/// let mut a = BTreeMap::new();
|
||||
/// a.insert(1, String::from("a"));
|
||||
/// a.insert(2, String::from("b"));
|
||||
/// a.insert(3, String::from("c")); // Note: Key (3) also present in b.
|
||||
///
|
||||
/// let mut b = BTreeMap::new();
|
||||
/// b.insert(3, String::from("d")); // Note: Key (3) also present in a.
|
||||
/// b.insert(4, String::from("e"));
|
||||
/// b.insert(5, String::from("f"));
|
||||
///
|
||||
/// // concatenate a's value and b's value
|
||||
/// a.merge(b, |_, a_val, b_val| {
|
||||
/// format!("{a_val}{b_val}")
|
||||
/// });
|
||||
///
|
||||
/// assert_eq!(a.len(), 5); // all of b's keys in a
|
||||
///
|
||||
/// assert_eq!(a[&1], "a");
|
||||
/// assert_eq!(a[&2], "b");
|
||||
/// assert_eq!(a[&3], "cd"); // Note: "c" has been combined with "d".
|
||||
/// assert_eq!(a[&4], "e");
|
||||
/// assert_eq!(a[&5], "f");
|
||||
/// ```
|
||||
#[unstable(feature = "btree_merge", issue = "152152")]
|
||||
pub fn merge(&mut self, mut other: Self, mut conflict: impl FnMut(&K, V, V) -> V)
|
||||
where
|
||||
K: Ord,
|
||||
A: Clone,
|
||||
{
|
||||
// Do we have to append anything at all?
|
||||
if other.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// We can just swap `self` and `other` if `self` is empty.
|
||||
if self.is_empty() {
|
||||
mem::swap(self, &mut other);
|
||||
return;
|
||||
}
|
||||
|
||||
let mut other_iter = other.into_iter();
|
||||
let (first_other_key, first_other_val) = other_iter.next().unwrap();
|
||||
|
||||
// find the first gap that has the smallest key greater than or equal to
|
||||
// the first key from other
|
||||
let mut self_cursor = self.lower_bound_mut(Bound::Included(&first_other_key));
|
||||
|
||||
if let Some((self_key, _)) = self_cursor.peek_next() {
|
||||
match K::cmp(self_key, &first_other_key) {
|
||||
Ordering::Equal => {
|
||||
// if `f` unwinds, the next entry is already removed leaving
|
||||
// the tree in valid state.
|
||||
// FIXME: Once `MaybeDangling` is implemented, we can optimize
|
||||
// this through using a drop handler and transmutating CursorMutKey<K, V>
|
||||
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
|
||||
if let Some((k, v)) = self_cursor.remove_next() {
|
||||
// SAFETY: we remove the K, V out of the next entry,
|
||||
// apply 'f' to get a new (K, V), and insert it back
|
||||
// into the next entry that the cursor is pointing at
|
||||
let v = conflict(&k, v, first_other_val);
|
||||
unsafe { self_cursor.insert_after_unchecked(k, v) };
|
||||
}
|
||||
}
|
||||
Ordering::Greater =>
|
||||
// SAFETY: we know our other_key's ordering is less than self_key,
|
||||
// so inserting before will guarantee sorted order
|
||||
unsafe {
|
||||
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
|
||||
},
|
||||
Ordering::Less => {
|
||||
unreachable!("Cursor's peek_next should return None.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// SAFETY: reaching here means our cursor is at the end
|
||||
// self BTreeMap so we just insert other_key here
|
||||
unsafe {
|
||||
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
|
||||
}
|
||||
}
|
||||
|
||||
for (other_key, other_val) in other_iter {
|
||||
loop {
|
||||
if let Some((self_key, _)) = self_cursor.peek_next() {
|
||||
match K::cmp(self_key, &other_key) {
|
||||
Ordering::Equal => {
|
||||
// if `f` unwinds, the next entry is already removed leaving
|
||||
// the tree in valid state.
|
||||
// FIXME: Once `MaybeDangling` is implemented, we can optimize
|
||||
// this through using a drop handler and transmutating CursorMutKey<K, V>
|
||||
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
|
||||
if let Some((k, v)) = self_cursor.remove_next() {
|
||||
// SAFETY: we remove the K, V out of the next entry,
|
||||
// apply 'f' to get a new (K, V), and insert it back
|
||||
// into the next entry that the cursor is pointing at
|
||||
let v = conflict(&k, v, other_val);
|
||||
unsafe { self_cursor.insert_after_unchecked(k, v) };
|
||||
}
|
||||
break;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
// SAFETY: we know our self_key's ordering is greater than other_key,
|
||||
// so inserting before will guarantee sorted order
|
||||
unsafe {
|
||||
self_cursor.insert_before_unchecked(other_key, other_val);
|
||||
}
|
||||
break;
|
||||
}
|
||||
Ordering::Less => {
|
||||
// FIXME: instead of doing a linear search here,
|
||||
// this can be optimized to search the tree by starting
|
||||
// from self_cursor and going towards the root and then
|
||||
// back down to the proper node -- that should probably
|
||||
// be a new method on Cursor*.
|
||||
self_cursor.next();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// FIXME: If we get here, that means all of other's keys are greater than
|
||||
// self's keys. For performance, this should really do a bulk insertion of items
|
||||
// from other_iter into the end of self `BTreeMap`. Maybe this should be
|
||||
// a method for Cursor*?
|
||||
|
||||
// SAFETY: reaching here means our cursor is at the end
|
||||
// self BTreeMap so we just insert other_key here
|
||||
unsafe {
|
||||
self_cursor.insert_before_unchecked(other_key, other_val);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a double-ended iterator over a sub-range of elements in the map.
|
||||
/// The simplest way is to use the range syntax `min..max`, thus `range(min..max)` will
|
||||
/// yield elements from min (inclusive) to max (exclusive).
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use core::assert_matches;
|
||||
use std::iter;
|
||||
use std::ops::Bound::{Excluded, Included, Unbounded};
|
||||
use std::panic::{AssertUnwindSafe, catch_unwind};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
use std::{cmp, iter};
|
||||
|
||||
use super::*;
|
||||
use crate::boxed::Box;
|
||||
@@ -2128,6 +2128,86 @@ fn $name() {
|
||||
#[cfg(not(miri))] // Miri is too slow
|
||||
create_append_test!(test_append_1700, 1700);
|
||||
|
||||
// a inserts (0, 0)..(8, 8) to its own tree
|
||||
// b inserts (5, 5 * 2)..($len, 2 * $len) to its own tree
|
||||
// note that between a and b, there are duplicate keys
|
||||
// between 5..min($len, 8), so on merge we add the values
|
||||
// of these keys together
|
||||
// we check that:
|
||||
// - the merged tree 'a' has a length of max(8, $len)
|
||||
// - all keys in 'a' have the correct value associated
|
||||
// - removing and inserting an element into the merged
|
||||
// tree 'a' still keeps it in valid tree form
|
||||
macro_rules! create_merge_test {
|
||||
($name:ident, $len:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut a = BTreeMap::new();
|
||||
for i in 0..8 {
|
||||
a.insert(i, i);
|
||||
}
|
||||
|
||||
let mut b = BTreeMap::new();
|
||||
for i in 5..$len {
|
||||
b.insert(i, 2 * i);
|
||||
}
|
||||
|
||||
a.merge(b, |_, a_val, b_val| a_val + b_val);
|
||||
|
||||
assert_eq!(a.len(), cmp::max($len, 8));
|
||||
|
||||
for i in 0..cmp::max($len, 8) {
|
||||
if i < 5 {
|
||||
assert_eq!(a[&i], i);
|
||||
} else {
|
||||
if i < cmp::min($len, 8) {
|
||||
assert_eq!(a[&i], i + 2 * i);
|
||||
} else if i >= $len {
|
||||
assert_eq!(a[&i], i);
|
||||
} else {
|
||||
assert_eq!(a[&i], 2 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
a.check();
|
||||
assert_eq!(
|
||||
a.remove(&($len - 1)),
|
||||
if $len >= 5 && $len < 8 {
|
||||
Some(($len - 1) + 2 * ($len - 1))
|
||||
} else {
|
||||
Some(2 * ($len - 1))
|
||||
}
|
||||
);
|
||||
assert_eq!(a.insert($len - 1, 20), None);
|
||||
a.check();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
|
||||
// Single node, merge conflicting key values.
|
||||
create_merge_test!(test_merge_7, 7);
|
||||
// Single node.
|
||||
create_merge_test!(test_merge_9, 9);
|
||||
// Two leafs that don't need fixing.
|
||||
create_merge_test!(test_merge_17, 17);
|
||||
// Two leafs where the second one ends up underfull and needs stealing at the end.
|
||||
create_merge_test!(test_merge_14, 14);
|
||||
// Two leafs where the second one ends up empty because the insertion finished at the root.
|
||||
create_merge_test!(test_merge_12, 12);
|
||||
// Three levels; insertion finished at the root.
|
||||
create_merge_test!(test_merge_144, 144);
|
||||
// Three levels; insertion finished at leaf while there is an empty node on the second level.
|
||||
create_merge_test!(test_merge_145, 145);
|
||||
// Tests for several randomly chosen sizes.
|
||||
create_merge_test!(test_merge_170, 170);
|
||||
create_merge_test!(test_merge_181, 181);
|
||||
#[cfg(not(miri))] // Miri is too slow
|
||||
create_merge_test!(test_merge_239, 239);
|
||||
#[cfg(not(miri))] // Miri is too slow
|
||||
create_merge_test!(test_merge_1700, 1700);
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
|
||||
fn test_append_drop_leak() {
|
||||
@@ -2169,6 +2249,84 @@ fn test_append_ord_chaos() {
|
||||
map2.check();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
|
||||
fn test_merge_drop_leak() {
|
||||
let a = CrashTestDummy::new(0);
|
||||
let b = CrashTestDummy::new(1);
|
||||
let c = CrashTestDummy::new(2);
|
||||
let mut left = BTreeMap::new();
|
||||
let mut right = BTreeMap::new();
|
||||
left.insert(a.spawn(Panic::Never), ());
|
||||
left.insert(b.spawn(Panic::Never), ());
|
||||
left.insert(c.spawn(Panic::Never), ());
|
||||
right.insert(b.spawn(Panic::InDrop), ()); // first duplicate key, dropped during merge
|
||||
right.insert(c.spawn(Panic::Never), ());
|
||||
|
||||
catch_unwind(move || left.merge(right, |_, _, _| ())).unwrap_err();
|
||||
assert_eq!(a.dropped(), 1); // this should not be dropped
|
||||
assert_eq!(b.dropped(), 2); // key is dropped on panic
|
||||
assert_eq!(c.dropped(), 2); // key is dropped on panic
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
|
||||
fn test_merge_conflict_drop_leak() {
|
||||
let a = CrashTestDummy::new(0);
|
||||
let a_val_left = CrashTestDummy::new(0);
|
||||
|
||||
let b = CrashTestDummy::new(1);
|
||||
let b_val_left = CrashTestDummy::new(1);
|
||||
let b_val_right = CrashTestDummy::new(1);
|
||||
|
||||
let c = CrashTestDummy::new(2);
|
||||
let c_val_left = CrashTestDummy::new(2);
|
||||
let c_val_right = CrashTestDummy::new(2);
|
||||
|
||||
let mut left = BTreeMap::new();
|
||||
let mut right = BTreeMap::new();
|
||||
|
||||
left.insert(a.spawn(Panic::Never), a_val_left.spawn(Panic::Never));
|
||||
left.insert(b.spawn(Panic::Never), b_val_left.spawn(Panic::Never));
|
||||
left.insert(c.spawn(Panic::Never), c_val_left.spawn(Panic::Never));
|
||||
right.insert(b.spawn(Panic::Never), b_val_right.spawn(Panic::Never));
|
||||
right.insert(c.spawn(Panic::Never), c_val_right.spawn(Panic::Never));
|
||||
|
||||
// First key that conflicts should
|
||||
catch_unwind(move || {
|
||||
left.merge(right, |_, _, _| panic!("Panic in conflict function"));
|
||||
assert_eq!(left.len(), 1); // only 1 entry should be left
|
||||
})
|
||||
.unwrap_err();
|
||||
assert_eq!(a.dropped(), 1); // should not panic
|
||||
assert_eq!(a_val_left.dropped(), 1); // should not panic
|
||||
assert_eq!(b.dropped(), 2); // should drop from panic (conflict)
|
||||
assert_eq!(b_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
|
||||
assert_eq!(b_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
|
||||
assert_eq!(c.dropped(), 2); // should drop from panic (conflict)
|
||||
assert_eq!(c_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
|
||||
assert_eq!(c_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_ord_chaos() {
|
||||
let mut map1 = BTreeMap::new();
|
||||
map1.insert(Cyclic3::A, ());
|
||||
map1.insert(Cyclic3::B, ());
|
||||
let mut map2 = BTreeMap::new();
|
||||
map2.insert(Cyclic3::A, ());
|
||||
map2.insert(Cyclic3::B, ());
|
||||
map2.insert(Cyclic3::C, ()); // lands first, before A
|
||||
map2.insert(Cyclic3::B, ()); // lands first, before C
|
||||
map1.check();
|
||||
map2.check(); // keys are not unique but still strictly ascending
|
||||
assert_eq!(map1.len(), 2);
|
||||
assert_eq!(map2.len(), 4);
|
||||
map1.merge(map2, |_, _, _| ());
|
||||
assert_eq!(map1.len(), 5);
|
||||
map1.check();
|
||||
}
|
||||
|
||||
fn rand_data(len: usize) -> Vec<(u32, u32)> {
|
||||
let mut rng = DeterministicRng::new();
|
||||
Vec::from_iter((0..len).map(|_| (rng.next(), rng.next())))
|
||||
@@ -2615,3 +2773,25 @@ fn test_id_based_append() {
|
||||
|
||||
assert_eq!(lhs.pop_first().unwrap().0.name, "lhs_k".to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_id_based_merge() {
|
||||
let mut lhs = BTreeMap::new();
|
||||
let mut rhs = BTreeMap::new();
|
||||
|
||||
lhs.insert(IdBased { id: 0, name: "lhs_k".to_string() }, "1".to_string());
|
||||
rhs.insert(IdBased { id: 0, name: "rhs_k".to_string() }, "2".to_string());
|
||||
|
||||
lhs.merge(rhs, |_, mut lhs_val, rhs_val| {
|
||||
// confirming that lhs_val comes from lhs tree,
|
||||
// rhs_val comes from rhs tree
|
||||
assert_eq!(lhs_val, String::from("1"));
|
||||
assert_eq!(rhs_val, String::from("2"));
|
||||
lhs_val.push_str(&rhs_val);
|
||||
lhs_val
|
||||
});
|
||||
|
||||
let merged_kv_pair = lhs.pop_first().unwrap();
|
||||
assert_eq!(merged_kv_pair.0.id, 0);
|
||||
assert_eq!(merged_kv_pair.0.name, "lhs_k".to_string());
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#![feature(allocator_api)]
|
||||
#![feature(binary_heap_pop_if)]
|
||||
#![feature(btree_merge)]
|
||||
#![feature(const_heap)]
|
||||
#![feature(deque_extend_front)]
|
||||
#![feature(iter_array_chunks)]
|
||||
|
||||
Reference in New Issue
Block a user