-
-
Notifications
You must be signed in to change notification settings - Fork 14.5k
implement BTreeMap::merge #151981
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement BTreeMap::merge #151981
Changes from all commits
6a2f234
1611b73
8136795
746641c
d3ee0fc
2f926de
1b29082
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,36 @@ impl<K, V> Root<K, V> { | |
| self.bulk_push(iter, length, alloc) | ||
| } | ||
|
|
||
| /// Merges all key-value pairs from the union of two ascending iterators, | ||
| /// incrementing a `length` variable along the way. The latter makes it | ||
| /// easier for the caller to avoid a leak when a drop handler panicks. | ||
| /// | ||
| /// If both iterators produce the same key, this method constructs a pair using the | ||
| /// key from the left iterator and calls on a closure `f` to return a value given | ||
| /// the conflicting key and value from left and right iterators. | ||
| /// | ||
| /// If you want the tree to end up in a strictly ascending order, like for | ||
| /// a `BTreeMap`, both iterators should produce keys in strictly ascending | ||
| /// order, each greater than all keys in the tree, including any keys | ||
| /// already in the tree upon entry. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When we say "including any keys already in the tree", what keys are not in that set but relevant here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I copied the comment from the |
||
| pub(super) fn merge_from_sorted_iters_with<I, A: Allocator + Clone>( | ||
| &mut self, | ||
| left: I, | ||
| right: I, | ||
| length: &mut usize, | ||
| alloc: A, | ||
| f: impl FnMut(&K, V, V) -> V, | ||
| ) where | ||
| K: Ord, | ||
| I: Iterator<Item = (K, V)> + FusedIterator, | ||
| { | ||
| // We prepare to merge `left` and `right` into a sorted sequence in linear time. | ||
| let iter = MergeIterWith { inner: MergeIterInner::new(left, right), f }; | ||
|
|
||
| // Meanwhile, we build a tree from the sorted sequence in linear time. | ||
| self.bulk_push(iter, length, alloc) | ||
| } | ||
|
|
||
| /// Pushes all key-value pairs to the end of the tree, incrementing a | ||
| /// `length` variable along the way. The latter makes it easier for the | ||
| /// caller to avoid a leak when the iterator panicks. | ||
|
|
@@ -115,3 +145,33 @@ where | |
| } | ||
| } | ||
| } | ||
|
|
||
| /// An iterator for merging two sorted sequences into one with | ||
| /// a callback function to return a value on conflicting keys | ||
| struct MergeIterWith<F, K, V, I: Iterator<Item = (K, V)>> { | ||
| inner: MergeIterInner<I>, | ||
| f: F, | ||
| } | ||
|
|
||
| impl<F, K: Ord, V, I> Iterator for MergeIterWith<F, K, V, I> | ||
| where | ||
| F: FnMut(&K, V, V) -> V, | ||
| I: Iterator<Item = (K, V)> + FusedIterator, | ||
| { | ||
| type Item = (K, V); | ||
|
|
||
| /// If two keys are equal, returns the key from the left and uses `f` to return | ||
| /// a value given the conflicting key and values from left and right | ||
| fn next(&mut self) -> Option<(K, V)> { | ||
| let (a_next, b_next) = self.inner.nexts(|a: &(K, V), b: &(K, V)| K::cmp(&a.0, &b.0)); | ||
| match (a_next, b_next) { | ||
| (Some((a_k, a_v)), Some((_, b_v))) => Some({ | ||
| let next_val = (self.f)(&a_k, a_v, b_v); | ||
| (a_k, next_val) | ||
| }), | ||
| (Some(a), None) => Some(a), | ||
| (None, Some(b)) => Some(b), | ||
| (None, None) => None, | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1240,6 +1240,81 @@ impl<K, V, A: Allocator + Clone> BTreeMap<K, V, A> { | |
| ) | ||
| } | ||
|
|
||
| /// Moves all elements from `other` into `self`, leaving `other` empty. | ||
| /// | ||
| /// If a key from `other` is already present in `self`, then the `conflict` | ||
| /// closure is used to return a value to `self`. The `conflict` | ||
| /// closure takes in a borrow of `self`'s key, `self`'s value, and `other`'s value | ||
| /// in that order. | ||
| /// | ||
| /// An example of why one might use this method over [`append`] | ||
| /// is to combine `self`'s value with `other`'s value when their keys conflict. | ||
| /// | ||
| /// Similar to [`insert`], though, the key is not overwritten, | ||
| /// which matters for types that can be `==` without being identical. | ||
| /// | ||
| /// | ||
| /// [`insert`]: BTreeMap::insert | ||
| /// [`append`]: BTreeMap::append | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// #![feature(btree_merge)] | ||
| /// use std::collections::BTreeMap; | ||
| /// | ||
| /// let mut a = BTreeMap::new(); | ||
| /// a.insert(1, String::from("a")); | ||
| /// a.insert(2, String::from("b")); | ||
| /// a.insert(3, String::from("c")); // Note: Key (3) also present in b. | ||
| /// | ||
| /// let mut b = BTreeMap::new(); | ||
| /// b.insert(3, String::from("d")); // Note: Key (3) also present in a. | ||
| /// b.insert(4, String::from("e")); | ||
| /// b.insert(5, String::from("f")); | ||
| /// | ||
| /// // concatenate a's value and b's value | ||
| /// a.merge(b, |_, a_val, b_val| { | ||
| /// format!("{a_val}{b_val}") | ||
| /// }); | ||
| /// | ||
| /// assert_eq!(a.len(), 5); // all of b's keys in a | ||
| /// | ||
| /// assert_eq!(a[&1], "a"); | ||
| /// assert_eq!(a[&2], "b"); | ||
| /// assert_eq!(a[&3], "cd"); // Note: "c" has been combined with "d". | ||
| /// assert_eq!(a[&4], "e"); | ||
| /// assert_eq!(a[&5], "f"); | ||
| /// ``` | ||
| #[unstable(feature = "btree_merge", issue = "152152")] | ||
| pub fn merge(&mut self, mut other: Self, conflict: impl FnMut(&K, V, V) -> V) | ||
| where | ||
| K: Ord, | ||
| A: Clone, | ||
| { | ||
| // Do we have to append anything at all? | ||
| if other.is_empty() { | ||
| return; | ||
| } | ||
|
|
||
| // We can just swap `self` and `other` if `self` is empty. | ||
| if self.is_empty() { | ||
| mem::swap(self, &mut other); | ||
| return; | ||
| } | ||
|
|
||
| let self_iter = mem::replace(self, Self::new_in((*self.alloc).clone())).into_iter(); | ||
| let other_iter = mem::replace(&mut other, Self::new_in((*self.alloc).clone())).into_iter(); | ||
| let root = self.root.get_or_insert_with(|| Root::new((*self.alloc).clone())); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this get_or_insert_with? Isn't the root guaranteed to be absent after the replace with a new tree above?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest with you, I copied the code verbatim from The new PR doesn't use this approach though and preserves the content of self |
||
| root.merge_from_sorted_iters_with( | ||
| self_iter, | ||
| other_iter, | ||
| &mut self.length, | ||
| (*self.alloc).clone(), | ||
| conflict, | ||
| ) | ||
| } | ||
|
|
||
| /// Constructs a double-ended iterator over a sub-range of elements in the map. | ||
| /// The simplest way is to use the range syntax `min..max`, thus `range(min..max)` will | ||
| /// yield elements from min (inclusive) to max (exclusive). | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,9 @@ | ||
| use core::assert_matches; | ||
| use std::iter; | ||
| use std::ops::Bound::{Excluded, Included, Unbounded}; | ||
| use std::panic::{AssertUnwindSafe, catch_unwind}; | ||
| use std::sync::atomic::AtomicUsize; | ||
| use std::sync::atomic::Ordering::SeqCst; | ||
| use std::{cmp, iter}; | ||
|
|
||
| use super::*; | ||
| use crate::boxed::Box; | ||
|
|
@@ -2128,6 +2128,76 @@ create_append_test!(test_append_239, 239); | |
| #[cfg(not(miri))] // Miri is too slow | ||
| create_append_test!(test_append_1700, 1700); | ||
|
|
||
| macro_rules! create_merge_test { | ||
| ($name:ident, $len:expr) => { | ||
| #[test] | ||
| fn $name() { | ||
| let mut a = BTreeMap::new(); | ||
| for i in 0..8 { | ||
| a.insert(i, i); | ||
| } | ||
|
|
||
| let mut b = BTreeMap::new(); | ||
| for i in 5..$len { | ||
| b.insert(i, 2 * i); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add some comments on why certain constants were chosen here or what the intent of this code is? E.g., 5..$len is not immediately apparent when reading this to me.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the new PR, I mentioned a comment above // On merging keys 5..8, the values are
// added together (i + 2 * i) |
||
|
|
||
| a.merge(b, |_, a_val, b_val| a_val + b_val); | ||
|
|
||
| assert_eq!(a.len(), cmp::max($len, 8)); | ||
|
|
||
| for i in 0..cmp::max($len, 8) { | ||
| if i < 5 { | ||
| assert_eq!(a[&i], i); | ||
| } else { | ||
| if i < cmp::min($len, 8) { | ||
| assert_eq!(a[&i], i + 2 * i); | ||
| } else if i >= $len { | ||
| assert_eq!(a[&i], i); | ||
| } else { | ||
| assert_eq!(a[&i], 2 * i); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| a.check(); | ||
| assert_eq!( | ||
| a.remove(&($len - 1)), | ||
| if $len >= 5 && $len < 8 { | ||
| Some(($len - 1) + 2 * ($len - 1)) | ||
| } else { | ||
| Some(2 * ($len - 1)) | ||
| } | ||
| ); | ||
| assert_eq!(a.insert($len - 1, 20), None); | ||
| a.check(); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| // These are mostly for testing the algorithm that "fixes" the right edge after insertion. | ||
| // Single node, merge conflicting key values. | ||
| create_merge_test!(test_merge_7, 7); | ||
| // Single node. | ||
| create_merge_test!(test_merge_9, 9); | ||
| // Two leafs that don't need fixing. | ||
| create_merge_test!(test_merge_17, 17); | ||
| // Two leafs where the second one ends up underfull and needs stealing at the end. | ||
| create_merge_test!(test_merge_14, 14); | ||
| // Two leafs where the second one ends up empty because the insertion finished at the root. | ||
| create_merge_test!(test_merge_12, 12); | ||
| // Three levels; insertion finished at the root. | ||
| create_merge_test!(test_merge_144, 144); | ||
| // Three levels; insertion finished at leaf while there is an empty node on the second level. | ||
| create_merge_test!(test_merge_145, 145); | ||
| // Tests for several randomly chosen sizes. | ||
| create_merge_test!(test_merge_170, 170); | ||
| create_merge_test!(test_merge_181, 181); | ||
| #[cfg(not(miri))] // Miri is too slow | ||
| create_merge_test!(test_merge_239, 239); | ||
| #[cfg(not(miri))] // Miri is too slow | ||
| create_merge_test!(test_merge_1700, 1700); | ||
|
|
||
| #[test] | ||
| #[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] | ||
| fn test_append_drop_leak() { | ||
|
|
@@ -2615,3 +2685,19 @@ fn test_id_based_append() { | |
|
|
||
| assert_eq!(lhs.pop_first().unwrap().0.name, "lhs_k".to_string()); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_id_based_merge() { | ||
| let mut lhs = BTreeMap::new(); | ||
| let mut rhs = BTreeMap::new(); | ||
|
|
||
| lhs.insert(IdBased { id: 0, name: "lhs_k".to_string() }, "1".to_string()); | ||
| rhs.insert(IdBased { id: 0, name: "rhs_k".to_string() }, "2".to_string()); | ||
|
|
||
| lhs.merge(rhs, |_, mut lhs_val, rhs_val| { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add a test / extend this one to verify that lhs_val and rhs_val are from left/right respectively?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hope I'm understanding correctly, but do you want me to add assertion cases in this merge callback function that verifies that I could do that. |
||
| lhs_val.push_str(&rhs_val); | ||
| lhs_val | ||
| }); | ||
|
|
||
| assert_eq!(lhs.pop_first().unwrap().0.name, "lhs_k".to_string()); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the new PR, I use
CursorMutto help me merge the keys from otherBTreeMapinto selfBTreeMap, so I don't have to consume the selfBTreeMapreconstruct the mergedBTreeMapfrom scratch.