diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index b11a6446..cb167357 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -473,6 +473,14 @@ impl Iter<'_> { .next_range_back() .map(|r| util::join(self.key, *r.start())..=util::join(self.key, *r.end())) } + + /// Read multiple values from the iterator into `dst`. + /// Returns a mutable slice of `dst` that contains the read values. + /// + /// This can be significantly faster than calling `next()` repeatedly. + pub(crate) fn next_many<'a>(&mut self, dst: &'a mut [u32]) -> &'a mut [u32] { + self.inner.next_many(self.key, dst) + } } impl fmt::Debug for Container { diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index 72b4fc2b..f0b88501 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -400,15 +400,11 @@ impl RoaringBitmap { pub fn remove(&mut self, value: u32) -> bool { let (key, index) = util::split(value); match self.containers.binary_search_by_key(&key, |c| c.key) { - Ok(loc) => { - if self.containers[loc].remove(index) { - if self.containers[loc].is_empty() { - self.containers.remove(loc); - } - true - } else { - false + Ok(loc) if self.containers[loc].remove(index) => { + if self.containers[loc].is_empty() { + self.containers.remove(loc); } + true } _ => false, } diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 49a5cbc5..cfc41c6f 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -331,6 +331,81 @@ impl Iter<'_> { pub fn next_range_back(&mut self) -> Option> { next_range_back_impl(&mut self.front, &mut self.containers, &mut self.back) } + + /// Retrieve the next `dst.len()` values from the iterator and write them into `dst`. + /// + /// Returns a mutable slice of `dst` that contains the read values. A slice shorter + /// than `dst.len()` is returned if the iterator is exhausted. + /// + /// This method is significantly faster than calling `next()` repeatedly due to + /// reduced per-element overhead and better CPU cache utilization. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bitmap: RoaringBitmap = (0..100).collect(); + /// let mut iter = bitmap.iter(); + /// let mut buf = [0u32; 32]; + /// + /// let out = iter.next_many(&mut buf); + /// assert_eq!(out.len(), 32); + /// assert_eq!(out[0], 0); + /// assert_eq!(out[31], 31); + /// + /// // Iterate remainder + /// let out = iter.next_many(&mut buf); + /// assert_eq!(out.len(), 32); + /// assert_eq!(out[0], 32); + /// ``` + pub fn next_many<'a>(&mut self, dst: &'a mut [u32]) -> &'a mut [u32] { + if dst.is_empty() { + return &mut []; + } + + let mut count = 0; + + // First drain from the front container iterator if present + if let Some(ref mut front_iter) = self.front { + count += front_iter.next_many(&mut dst[count..]).len(); + if count >= dst.len() { + return &mut dst[..count]; + } + // Front is exhausted + self.front = None; + } + + // Process remaining containers + while count < dst.len() { + let Some(container) = self.containers.next() else { + // No more containers in the middle, try the back + break; + }; + let mut container_iter = container.into_iter(); + let out = container_iter.next_many(&mut dst[count..]); + count += out.len(); + + // If container still has values, save it as new front + if !out.is_empty() && container_iter.len() > 0 { + self.front = Some(container_iter); + return &mut dst[..count]; + } + } + + // Finally, try draining from the back iterator if present + if count < dst.len() { + if let Some(ref mut back_iter) = self.back { + let n = back_iter.next_many(&mut dst[count..]); + count += n.len(); + if back_iter.len() == 0 { + self.back = None; + } + } + } + + &mut dst[..count] + } } impl IntoIter { @@ -419,6 +494,80 @@ impl IntoIter { pub fn next_range_back(&mut self) -> Option> { next_range_back_impl(&mut self.front, &mut self.containers, &mut self.back) } + + /// Retrieve the next `dst.len()` values from the iterator and write them into `dst`. + /// + /// Returns a mutable slice of `dst` that contains the read values. A slice shorter + /// than `dst.len()` is returned if the iterator is exhausted. + /// + /// This method is significantly faster than calling `next()` repeatedly due to + /// reduced per-element overhead and better CPU cache utilization. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bitmap: RoaringBitmap = (0..100).collect(); + /// let mut iter = bitmap.into_iter(); + /// let mut buf = [0u32; 32]; + /// + /// let out = iter.next_many(&mut buf); + /// assert_eq!(out.len(), 32); + /// assert_eq!(out[0], 0); + /// assert_eq!(out[31], 31); + /// + /// // Iterate remainder + /// let out = iter.next_many(&mut buf); + /// assert_eq!(out.len(), 32); + /// assert_eq!(out[0], 32); + /// ``` + pub fn next_many<'a>(&mut self, dst: &'a mut [u32]) -> &'a mut [u32] { + if dst.is_empty() { + return &mut []; + } + + let mut count = 0; + + // First drain from the front container iterator if present + if let Some(ref mut front_iter) = self.front { + count += front_iter.next_many(&mut dst[count..]).len(); + if count >= dst.len() { + return &mut dst[..count]; + } + // Front is exhausted + self.front = None; + } + + // Process remaining containers + while count < dst.len() { + let Some(container) = self.containers.next() else { + // No more containers in the middle, try the back + break; + }; + let mut container_iter = container.into_iter(); + let out = container_iter.next_many(&mut dst[count..]); + count += out.len(); + + // If container still has values, save it as new front + if !out.is_empty() && container_iter.len() > 0 { + self.front = Some(container_iter); + return &mut dst[..count]; + } + } + + // Finally, try draining from the back iterator if present + if count < dst.len() { + if let Some(ref mut back_iter) = self.back { + count += back_iter.next_many(&mut dst[count..]).len(); + if back_iter.len() == 0 { + self.back = None; + } + } + } + + &mut dst[..count] + } } fn size_hint_impl( diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index ca32e2b5..5c25ed48 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -4,6 +4,8 @@ use core::fmt::{Display, Formatter}; use core::mem::size_of; use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign}; +use crate::bitmap::util; + use super::{ArrayStore, Interval}; #[cfg(not(feature = "std"))] @@ -691,6 +693,55 @@ impl> BitmapIter { let index = 63 - index_from_left; Some(64 * key_back + index) } + + /// Read multiple values from the iterator into `dst`. + /// Returns a mutable slice of `dst` that contains the read values. + /// + /// This can be significantly faster than calling `next()` repeatedly. + pub fn next_many<'a>(&mut self, high: u16, dst: &'a mut [u32]) -> &'a mut [u32] { + if dst.is_empty() { + return &mut []; + } + + let mut count = 0; + let bits = self.bits.borrow(); + + while count < dst.len() { + // Advance to next non-zero word if current is empty + if self.value == 0 { + if self.key >= self.key_back { + break; + } + loop { + self.key += 1; + if self.key == self.key_back { + self.value = core::mem::replace(&mut self.value_back, 0); + break; + } + // Safety: key is always in bounds + self.value = unsafe { *bits.get_unchecked(self.key as usize) }; + if self.value != 0 { + break; + } + } + if self.value == 0 { + break; + } + } + + // Extract set bits from current word + let base = self.key * 64; + while self.value != 0 && count < dst.len() { + let bit_pos = self.value.trailing_zeros() as u16; + dst[count] = util::join(high, base + bit_pos); + count += 1; + // Clear the lowest set bit + self.value &= self.value - 1; + } + } + + &mut dst[..count] + } } fn advance_to_next_nonzero_word<'a>( diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 86b20223..4ad8854c 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -5,6 +5,8 @@ use core::ops::{ use core::slice::Iter; use core::{cmp::Ordering, ops::ControlFlow}; +use crate::bitmap::util; + use super::{ArrayStore, BitmapStore}; #[derive(PartialEq, Eq, Clone, Debug)] @@ -834,6 +836,56 @@ impl> RunIter { let result = self.intervals.as_slice().last()?.end - self.backward_offset; Some(result) } + + /// Read multiple values from the iterator into `dst`. + /// Returns a mutable slice of `dst` that contains the read values. + /// + /// This can be significantly faster than calling `next()` repeatedly + /// because it processes runs in bulk. + pub fn next_many<'a>(&mut self, high: u16, dst: &'a mut [u32]) -> &'a mut [u32] { + if dst.is_empty() { + return &mut []; + } + + let mut count = 0; + + while count < dst.len() { + let Some(interval) = self.intervals.as_slice().first() else { + break; + }; + + let end_offset = + if self.intervals.as_slice().len() == 1 { self.backward_offset } else { 0 }; + + let start = interval.start + self.forward_offset; + let end = interval.end - end_offset; + + // How many values can we emit from this interval? + let available = (end - start + 1) as usize; + let to_emit = available.min(dst.len() - count); + + // Emit values + for i in 0..to_emit { + dst[count + i] = util::join(high, start + i as u16); + } + count += to_emit; + + // Advance within or past this interval + if to_emit == available { + // Consumed entire interval + _ = self.intervals.next(); + self.forward_offset = 0; + if self.intervals.as_slice().is_empty() { + self.backward_offset = 0; + } + } else { + // Partial consumption + self.forward_offset += to_emit as u16; + } + } + + &mut dst[..count] + } } impl> Iterator for RunIter { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 11a3de0c..8ec3cfa6 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -20,6 +20,7 @@ pub(crate) use interval_store::{IntervalStore, RunIterBorrowed, RunIterOwned}; pub(crate) use interval_store::{RUN_ELEMENT_BYTES, RUN_NUM_BYTES}; use crate::bitmap::container::ARRAY_LIMIT; +use crate::bitmap::util; #[cfg(not(feature = "std"))] use alloc::boxed::Box; @@ -1038,6 +1039,45 @@ impl Iterator for Iter<'_> { } } +impl Iter<'_> { + /// Read multiple values from the iterator into `dst`. + /// Returns a mutable slice of `dst` that contains the read values. + /// + /// This can be significantly faster than calling `next()` repeatedly. + pub fn next_many<'a>(&mut self, high: u16, dst: &'a mut [u32]) -> &'a mut [u32] { + match self { + Iter::Array(inner) => { + let remaining = inner.as_slice(); + let n = remaining.len().min(dst.len()); + dst[..n] + .iter_mut() + .zip(&remaining[..n]) + .for_each(|(o, low)| *o = util::join(high, *low)); + if n > 0 { + _ = inner.nth(n - 1); + } + &mut dst[..n] + } + Iter::Vec(inner) => { + let remaining = inner.as_slice(); + let n = remaining.len().min(dst.len()); + dst[..n] + .iter_mut() + .zip(&remaining[..n]) + .for_each(|(o, low)| *o = util::join(high, *low)); + if n > 0 { + _ = inner.nth(n - 1); + } + &mut dst[..n] + } + Iter::BitmapBorrowed(inner) => inner.next_many(high, dst), + Iter::BitmapOwned(inner) => inner.next_many(high, dst), + Iter::RunBorrowed(inner) => inner.next_many(high, dst), + Iter::RunOwned(inner) => inner.next_many(high, dst), + } + } +} + impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { match self { diff --git a/roaring/tests/iter_next_many.rs b/roaring/tests/iter_next_many.rs new file mode 100644 index 00000000..f0b3ea44 --- /dev/null +++ b/roaring/tests/iter_next_many.rs @@ -0,0 +1,239 @@ +use proptest::arbitrary::any; +use proptest::collection::btree_set; +use proptest::proptest; +use roaring::RoaringBitmap; + +/// Test basic next_many functionality with a simple range +#[test] +fn next_many_simple() { + let bitmap: RoaringBitmap = (0..100).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 32]; + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 32); + assert_eq!(&out[..], &(0..32).collect::>()[..]); + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 32); + assert_eq!(&out[..], &(32..64).collect::>()[..]); + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 32); + assert_eq!(&out[..], &(64..96).collect::>()[..]); + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 4); + assert_eq!(&out[..], &[96, 97, 98, 99]); + + let out = iter.next_many(&mut buf); + assert!(out.is_empty()); +} + +/// Test next_many with IntoIter (owned iterator) +#[test] +fn next_many_into_iter() { + let bitmap: RoaringBitmap = (0..100).collect(); + let mut iter = bitmap.into_iter(); + let mut buf = [0u32; 32]; + let mut all_values = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + all_values.extend_from_slice(&out[..]); + } + + let expected: Vec = (0..100).collect(); + assert_eq!(all_values, expected); +} + +/// Test next_many with empty buffer +#[test] +fn next_many_empty_buffer() { + let bitmap: RoaringBitmap = (0..10).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 0]; + + let out = iter.next_many(&mut buf); + assert!(out.is_empty()); + // Iterator should not be advanced + assert_eq!(iter.next(), Some(0)); +} + +/// Test next_many with empty bitmap +#[test] +fn next_many_empty_bitmap() { + let bitmap = RoaringBitmap::new(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 32]; + + let out = iter.next_many(&mut buf); + assert!(out.is_empty()); +} + +/// Test next_many across multiple containers +#[test] +fn next_many_multiple_containers() { + // Container boundary is at 65536 + let bitmap: RoaringBitmap = (65530..65545).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 32]; + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 15); + let expected: Vec = (65530..65545).collect(); + assert_eq!(&out[..], &expected[..]); +} + +/// Test next_many with large buffer +#[test] +fn next_many_large_buffer() { + let bitmap: RoaringBitmap = (0..50).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 1000]; + + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 50); + let expected: Vec = (0..50).collect(); + assert_eq!(&out[..], &expected[..]); +} + +/// Test next_many with bitmap store (dense values) +#[test] +fn next_many_bitmap_store() { + // More than 4096 values in a container triggers bitmap storage + let bitmap: RoaringBitmap = (0..10000).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 512]; + let mut all_values = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + all_values.extend_from_slice(&out[..]); + } + + let expected: Vec = (0..10000).collect(); + assert_eq!(all_values, expected); +} + +/// Test next_many with run store (consecutive values) +#[test] +fn next_many_run_store() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..1000); + bitmap.insert_range(2000..3000); + + let mut iter = bitmap.iter(); + let mut buf = [0u32; 256]; + let mut all_values = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + all_values.extend_from_slice(&out[..]); + } + + let expected: Vec = (0..1000).chain(2000..3000).collect(); + assert_eq!(all_values, expected); +} + +/// Test interleaving next_many with next() +#[test] +fn next_many_interleaved_with_next() { + let bitmap: RoaringBitmap = (0..100).collect(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 10]; + + // Read first 10 via next_many + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 10); + assert_eq!(&out[..], &(0..10).collect::>()[..]); + + // Read one via next + assert_eq!(iter.next(), Some(10)); + + // Read next 10 via next_many + let out = iter.next_many(&mut buf); + assert_eq!(out.len(), 10); + assert_eq!(&out[..], &(11..21).collect::>()[..]); + + // Read one via next + assert_eq!(iter.next(), Some(21)); +} + +// Test next_many preserves no gaps/duplicates +proptest! { + #[test] + fn next_many_correctness(values in btree_set(any::(), ..=10_000)) { + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let mut iter = bitmap.iter(); + let mut buf = [0u32; 128]; + let mut collected = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + collected.extend_from_slice(&out[..]); + } + + let expected: Vec = values.into_iter().collect(); + assert_eq!(collected, expected); + } +} + +// Test next_many with various buffer sizes +proptest! { + #[test] + fn next_many_various_buffer_sizes( + values in btree_set(any::(), 100..=1000), + buf_size in 1usize..=500 + ) { + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let mut iter = bitmap.iter(); + let mut buf = vec![0u32; buf_size]; + let mut collected = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + collected.extend_from_slice(&out[..]); + } + + let expected: Vec = values.into_iter().collect(); + assert_eq!(collected, expected); + } +} + +// Test next_many with IntoIter correctness +proptest! { + #[test] + fn next_many_into_iter_correctness(values in btree_set(any::(), ..=10_000)) { + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let mut iter = bitmap.into_iter(); + let mut buf = [0u32; 128]; + let mut collected = Vec::new(); + + loop { + let out = iter.next_many(&mut buf); + if out.is_empty() { + break; + } + collected.extend_from_slice(&out[..]); + } + + let expected: Vec = values.into_iter().collect(); + assert_eq!(collected, expected); + } +}