From 84600ddab756d887affa1916322cd8dcf3b368c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 30 Apr 2021 19:29:46 +0200 Subject: [PATCH 1/6] Improve the ref-ref RoaringBitmap union --- src/bitmap/container.rs | 17 ++++++++++++++++- src/bitmap/ops.rs | 33 ++++++++++++++++++++++++++++----- src/bitmap/store.rs | 27 ++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 31bc628c7..09a5fb857 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -1,4 +1,4 @@ -use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; use std::{fmt, ops::Range}; use super::store::{self, Store}; @@ -118,6 +118,21 @@ impl Container { } } +impl BitOr<&Container> for &Container { + type Output = Container; + + fn bitor(self, rhs: &Container) -> Container { + let store = BitOr::bitor(&self.store, &rhs.store); + let mut container = Container { + key: self.key, + len: store.len(), + store, + }; + container.ensure_correct_store(); + container + } +} + impl BitOrAssign for Container { fn bitor_assign(&mut self, rhs: Container) { BitOrAssign::bitor_assign(&mut self.store, rhs.store); diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 11f42a6df..960bf7221 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; -use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; +use std::{cmp, mem}; use retain_mut::RetainMut; @@ -191,11 +191,34 @@ impl BitOr<&RoaringBitmap> for &RoaringBitmap { /// An `union` between two sets. 
fn bitor(self, rhs: &RoaringBitmap) -> RoaringBitmap { - if self.len() <= rhs.len() { - BitOr::bitor(rhs.clone(), self) - } else { - BitOr::bitor(self.clone(), rhs) + let len = cmp::max(self.containers.len(), rhs.containers.len()); + let mut containers = Vec::with_capacity(len); + + let mut iter_lhs = self.containers.iter().peekable(); + let mut iter_rhs = rhs.containers.iter().peekable(); + + loop { + match (iter_lhs.peek(), iter_rhs.peek()) { + (Some(lhs), Some(rhs)) => { + let container = match lhs.key.cmp(&rhs.key) { + Ordering::Less => iter_lhs.next().cloned().unwrap(), + Ordering::Greater => iter_rhs.next().cloned().unwrap(), + Ordering::Equal => { + let container = BitOr::bitor(*lhs, *rhs); + iter_lhs.next(); + iter_rhs.next(); + container + } + }; + containers.push(container); + } + (Some(_), None) => containers.extend(iter_lhs.by_ref().cloned()), + (None, Some(_)) => containers.extend(iter_rhs.by_ref().cloned()), + (None, None) => break, + } } + + RoaringBitmap { containers } } } diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index a4b348fd6..a98d626d0 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering::{Equal, Greater, Less}; -use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; use std::{borrow::Borrow, ops::Range}; use std::{mem, slice, vec}; @@ -307,6 +307,31 @@ impl Store { } } +impl BitOr<&Store> for &Store { + type Output = Store; + + fn bitor(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(union_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut lhs = self.clone(); + BitOrAssign::bitor_assign(&mut lhs, rhs); + lhs + } + (&Bitmap(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + BitOrAssign::bitor_assign(&mut lhs, rhs); + lhs + } + (&Array(_), &Bitmap(_)) => { + let mut rhs = rhs.clone(); + BitOrAssign::bitor_assign(&mut rhs, self); + rhs + 
} + } + } +} + impl BitOrAssign for Store { fn bitor_assign(&mut self, mut rhs: Store) { match (self, &mut rhs) { From bc101d0fd2f3ae907319364ad3d501db228d1147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 30 Apr 2021 19:41:52 +0200 Subject: [PATCH 2/6] Improve the ref-ref RoaringBitmap intersection --- src/bitmap/container.rs | 17 +++++++++++++- src/bitmap/ops.rs | 23 +++++++++++++++---- src/bitmap/store.rs | 51 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 85 insertions(+), 6 deletions(-) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 09a5fb857..92b1b524c 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -1,4 +1,4 @@ -use std::ops::{BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; use std::{fmt, ops::Range}; use super::store::{self, Store}; @@ -149,6 +149,21 @@ impl BitOrAssign<&Container> for Container { } } +impl BitAnd<&Container> for &Container { + type Output = Container; + + fn bitand(self, rhs: &Container) -> Container { + let store = BitAnd::bitand(&self.store, &rhs.store); + let mut container = Container { + key: self.key, + len: store.len(), + store, + }; + container.ensure_correct_store(); + container + } +} + impl BitAndAssign for Container { fn bitand_assign(&mut self, rhs: Container) { BitAndAssign::bitand_assign(&mut self.store, rhs.store); diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 960bf7221..09e6fb8e6 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -287,11 +287,26 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { /// An `intersection` between two sets. 
fn bitand(self, rhs: &RoaringBitmap) -> RoaringBitmap { - if rhs.len() < self.len() { - BitAnd::bitand(self.clone(), rhs) - } else { - BitAnd::bitand(rhs.clone(), self) + let mut containers = Vec::new(); + let mut iter_lhs = self.containers.iter().peekable(); + let mut iter_rhs = rhs.containers.iter().peekable(); + + loop { + match (iter_lhs.peek(), iter_rhs.peek()) { + (None, None) => break, + (Some(lhs), Some(rhs)) => { + if lhs.key == rhs.key { + let container = BitAnd::bitand(*lhs, *rhs); + iter_lhs.next(); + iter_rhs.next(); + containers.push(container); + } + } + _otherwise => (), + } } + + RoaringBitmap { containers } } } diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index a98d626d0..57c7ccb47 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering::{Equal, Greater, Less}; -use std::ops::{BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; use std::{borrow::Borrow, ops::Range}; use std::{mem, slice, vec}; @@ -381,6 +381,31 @@ impl BitOrAssign<&Store> for Store { } } +impl BitAnd<&Store> for &Store { + type Output = Store; + + fn bitand(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(intersect_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut rhs = rhs.clone(); + BitAndAssign::bitand_assign(&mut rhs, self); + rhs + } + (&Bitmap(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + BitAndAssign::bitand_assign(&mut lhs, rhs); + lhs + } + (&Array(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + BitAndAssign::bitand_assign(&mut lhs, rhs); + lhs + } + } + } +} + impl BitAndAssign for Store { #[allow(clippy::suspicious_op_assign_impl)] fn bitand_assign(&mut self, mut rhs: Store) { @@ -716,6 +741,30 @@ fn union_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { out } +#[inline] +fn intersect_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { + let mut out = Vec::new(); + + 
// Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(&b) { + Less => i += 1, + Greater => j += 1, + Equal => { + out.push(*a); + i += 1; + j += 1; + } + } + } + + out +} + #[inline] fn key(index: u16) -> usize { index as usize / 64 From 51922c5bfdba01ed5236638d6d39fc9ff1f2c84b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 1 May 2021 12:41:17 +0200 Subject: [PATCH 3/6] Improve the ref-ref RoaringBitmap symmetric difference --- src/bitmap/container.rs | 17 ++++++++++- src/bitmap/ops.rs | 38 ++++++++++++++++++------ src/bitmap/store.rs | 64 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 92b1b524c..f5f9d00e2 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -1,4 +1,4 @@ -use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, SubAssign}; use std::{fmt, ops::Range}; use super::store::{self, Store}; @@ -188,6 +188,21 @@ impl SubAssign<&Container> for Container { } } +impl BitXor<&Container> for &Container { + type Output = Container; + + fn bitxor(self, rhs: &Container) -> Container { + let store = BitXor::bitxor(&self.store, &rhs.store); + let mut container = Container { + key: self.key, + len: store.len(), + store, + }; + container.ensure_correct_store(); + container + } +} + impl BitXorAssign for Container { fn bitxor_assign(&mut self, rhs: Container) { BitXorAssign::bitxor_assign(&mut self.store, rhs.store); diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 09e6fb8e6..8eba28039 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -204,10 +204,8 @@ impl BitOr<&RoaringBitmap> for &RoaringBitmap { Ordering::Less => 
iter_lhs.next().cloned().unwrap(), Ordering::Greater => iter_rhs.next().cloned().unwrap(), Ordering::Equal => { - let container = BitOr::bitor(*lhs, *rhs); - iter_lhs.next(); - iter_rhs.next(); - container + let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); + BitOr::bitor(lhs, rhs) } }; containers.push(container); @@ -443,11 +441,35 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap { /// A `symmetric difference` between two sets. fn bitxor(self, rhs: &RoaringBitmap) -> RoaringBitmap { - if self.len() < rhs.len() { - BitXor::bitxor(self, rhs.clone()) - } else { - BitXor::bitxor(self.clone(), rhs) + let mut containers = Vec::new(); + let mut iter_lhs = self.containers.iter().peekable(); + let mut iter_rhs = rhs.containers.iter().peekable(); + + loop { + match (iter_lhs.peek(), iter_rhs.peek()) { + (None, None) => break, + (Some(_), None) => containers.extend(iter_lhs.by_ref().cloned()), + (None, Some(_)) => containers.extend(iter_rhs.by_ref().cloned()), + (Some(lhs), Some(rhs)) => { + let container = match lhs.key.cmp(&rhs.key) { + Ordering::Equal => { + let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); + let container = BitXor::bitxor(lhs, rhs); + if container.len != 0 { + container + } else { + continue; + } + } + Ordering::Less => iter_lhs.next().cloned().unwrap(), + Ordering::Greater => iter_rhs.next().cloned().unwrap(), + }; + containers.push(container); + } + } } + + RoaringBitmap { containers } } } diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 57c7ccb47..b16b574a2 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering::{Equal, Greater, Less}; -use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, SubAssign}; use std::{borrow::Borrow, ops::Range}; use std::{mem, slice, vec}; @@ -500,6 +500,31 @@ impl SubAssign<&Store> for Store { } } +impl BitXor<&Store> for &Store { + 
type Output = Store; + + fn bitxor(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(symmetric_difference_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut lhs = self.clone(); + BitXorAssign::bitxor_assign(&mut lhs, rhs); + lhs + } + (&Bitmap(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + BitXorAssign::bitxor_assign(&mut lhs, rhs); + lhs + } + (&Array(_), &Bitmap(_)) => { + let mut lhs = rhs.clone(); + BitXorAssign::bitxor_assign(&mut lhs, self); + lhs + } + } + } +} + impl BitXorAssign for Store { fn bitxor_assign(&mut self, mut rhs: Store) { // TODO improve this function @@ -720,11 +745,11 @@ fn union_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { match a.cmp(&b) { Less => { out.push(*a); - i += 1 + i += 1; } Greater => { out.push(*b); - j += 1 + j += 1; } Equal => { out.push(*a); @@ -765,6 +790,39 @@ fn intersect_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { out } +#[inline] +fn symmetric_difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { + let mut out = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(&b) { + Less => { + out.push(*a); + i += 1; + } + Greater => { + out.push(*b); + j += 1; + } + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the arrays + out.extend_from_slice(&arr1[i..]); + out.extend_from_slice(&arr2[j..]); + + out +} + #[inline] fn key(index: u16) -> usize { index as usize / 64 From db90b25c263c9d06353242cc1108c1598da48781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 1 May 2021 16:19:17 +0200 Subject: [PATCH 4/6] Improve the ref-ref RoaringBitmap difference --- src/bitmap/container.rs | 17 ++++++++++++- src/bitmap/ops.rs | 49 ++++++++++++++++++++++++++++++------ src/bitmap/store.rs | 56 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 112 
insertions(+), 10 deletions(-) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index f5f9d00e2..8aa785c0c 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -1,4 +1,4 @@ -use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; use std::{fmt, ops::Range}; use super::store::{self, Store}; @@ -180,6 +180,21 @@ impl BitAndAssign<&Container> for Container { } } +impl Sub<&Container> for &Container { + type Output = Container; + + fn sub(self, rhs: &Container) -> Container { + let store = Sub::sub(&self.store, &rhs.store); + let mut container = Container { + key: self.key, + len: store.len(), + store, + }; + container.ensure_correct_store(); + container + } +} + impl SubAssign<&Container> for Container { fn sub_assign(&mut self, rhs: &Container) { SubAssign::sub_assign(&mut self.store, &rhs.store); diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 8eba28039..e494eed0e 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -294,10 +294,11 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { (None, None) => break, (Some(lhs), Some(rhs)) => { if lhs.key == rhs.key { - let container = BitAnd::bitand(*lhs, *rhs); - iter_lhs.next(); - iter_rhs.next(); - containers.push(container); + let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); + let container = BitAnd::bitand(lhs, rhs); + if container.len != 0 { + containers.push(container); + } } } _otherwise => (), @@ -372,7 +373,7 @@ impl Sub for &RoaringBitmap { /// A `difference` between two sets. fn sub(self, rhs: RoaringBitmap) -> RoaringBitmap { - Sub::sub(self.clone(), rhs) + Sub::sub(self, &rhs) } } @@ -381,7 +382,40 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap { /// A `difference` between two sets. 
fn sub(self, rhs: &RoaringBitmap) -> RoaringBitmap { - Sub::sub(self.clone(), rhs) + let mut iter_lhs = self.containers.iter().peekable(); + let mut iter_rhs = rhs.containers.iter().peekable(); + let mut containers = Vec::new(); + + loop { + match (iter_lhs.peek(), iter_rhs.peek()) { + (None, None) => break, + (Some(_), None) => { + let container = iter_lhs.next().cloned().unwrap(); + containers.push(container); + } + (None, Some(_)) => { + iter_rhs.next().unwrap(); + } + (Some(lhs), Some(rhs)) => match lhs.key.cmp(&rhs.key) { + Ordering::Less => { + let container = iter_lhs.next().cloned().unwrap(); + containers.push(container); + } + Ordering::Equal => { + let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); + let container = Sub::sub(lhs, rhs); + if container.len != 0 { + containers.push(container); + } + } + Ordering::Greater => { + iter_rhs.next().unwrap(); + } + }, + } + } + + RoaringBitmap { containers } } } @@ -532,8 +566,7 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap { } (Some(l), Some(r)) => match l.key.cmp(&r.key) { Ordering::Equal => { - let mut lhs = left.next().unwrap(); - let rhs = right.next().unwrap(); + let (mut lhs, rhs) = left.next().zip(right.next()).unwrap(); BitXorAssign::bitxor_assign(&mut lhs, rhs); if lhs.len != 0 { self.containers.push(lhs); diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index b16b574a2..0d6d01674 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering::{Equal, Greater, Less}; -use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, SubAssign}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; use std::{borrow::Borrow, ops::Range}; use std::{mem, slice, vec}; @@ -473,6 +473,31 @@ impl BitAndAssign<&Store> for Store { } } +impl Sub<&Store> for &Store { + type Output = Store; + + fn sub(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => 
Array(difference_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut lhs = self.clone(); + SubAssign::sub_assign(&mut lhs, rhs); + lhs + } + (&Bitmap(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + SubAssign::sub_assign(&mut lhs, rhs); + lhs + } + (&Array(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + SubAssign::sub_assign(&mut lhs, rhs); + lhs + } + } + } +} + impl SubAssign<&Store> for Store { fn sub_assign(&mut self, rhs: &Store) { match (self, rhs) { @@ -790,6 +815,35 @@ fn intersect_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { out } +#[inline] +fn difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { + let mut out = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(&b) { + Less => { + out.push(*a); + i += 1; + } + Greater => j += 1, + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the left array + out.extend_from_slice(&arr1[i..]); + + out +} + #[inline] fn symmetric_difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec<u16> { let mut out = Vec::new(); From 4bd55001a33ad74de1bab91cbc37995dd7d62581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 1 May 2021 17:15:48 +0200 Subject: [PATCH 5/6] Introduce a basic benchmark for the difference operation --- benches/lib.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/benches/lib.rs b/benches/lib.rs index dcde85ae6..d1f8acc45 100644 --- a/benches/lib.rs +++ b/benches/lib.rs @@ -129,6 +129,15 @@ fn union_with(c: &mut Criterion) { }); } +fn sub(c: &mut Criterion) { + c.bench_function("sub", |b| { + let bitmap1: RoaringBitmap = (1..100_000).collect(); + let bitmap2: RoaringBitmap = (10..2_000_000).collect(); + + b.iter(|| &bitmap1 - &bitmap2); + }); +} + fn xor(c: &mut Criterion) { c.bench_function("xor", |b| { let bitmap1: RoaringBitmap = 
(1..100).collect(); @@ -319,6 +328,7 @@ criterion_group!( intersect_with, or, union_with, + sub, xor, symmetric_deference_with, is_subset, From 93815ab88f1f9d143fa4df88ae17834d18dec228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 2 May 2021 18:54:21 +0200 Subject: [PATCH 6/6] Implement a constant-time len method --- src/bitmap/inherent.rs | 44 +++++++-- src/bitmap/mod.rs | 1 + src/bitmap/ops.rs | 176 ++++++++++++++++++++++++++++-------- src/bitmap/serialization.rs | 7 +- 4 files changed, 178 insertions(+), 50 deletions(-) diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 902afe1e9..04a20a157 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -16,6 +16,7 @@ impl RoaringBitmap { pub fn new() -> RoaringBitmap { RoaringBitmap { containers: Vec::new(), + len: 0, } } @@ -42,7 +43,13 @@ impl RoaringBitmap { &mut self.containers[loc] } }; - container.insert(index) + + if container.insert(index) { + self.len += 1; + true + } else { + false + } } /// Inserts a range of values from the set specific as [start..end). Returns @@ -94,7 +101,9 @@ impl RoaringBitmap { // If the end range value is in the same container, just call into // the one container. 
if start_container_key == end_container_key { - return self.containers[start_i].insert_range(start_index..end_index); + let inserted = self.containers[start_i].insert_range(start_index..end_index); + self.len += inserted; + return inserted; } // For the first container, insert start_index..u16::MAX, with @@ -138,6 +147,8 @@ impl RoaringBitmap { }; c.insert_range(0..end_index); + self.len += inserted; + inserted } @@ -162,12 +173,20 @@ impl RoaringBitmap { let (key, index) = util::split(value); match self.containers.last_mut() { - Some(container) if container.key == key => container.push(index), + Some(container) if container.key == key => { + if container.push(index) { + self.len += 1; + true + } else { + false + } + } Some(container) if container.key > key => false, _otherwise => { let mut container = Container::new(key); container.push(index); self.containers.push(container); + self.len += 1; true } } @@ -194,12 +213,13 @@ impl RoaringBitmap { if self.containers[loc].len == 0 { self.containers.remove(loc); } + self.len -= 1; true } else { false } } - _ => false, + Err(_) => false, } } /// Removes a range of values from the set specific as [start..end). @@ -230,7 +250,7 @@ impl RoaringBitmap { let (start_hi, start_lo) = util::split(range.start as u32); let (end_hi, end_lo) = util::split((range.end - 1) as u32); let mut index = 0; - let mut result = 0; + let mut removed = 0; while index < self.containers.len() { let key = self.containers[index].key; if key >= start_hi && key <= end_hi { @@ -246,11 +266,11 @@ impl RoaringBitmap { }; // remove container? 
if a == 0 && b == u32::from(u16::max_value()) + 1 { - result += self.containers[index].len; + removed += self.containers[index].len; self.containers.remove(index); continue; } else { - result += self.containers[index].remove_range(a, b); + removed += self.containers[index].remove_range(a, b); if self.containers[index].len == 0 { self.containers.remove(index); continue; @@ -259,7 +279,10 @@ impl RoaringBitmap { } index += 1; } - result + + self.len -= removed; + + removed } /// Returns `true` if this set contains the specified integer. @@ -298,6 +321,7 @@ impl RoaringBitmap { /// ``` pub fn clear(&mut self) { self.containers.clear(); + self.len = 0; } /// Returns `true` if there are no integers in this set. @@ -314,7 +338,7 @@ impl RoaringBitmap { /// assert_eq!(rb.is_empty(), false); /// ``` pub fn is_empty(&self) -> bool { - self.containers.is_empty() + self.len == 0 } /// Returns the number of distinct integers added to the set. @@ -335,7 +359,7 @@ impl RoaringBitmap { /// assert_eq!(rb.len(), 2); /// ``` pub fn len(&self) -> u64 { - self.containers.iter().map(|container| container.len).sum() + self.len } /// Returns the minimum value in the set (if the set is non-empty). 
diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 7a3540503..00c7c5a1e 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -33,4 +33,5 @@ pub use self::iter::Iter; #[derive(PartialEq, Clone)] pub struct RoaringBitmap { containers: Vec<Container>, + len: u64, } diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index e494eed0e..59dc822f1 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -193,6 +193,7 @@ impl BitOr<&RoaringBitmap> for &RoaringBitmap { fn bitor(self, rhs: &RoaringBitmap) -> RoaringBitmap { let len = cmp::max(self.containers.len(), rhs.containers.len()); let mut containers = Vec::with_capacity(len); + let mut len = 0; let mut iter_lhs = self.containers.iter().peekable(); let mut iter_rhs = rhs.containers.iter().peekable(); @@ -208,15 +209,26 @@ impl BitOr<&RoaringBitmap> for &RoaringBitmap { BitOr::bitor(lhs, rhs) } }; + len += container.len; containers.push(container); } - (Some(_), None) => containers.extend(iter_lhs.by_ref().cloned()), - (None, Some(_)) => containers.extend(iter_rhs.by_ref().cloned()), + (Some(_), None) => { + iter_lhs.by_ref().cloned().for_each(|container| { + len += container.len; + containers.push(container); + }); + } + (None, Some(_)) => { + iter_rhs.by_ref().cloned().for_each(|container| { + len += container.len; + containers.push(container); + }); + } (None, None) => break, } } - RoaringBitmap { containers } + RoaringBitmap { containers, len } } } @@ -231,8 +243,16 @@ impl BitOrAssign for RoaringBitmap { for container in rhs.containers { let key = container.key; match self.containers.binary_search_by_key(&key, |c| c.key) { - Err(loc) => self.containers.insert(loc, container), - Ok(loc) => BitOrAssign::bitor_assign(&mut self.containers[loc], container), + Err(loc) => { + self.len += container.len; + self.containers.insert(loc, container); + } + Ok(loc) => { + let this_container = &mut self.containers[loc]; + self.len -= this_container.len; + BitOrAssign::bitor_assign(this_container, container); + self.len += 
this_container.len; + } } } } @@ -244,8 +264,16 @@ impl BitOrAssign<&RoaringBitmap> for RoaringBitmap { for container in &rhs.containers { let key = container.key; match self.containers.binary_search_by_key(&key, |c| c.key) { - Err(loc) => self.containers.insert(loc, container.clone()), - Ok(loc) => BitOrAssign::bitor_assign(&mut self.containers[loc], container), + Err(loc) => { + self.len += container.len; + self.containers.insert(loc, container.clone()); + } + Ok(loc) => { + let this_container = &mut self.containers[loc]; + self.len -= this_container.len; + BitOrAssign::bitor_assign(this_container, container); + self.len += this_container.len; + } } } } @@ -286,6 +314,8 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { /// An `intersection` between two sets. fn bitand(self, rhs: &RoaringBitmap) -> RoaringBitmap { let mut containers = Vec::new(); + let mut len = 0; + let mut iter_lhs = self.containers.iter().peekable(); let mut iter_rhs = rhs.containers.iter().peekable(); @@ -297,6 +327,7 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); let container = BitAnd::bitand(lhs, rhs); if container.len != 0 { + len += container.len; containers.push(container); } } @@ -305,7 +336,7 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { } } - RoaringBitmap { containers } + RoaringBitmap { containers, len } } } @@ -317,34 +348,58 @@ impl BitAndAssign for RoaringBitmap { mem::swap(self, &mut rhs); } + let mut removed = 0; self.containers.retain_mut(|cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { let rhs_cont = &mut rhs.containers[loc]; let rhs_cont = mem::replace(rhs_cont, Container::new(rhs_cont.key)); + removed += cont.len; BitAndAssign::bitand_assign(cont, rhs_cont); - cont.len != 0 + if cont.len != 0 { + removed -= cont.len; + true + } else { + false + } + } + Err(_) => { + removed += cont.len; + false } - Err(_) => false, } - }) + }); + + self.len -= 
removed; } } impl BitAndAssign<&RoaringBitmap> for RoaringBitmap { /// An `intersection` between two sets. fn bitand_assign(&mut self, rhs: &RoaringBitmap) { + let mut removed = 0; self.containers.retain_mut(|cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { + removed += cont.len; BitAndAssign::bitand_assign(cont, &rhs.containers[loc]); - cont.len != 0 + if cont.len != 0 { + removed -= cont.len; + true + } else { + false + } + } + Err(_) => { + removed += cont.len; + false } - Err(_) => false, } - }) + }); + + self.len -= removed; } } @@ -382,15 +437,18 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap { /// A `difference` between two sets. fn sub(self, rhs: &RoaringBitmap) -> RoaringBitmap { + let mut containers = Vec::new(); + let mut len = 0; + let mut iter_lhs = self.containers.iter().peekable(); let mut iter_rhs = rhs.containers.iter().peekable(); - let mut containers = Vec::new(); loop { match (iter_lhs.peek(), iter_rhs.peek()) { (None, None) => break, (Some(_), None) => { let container = iter_lhs.next().cloned().unwrap(); + len += container.len; containers.push(container); } (None, Some(_)) => { @@ -399,12 +457,14 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap { (Some(lhs), Some(rhs)) => match lhs.key.cmp(&rhs.key) { Ordering::Less => { let container = iter_lhs.next().cloned().unwrap(); + len += container.len; containers.push(container); } Ordering::Equal => { let (lhs, rhs) = iter_lhs.next().zip(iter_rhs.next()).unwrap(); let container = Sub::sub(lhs, rhs); if container.len != 0 { + len += container.len; containers.push(container); } } @@ -415,7 +475,7 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap { } } - RoaringBitmap { containers } + RoaringBitmap { containers, len } } } @@ -429,15 +489,24 @@ impl SubAssign for RoaringBitmap { impl SubAssign<&RoaringBitmap> for RoaringBitmap { /// A `difference` between two sets. 
fn sub_assign(&mut self, rhs: &RoaringBitmap) { + let mut removed = 0; self.containers.retain_mut(|cont| { match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) { Ok(loc) => { + removed += cont.len; SubAssign::sub_assign(cont, &rhs.containers[loc]); - cont.len != 0 + if cont.len != 0 { + removed -= cont.len; + true + } else { + false + } } Err(_) => true, } - }) + }); + + self.len -= removed; } } @@ -476,6 +545,8 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap { /// A `symmetric difference` between two sets. fn bitxor(self, rhs: &RoaringBitmap) -> RoaringBitmap { let mut containers = Vec::new(); + let mut len = 0; + let mut iter_lhs = self.containers.iter().peekable(); let mut iter_rhs = rhs.containers.iter().peekable(); @@ -498,12 +569,13 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap { Ordering::Less => iter_lhs.next().cloned().unwrap(), Ordering::Greater => iter_rhs.next().cloned().unwrap(), }; + len += container.len; containers.push(container); } } } - RoaringBitmap { containers } + RoaringBitmap { containers, len } } } @@ -513,33 +585,46 @@ impl BitXorAssign for RoaringBitmap { let mut left = mem::take(&mut self.containers).into_iter().peekable(); let mut right = rhs.containers.into_iter().peekable(); + self.len = 0; + loop { match (left.peek(), right.peek()) { (None, None) => break, (Some(_), None) => { - self.containers.extend(left); + self.containers.reserve(left.len()); + left.for_each(|container| { + self.len += container.len; + self.containers.push(container); + }); break; } (None, Some(_)) => { - self.containers.extend(right); + self.containers.reserve(right.len()); + right.for_each(|container| { + self.len += container.len; + self.containers.push(container); + }); break; } (Some(l), Some(r)) => match l.key.cmp(&r.key) { Ordering::Equal => { - let mut lhs = left.next().unwrap(); + let mut container = left.next().unwrap(); let rhs = right.next().unwrap(); - BitXorAssign::bitxor_assign(&mut lhs, rhs); - if lhs.len != 0 { - 
self.containers.push(lhs); + BitXorAssign::bitxor_assign(&mut container, rhs); + if container.len != 0 { + self.len += container.len; + self.containers.push(container); } } Ordering::Less => { - let lhs = left.next().unwrap(); - self.containers.push(lhs); + let container = left.next().unwrap(); + self.len += container.len; + self.containers.push(container); } Ordering::Greater => { - let rhs = right.next().unwrap(); - self.containers.push(rhs); + let container = right.next().unwrap(); + self.len += container.len; + self.containers.push(container); } }, } @@ -553,32 +638,45 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap { let mut left = mem::take(&mut self.containers).into_iter().peekable(); let mut right = rhs.containers.iter().peekable(); + self.len = 0; + loop { match (left.peek(), right.peek()) { (None, None) => break, (Some(_), None) => { - self.containers.extend(left); + self.containers.reserve(left.len()); + left.for_each(|container| { + self.len += container.len; + self.containers.push(container); + }); break; } (None, Some(_)) => { - self.containers.extend(right.cloned()); + self.containers.reserve(right.len()); + right.cloned().for_each(|container| { + self.len += container.len; + self.containers.push(container); + }); break; } (Some(l), Some(r)) => match l.key.cmp(&r.key) { Ordering::Equal => { - let (mut lhs, rhs) = left.next().zip(right.next()).unwrap(); - BitXorAssign::bitxor_assign(&mut lhs, rhs); - if lhs.len != 0 { - self.containers.push(lhs); + let (mut container, rhs) = left.next().zip(right.next()).unwrap(); + BitXorAssign::bitxor_assign(&mut container, rhs); + if container.len != 0 { + self.len += container.len; + self.containers.push(container); } } Ordering::Less => { - let lhs = left.next().unwrap(); - self.containers.push(lhs); + let container = left.next().unwrap(); + self.len += container.len; + self.containers.push(container); } Ordering::Greater => { - let rhs = right.next().unwrap(); - self.containers.push(rhs.clone()); + let 
container = right.next().unwrap(); + self.len += container.len; + self.containers.push(container.clone()); } }, } diff --git a/src/bitmap/serialization.rs b/src/bitmap/serialization.rs index da49a7d78..3015cc9cd 100644 --- a/src/bitmap/serialization.rs +++ b/src/bitmap/serialization.rs @@ -149,6 +149,7 @@ impl RoaringBitmap { } let mut containers = Vec::with_capacity(size); + let mut total_len = 0; for _ in 0..size { let key = description_bytes.read_u16::()?; @@ -167,8 +168,12 @@ impl RoaringBitmap { }; containers.push(Container { key, len, store }); + total_len += len; } - Ok(RoaringBitmap { containers }) + Ok(RoaringBitmap { + containers, + len: total_len, + }) } }