-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Use BooleanBufferBuilder rather than Vec<bool> in ArrowBytesViewMap #20064
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fefefc8
c01d32b
8d9b6a0
63e4ea2
9e6cc0b
c954c1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,10 @@ | |
| //! `StringViewArray`/`BinaryViewArray`. | ||
| use crate::binary_map::OutputType; | ||
| use ahash::RandomState; | ||
| use arrow::array::NullBufferBuilder; | ||
| use arrow::array::cast::AsArray; | ||
| use arrow::array::{Array, ArrayRef, BinaryViewArray, ByteView, make_view}; | ||
| use arrow::buffer::{Buffer, NullBuffer, ScalarBuffer}; | ||
| use arrow::buffer::{Buffer, ScalarBuffer}; | ||
| use arrow::datatypes::{BinaryViewType, ByteViewType, DataType, StringViewType}; | ||
| use datafusion_common::hash_utils::create_hashes; | ||
| use datafusion_common::utils::proxy::{HashTableAllocExt, VecAllocExt}; | ||
|
|
@@ -134,7 +135,7 @@ where | |
| /// Completed buffers containing string data | ||
| completed: Vec<Buffer>, | ||
| /// Tracks null values (true = null) | ||
| nulls: Vec<bool>, | ||
| nulls: NullBufferBuilder, | ||
|
|
||
| /// random state used to generate hashes | ||
| random_state: RandomState, | ||
|
|
@@ -161,7 +162,7 @@ where | |
| views: Vec::new(), | ||
| in_progress: Vec::new(), | ||
| completed: Vec::new(), | ||
| nulls: Vec::new(), | ||
| nulls: NullBufferBuilder::new(0), | ||
| random_state: RandomState::new(), | ||
| hashes_buffer: vec![], | ||
| null: None, | ||
|
|
@@ -281,7 +282,7 @@ where | |
| let payload = make_payload_fn(None); | ||
| let null_index = self.views.len(); | ||
| self.views.push(0); | ||
| self.nulls.push(true); | ||
| self.nulls.append_null(); | ||
| self.null = Some((payload, null_index)); | ||
| payload | ||
| }; | ||
|
|
@@ -371,16 +372,7 @@ where | |
| } | ||
|
|
||
| // Build null buffer if we have any nulls | ||
| let null_buffer = if self.nulls.iter().any(|&is_null| is_null) { | ||
| Some(NullBuffer::from( | ||
| self.nulls | ||
| .iter() | ||
| .map(|&is_null| !is_null) | ||
| .collect::<Vec<_>>(), | ||
| )) | ||
| } else { | ||
| None | ||
| }; | ||
| let null_buffer = self.nulls.finish(); | ||
|
|
||
| let views = ScalarBuffer::from(self.views); | ||
| let array = | ||
|
|
@@ -420,7 +412,7 @@ where | |
| }; | ||
|
|
||
| self.views.push(view); | ||
| self.nulls.push(false); | ||
| self.nulls.append_non_null(); | ||
| view | ||
| } | ||
|
|
||
|
|
@@ -445,7 +437,7 @@ where | |
| let views_size = self.views.len() * size_of::<u128>(); | ||
| let in_progress_size = self.in_progress.capacity(); | ||
| let completed_size: usize = self.completed.iter().map(|b| b.len()).sum(); | ||
| let nulls_size = self.nulls.len(); | ||
| let nulls_size = self.nulls.len() / 8; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this should use https://docs.rs/arrow/latest/arrow/array/struct.NullBufferBuilder.html#method.allocated_size as it is meant to track the allocated size (not the used size).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| self.map_size | ||
| + views_size | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❤️