diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index acee7b7a84b02..0c667b17c3fd9 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -31,12 +31,12 @@ use arrow::array::{ cast::AsArray, }; use arrow::array::{ - ArrowPrimitiveType, Datum, GenericListArray, Int32Array, Int64Array, + ArrowPrimitiveType, BooleanArray, Datum, GenericListArray, Int32Array, Int64Array, MutableArrayData, PrimitiveArray, make_array, }; use arrow::array::{LargeListViewArray, ListViewArray}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; -use arrow::compute::kernels::cmp::neq; +use arrow::compute::kernels::cmp::eq; use arrow::compute::kernels::length::length; use arrow::compute::{SortColumn, SortOptions, partition}; use arrow::datatypes::{ @@ -1129,6 +1129,7 @@ pub fn remove_list_null_values(array: &ArrayRef) -> Result { } } +/// Create a new list array where all the nulls point to empty lists fn truncate_list_nulls( list: &GenericListArray, ) -> Result> { @@ -1142,17 +1143,18 @@ fn truncate_list_nulls( &Int64Array::new_scalar(0) }; - let not_empty = neq(&lengths, zero)?; - let null_and_non_empty = &!nulls.inner() & not_empty.values(); + let (mut valid_or_empty, _nulls) = eq(&lengths, zero)?.into_parts(); + valid_or_empty |= nulls.inner(); + let valid_or_empty = BooleanArray::from(valid_or_empty); - if null_and_non_empty.count_set_bits() > 0 { + if valid_or_empty.has_false() { let array_data = list.values().to_data(); let offsets = list.offsets(); let capacity = offsets[offsets.len() - 1] - offsets[0]; let mut mutable_array_data = MutableArrayData::new(vec![&array_data], false, capacity.as_usize()); - let valid_or_empty = nulls.inner() | &!not_empty.values(); + let (valid_or_empty, _nulls) = valid_or_empty.into_parts(); for (start, end) in valid_or_empty.set_slices() { mutable_array_data.extend(