Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ documentation = "https://docs.rs/deltalake"
repository = "https://github.com/delta-io/delta.rs"

[workspace.dependencies]
delta_kernel = { version = "0.19.0", features = [
delta_kernel = { version = "0.19.2", features = [
"arrow-57",
"default-engine-rustls",
"internal-api",
Expand Down
3 changes: 2 additions & 1 deletion crates/core/src/kernel/snapshot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
use arrow::compute::{filter_record_batch, is_not_null};
use arrow::datatypes::SchemaRef;
use arrow_arith::aggregate::sum_array_checked;
use arrow_array::{Int64Array, StructArray};

Check warning on line 24 in crates/core/src/kernel/snapshot/mod.rs

View workflow job for this annotation

GitHub Actions / Integration Tests (LakeFS v1.48)

unused import: `StructArray`

Check warning on line 24 in crates/core/src/kernel/snapshot/mod.rs

View workflow job for this annotation

GitHub Actions / Integration Tests (HDFS)

unused import: `StructArray`

Check warning on line 24 in crates/core/src/kernel/snapshot/mod.rs

View workflow job for this annotation

GitHub Actions / build (ubuntu-latest)

unused import: `StructArray`

Check warning on line 24 in crates/core/src/kernel/snapshot/mod.rs

View workflow job for this annotation

GitHub Actions / aws-native-tls

unused import: `StructArray`
use delta_kernel::actions::{Remove, Sidecar};
use delta_kernel::engine::arrow_conversion::TryIntoArrow as _;
use delta_kernel::engine::arrow_data::ArrowEngineData;
Expand Down Expand Up @@ -270,7 +270,8 @@
log_store: &dyn LogStore,
predicate: Option<PredicateRef>,
) -> SendableRBStream {
let scan = match self.scan_builder().with_predicate(predicate).build() {
let skip_stats = self.config.skip_stats_in_file_listing;
let scan = match self.scan_builder().with_predicate(predicate).with_skip_stats(skip_stats).build() {
Ok(scan) => scan,
Err(err) => return Box::pin(once(ready(Err(err)))),
};
Expand Down
10 changes: 10 additions & 0 deletions crates/core/src/kernel/snapshot/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@
self
}

/// Controls whether file statistics are left out of the scan.
///
/// With `skip_stats` set to `true`, the stats column of checkpoint parquet files is not
/// read and data skipping is disabled. This suits callers that perform data skipping
/// externally or have no need for file statistics.
///
/// The flag is forwarded to the wrapped builder, and the updated builder is returned so
/// that calls can be chained.
pub fn with_skip_stats(mut self, skip_stats: bool) -> Self {
    // Hand the flag to the wrapped builder, then store the reconfigured builder back.
    let configured = self.inner.with_skip_stats(skip_stats);
    self.inner = configured;
    self
}

/// Consumes the builder and produces a [`Scan`].
///
/// # Errors
///
/// Returns the error reported by the wrapped builder's `build` call, converted into
/// this crate's error type by `?`.
pub fn build(self) -> DeltaResult<Scan> {
    let built = self.inner.build()?;
    Ok(Scan::from(built))
}
Expand All @@ -89,7 +99,7 @@

impl Scan {
/// Get a shared reference to the inner [`KernelScan`].
pub(crate) fn inner(&self) -> &Arc<KernelScan> {

Check warning on line 102 in crates/core/src/kernel/snapshot/scan.rs

View workflow job for this annotation

GitHub Actions / Integration Tests (LakeFS v1.48)

method `inner` is never used

Check warning on line 102 in crates/core/src/kernel/snapshot/scan.rs

View workflow job for this annotation

GitHub Actions / Integration Tests (HDFS)

method `inner` is never used

Check warning on line 102 in crates/core/src/kernel/snapshot/scan.rs

View workflow job for this annotation

GitHub Actions / build (ubuntu-latest)

method `inner` is never used

Check warning on line 102 in crates/core/src/kernel/snapshot/scan.rs

View workflow job for this annotation

GitHub Actions / aws-native-tls

method `inner` is never used
&self.inner
}

Expand Down
8 changes: 8 additions & 0 deletions crates/core/src/table/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ pub struct DeltaTableConfig {

#[delta(skip)]
pub log_size_limiter: Option<LogSizeLimiter>,

/// HSTACK: skip stats parsing during file listing. Runtime-only (not persisted).
/// Default `true` for performance; set to `false` when stats-based pruning helps the query.
#[serde(skip_serializing, skip_deserializing)]
#[delta(skip)]
pub skip_stats_in_file_listing: bool,
}

impl Default for DeltaTableConfig {
Expand All @@ -70,6 +76,7 @@ impl Default for DeltaTableConfig {
log_batch_size: 1024,
io_runtime: None,
log_size_limiter: None,
skip_stats_in_file_listing: true,
}
}
}
Expand All @@ -80,6 +87,7 @@ impl PartialEq for DeltaTableConfig {
&& self.log_buffer_size == other.log_buffer_size
&& self.log_batch_size == other.log_batch_size
&& self.log_size_limiter == other.log_size_limiter
&& self.skip_stats_in_file_listing == other.skip_stats_in_file_listing
}
}

Expand Down
Loading