From ec701cd1aad1beb8831b2ee662734ac1b60ff5bc Mon Sep 17 00:00:00 2001 From: Adrian Tanase Date: Sat, 14 Mar 2026 22:24:15 +0200 Subject: [PATCH 1/2] [HSTACK] - add skip_stats (backported from delta-kernel 0.20) Signed-off-by: Adrian Tanase --- Cargo.toml | 2 +- crates/core/src/kernel/snapshot/mod.rs | 2 +- crates/core/src/kernel/snapshot/scan.rs | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9d77d59c2..71875327f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ documentation = "https://docs.rs/deltalake" repository = "https://github.com/delta-io/delta.rs" [workspace.dependencies] -delta_kernel = { version = "0.19.0", features = [ +delta_kernel = { version = "0.19.2", features = [ "arrow-57", "default-engine-rustls", "internal-api", diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 08eb79618..a1ed18d37 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -270,7 +270,7 @@ impl Snapshot { log_store: &dyn LogStore, predicate: Option, ) -> SendableRBStream { - let scan = match self.scan_builder().with_predicate(predicate).build() { + let scan = match self.scan_builder().with_predicate(predicate).with_skip_stats(true).build() { Ok(scan) => scan, Err(err) => return Box::pin(once(ready(Err(err)))), }; diff --git a/crates/core/src/kernel/snapshot/scan.rs b/crates/core/src/kernel/snapshot/scan.rs index 87d5b235d..1182aa3bd 100644 --- a/crates/core/src/kernel/snapshot/scan.rs +++ b/crates/core/src/kernel/snapshot/scan.rs @@ -63,6 +63,16 @@ impl ScanBuilder { self } + /// Skip reading file statistics from checkpoint parquet files. + /// + /// When enabled, the stats column is not read from checkpoint files and data skipping + /// is disabled. This is useful when the caller handles data skipping externally or + /// doesn't need file statistics. 
+ pub fn with_skip_stats(mut self, skip_stats: bool) -> Self { + self.inner = self.inner.with_skip_stats(skip_stats); + self + } + pub fn build(self) -> DeltaResult { Ok(Scan::from(self.inner.build()?)) } From 5c03680fb3cf3142f4fc4b66d84f842d287afe71 Mon Sep 17 00:00:00 2001 From: Radu Stoenescu Date: Wed, 22 Apr 2026 14:31:51 +0300 Subject: [PATCH 2/2] feat: enable/disable skip stats through table config --- crates/core/src/kernel/snapshot/mod.rs | 3 ++- crates/core/src/table/builder.rs | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index a1ed18d37..f1523ebf3 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -270,7 +270,8 @@ impl Snapshot { log_store: &dyn LogStore, predicate: Option, ) -> SendableRBStream { - let scan = match self.scan_builder().with_predicate(predicate).with_skip_stats(true).build() { + let skip_stats = self.config.skip_stats_in_file_listing; + let scan = match self.scan_builder().with_predicate(predicate).with_skip_stats(skip_stats).build() { Ok(scan) => scan, Err(err) => return Box::pin(once(ready(Err(err)))), }; diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index bdb9eec0c..1107d07c5 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -60,6 +60,12 @@ pub struct DeltaTableConfig { #[delta(skip)] pub log_size_limiter: Option, + + /// HSTACK: skip stats parsing during file listing. Runtime-only (not persisted). + /// Default `true` for performance; set to `false` when stats-based pruning helps the query. 
+ #[serde(skip_serializing, skip_deserializing)] + #[delta(skip)] + pub skip_stats_in_file_listing: bool, } impl Default for DeltaTableConfig { @@ -70,6 +76,7 @@ impl Default for DeltaTableConfig { log_batch_size: 1024, io_runtime: None, log_size_limiter: None, + skip_stats_in_file_listing: true, } } } @@ -80,6 +87,7 @@ impl PartialEq for DeltaTableConfig { && self.log_buffer_size == other.log_buffer_size && self.log_batch_size == other.log_batch_size && self.log_size_limiter == other.log_size_limiter + && self.skip_stats_in_file_listing == other.skip_stats_in_file_listing } }