diff --git a/Cargo.lock b/Cargo.lock index c18fd2012891c..e3a0876c3a85f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2416,6 +2416,7 @@ version = "53.1.0" dependencies = [ "arrow", "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "async-trait", diff --git a/benchmarks/results/main/clickbench_1.json b/benchmarks/results/main/clickbench_1.json new file mode 100644 index 0000000000000..378687e8f8237 --- /dev/null +++ b/benchmarks/results/main/clickbench_1.json @@ -0,0 +1,29 @@ +{ + "queries": [ + { + "iterations": [], + "query": "Query 36", + "start_time": 1768790964, + "success": false + } + ], + "context": { + "arguments": [ + "clickbench", + "--iterations", + "5", + "--path", + "/Users/kosiew/GitHub/datafusion/benchmarks/data/hits.parquet", + "--queries-path", + "/Users/kosiew/GitHub/datafusion/benchmarks/queries/clickbench/queries", + "-o", + "/Users/kosiew/GitHub/datafusion/benchmarks/results/main/clickbench_1.json", + "--query", + "36" + ], + "benchmark_version": "52.0.0", + "datafusion_version": "52.0.0", + "num_cpus": 10, + "start_time": 1768790964 + } +} \ No newline at end of file diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 414b8c6444869..baf8e2c297fd2 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -82,7 +82,7 @@ testcontainers-modules = { workspace = true, features = ["minio"] } # feature unification with dependencies serde_json = { workspace = true, features = ["preserve_order"] } -# Required because we pull serde_json with a feature to get consistent pg display, -# but its not directly used. +# serde_json is pulled with a feature to get +# consistent pg display, but is not directly used in this crate. 
[package.metadata.cargo-machete] -ignored = "serde_json" +ignored = ["serde_json"] diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 374fc275a06e0..cc3fbec5589de 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -49,6 +49,14 @@ name = "datafusion_physical_plan" [dependencies] arrow = { workspace = true } arrow-data = { workspace = true } +# Feature-only dep used only via the `arrow::ipc` re-export (there is no direct +# `use arrow_ipc` in this crate): lz4 and zstd codec support required by +# IPCStreamWriter in spill/mod.rs. These features must stay in sync with the +# SpillCompression variants in datafusion-common. Declaring the dependency here +# makes the contract explicit and local to the crate that owns spill, preventing +# silent regressions if workspace-level arrow-ipc features are narrowed (see +# #21917 for the documented regression guard around this coupling). +arrow-ipc = { workspace = true, features = ["lz4", "zstd"] } arrow-ord = { workspace = true } arrow-schema = { workspace = true } async-trait = { workspace = true } @@ -87,6 +95,11 @@ tokio = { workspace = true, features = [ "parking_lot", ] } +# `arrow-ipc` is used only through the `arrow::ipc` re-export, so cargo-machete +# reports it as unused even though this crate relies on its codec features. +[package.metadata.cargo-machete] +ignored = ["arrow-ipc"] + [[bench]] harness = false name = "partial_ordering" diff --git a/datafusion/physical-plan/src/spill/mod.rs b/datafusion/physical-plan/src/spill/mod.rs index 51e59318e2d94..f5afce50d28f5 100644 --- a/datafusion/physical-plan/src/spill/mod.rs +++ b/datafusion/physical-plan/src/spill/mod.rs @@ -290,6 +290,17 @@ struct IPCStreamWriter { impl IPCStreamWriter { /// Create new writer + /// + /// # Codec contract + /// + /// `arrow-ipc` must be compiled with the `lz4` and `zstd` features + /// (declared explicitly in `datafusion/physical-plan/Cargo.toml`).
If + /// those features are absent, spilling with the + /// [`SpillCompression::Lz4Frame`] or [`SpillCompression::Zstd`] variant + /// fails with an error at runtime. Declaring the features on this crate's + /// own dependency guarantees they are enabled whenever it is built, + /// rather than relying solely on workspace-level feature unification; + /// see #21917. pub fn new( path: &Path, schema: &Schema, diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 1159b7f3b703a..e2ffe1415a1fb 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -94,7 +94,7 @@ harness = false name = "sqllogictests" path = "bin/sqllogictests.rs" -# Required because we pull serde_json with a feature to get consistent pg display, -# but its not directly used. +# serde_json is pulled with a feature to get +# consistent pg display, but is not directly used in this crate. [package.metadata.cargo-machete] -ignored = "serde_json" +ignored = ["serde_json"]