From 96f4e7828029720589bc9fddb496513f628b5efb Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 00:11:23 +0300 Subject: [PATCH 01/11] feat(encoding): add numeric compression levels api - add CompressionLevel::Level(i32) and canonical from_level mapping - port level table, source-size hinting, and matcher sizing behavior - align CLI parsing/range validation and update docs/tests --- README.md | 7 +- cli/src/main.rs | 113 +++++++-- zstd/src/encoding/frame_compressor.rs | 24 +- zstd/src/encoding/match_generator.rs | 334 +++++++++++++++++++------ zstd/src/encoding/mod.rs | 61 ++++- zstd/src/encoding/streaming_encoder.rs | 68 ++++- zstd/src/tests/roundtrip_integrity.rs | 322 ++++++++++++++++++++++++ 7 files changed, 811 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index 3bb97f6a..3f5e783b 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi **Fork goals:** - Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree) - Performance parity with C zstd for decompression (currently 1.4-3.5x slower) -- Additional compression levels (Fastest, Default, Better, and Best are all implemented) +- Full numeric compression levels (0 = default, 1–22 plus negative ultra-fast, C zstd compatible) - No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance) **Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements. @@ -46,6 +46,8 @@ Complete RFC 8878 implementation. 
Performance: ~1.4-3.5x slower than C zstd depe - [x] Default (roughly level 3) - [x] Better (roughly level 7) - [x] Best (roughly level 11) +- [x] Numeric levels `0` (default), `1–22`, and negative ultra-fast levels via `CompressionLevel::from_level(n)` (C zstd compatible numbering) +- [x] Negative levels for ultra-fast compression - [x] Checksums - [x] Frame Content Size — `FrameCompressor` writes FCS automatically; `StreamingEncoder` requires `set_pledged_content_size()` before first write - [x] Dictionary compression @@ -67,7 +69,10 @@ Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares use structured_zstd::encoding::{compress, compress_to_vec, CompressionLevel}; let data: &[u8] = b"hello world"; +// Named level let compressed = compress_to_vec(data, CompressionLevel::Fastest); +// Numeric level (C zstd compatible: 0 = default, 1-22, negative for ultra-fast) +let compressed = compress_to_vec(data, CompressionLevel::from_level(7)); ``` ```rust,no_run diff --git a/cli/src/main.rs b/cli/src/main.rs index 1562da3c..239074d6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -34,23 +34,40 @@ enum Commands { /// Where the compressed file is written /// [default: .zst] output_file: Option, - /// How thoroughly the file should be compressed. A higher level will take - /// more time to compress but result in a smaller file, and vice versa. + /// Compression level (higher = smaller, slower). /// - /// - 0: Uncompressed - /// - 1: Fastest - /// - 2: Default - /// - 3: Better (lazy2, ~zstd level 7) - /// - 4: Best (deep lazy2, ~zstd level 11) + /// Numeric levels follow the zstd convention where 0 means + /// "use the default level" (currently 3). 
+ /// + /// - 0: Default (same as 3) + /// - 1: Fastest (fast hash, ~zstd level 1) + /// - 3: Default (dfast, ~zstd level 3) + /// - 7: Better (lazy2, ~zstd level 7) + /// - 11: Best (deep lazy2, ~zstd level 11) + /// - Negative: ultra-fast modes (less compression, more speed) + /// - 12-22: progressively higher ratio (capped at lazy2 backend) + /// + /// Use --store to write an uncompressed zstd frame. #[arg( short, long, - value_name = "COMPRESSION_LEVEL", - default_value_t = 2, - value_parser = clap::value_parser!(u8).range(0..=4), - verbatim_doc_comment + value_name = "LEVEL", + default_value_t = CompressionLevel::DEFAULT_LEVEL, + // clap's ranged parser expects i64 bounds here (RangedI64ValueParser), + // even though the target value type is i32. + value_parser = clap::value_parser!(i32).range( + (CompressionLevel::MIN_LEVEL as i64)..=(CompressionLevel::MAX_LEVEL as i64) + ), + verbatim_doc_comment, + allow_hyphen_values = true, )] - level: u8, + level: i32, + /// Write an uncompressed zstd frame (no compression). + /// + /// When set, compression itself ignores `--level` and writes a raw + /// zstd frame. The CLI still validates `--level` range at parse time. 
+ #[arg(long)] + store: bool, }, Decompress { /// .zst archive to decompress @@ -81,9 +98,10 @@ fn main() -> color_eyre::Result<()> { input_file, output_file, level, + store, } => { let output_file = output_file.unwrap_or_else(|| add_extension(&input_file, ".zst")); - compress(input_file, output_file, level)?; + compress(input_file, output_file, level, store)?; } Commands::Decompress { input_file, @@ -101,15 +119,12 @@ fn main() -> color_eyre::Result<()> { Ok(()) } -fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<()> { +fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_eyre::Result<()> { info!("compressing {input:?} to {output:?}"); - let compression_level: structured_zstd::encoding::CompressionLevel = match level { - 0 => CompressionLevel::Uncompressed, - 1 => CompressionLevel::Fastest, - 2 => CompressionLevel::Default, - 3 => CompressionLevel::Better, - 4 => CompressionLevel::Best, - _ => return Err(eyre!("unsupported compression level: {level}")), + let compression_level = if store { + CompressionLevel::Uncompressed + } else { + CompressionLevel::from_level(level) }; ensure_distinct_paths(&input, &output)?; ensure_regular_output_destination(&output)?; @@ -128,6 +143,9 @@ fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<() let compression_result: color_eyre::Result = (|| { let mut encoder = structured_zstd::encoding::StreamingEncoder::new(temporary_output, compression_level); + encoder + .set_source_size_hint(source_size as u64) + .wrap_err("failed to configure source size hint")?; std::io::copy(&mut encoder_input, &mut encoder).wrap_err("streaming compression failed")?; encoder.finish().wrap_err("failed to finalize zstd frame") })(); @@ -402,7 +420,50 @@ mod tests { #[test] fn cli_rejects_unsupported_compression_level_at_parse_time() { - let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "5"]); + let too_high = + 
(structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64 + 1).to_string(); + let parse = Cli::try_parse_from([ + "structured-zstd", + "compress", + "in.bin", + "--level", + too_high.as_str(), + ]); + assert!(parse.is_err()); + } + + #[test] + fn cli_accepts_negative_compression_level() { + let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "-3"]); + assert!(parse.is_ok()); + } + + #[test] + fn cli_rejects_too_negative_compression_level() { + let too_low = + (structured_zstd::encoding::CompressionLevel::MIN_LEVEL as i64 - 1).to_string(); + let parse = Cli::try_parse_from([ + "structured-zstd", + "compress", + "in.bin", + "--level", + too_low.as_str(), + ]); + assert!(parse.is_err()); + } + + #[test] + fn cli_store_still_validates_level_range_at_parse_time() { + let too_high = + (structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64 + 1).to_string(); + let parse = Cli::try_parse_from([ + "structured-zstd", + "compress", + "in.bin", + "--store", + "--level", + too_high.as_str(), + ]); assert!(parse.is_err()); } @@ -415,7 +476,7 @@ mod tests { let input = std::env::temp_dir().join(format!("structured-zstd-cli-alias-{unique}.txt")); fs::write(&input, b"streaming-cli-alias-check").unwrap(); - let err = compress(input.clone(), input.clone(), 2).unwrap_err(); + let err = compress(input.clone(), input.clone(), 3, false).unwrap_err(); let message = format!("{err:#}"); assert!( message.contains("input and output"), @@ -434,7 +495,7 @@ mod tests { fs::write(&input, b"streaming-cli-hardlink-check").unwrap(); fs::hard_link(&input, &output).unwrap(); - let err = compress(input.clone(), output.clone(), 2).unwrap_err(); + let err = compress(input.clone(), output.clone(), 3, false).unwrap_err(); let message = format!("{err:#}"); assert!( message.contains("input and output"), @@ -455,7 +516,7 @@ mod tests { let output = std::env::temp_dir().join(format!("structured-zstd-cli-missing-output-{unique}.zst")); - let err = 
compress(missing_input, output.clone(), 2).unwrap_err(); + let err = compress(missing_input, output.clone(), 3, false).unwrap_err(); let message = format!("{err:#}"); assert!( message.contains("failed to open input file"), @@ -473,7 +534,7 @@ mod tests { let output = dir.join("existing-dir"); fs::create_dir(&output).unwrap(); - let err = compress(input, output.clone(), 2).unwrap_err(); + let err = compress(input, output.clone(), 3, false).unwrap_err(); let message = format!("{err:#}"); assert!( message.contains("not a regular file"), diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 42c941b9..bd5ad612 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -157,6 +157,15 @@ impl FrameCompressor { self.compressed_data.replace(compressed_data) } + /// Provide a hint about the total uncompressed size for the next frame. + /// + /// When set, the encoder selects smaller hash tables and windows for + /// small inputs, matching the C zstd source-size-class behavior. + /// Must be called before [`compress`](Self::compress). + pub fn set_source_size_hint(&mut self, size: u64) { + self.state.matcher.set_source_size_hint(size); + } + /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain /// /// This will repeatedly call [Read::read] on the source to fill up blocks until the source returns 0 on the read call. 
@@ -274,7 +283,8 @@ impl FrameCompressor { CompressionLevel::Fastest | CompressionLevel::Default | CompressionLevel::Better - | CompressionLevel::Best => compress_block_encoded( + | CompressionLevel::Best + | CompressionLevel::Level(_) => compress_block_encoded( &mut self.state, last_block, uncompressed_data, @@ -476,7 +486,7 @@ mod tests { data.len() as u64, "FCS mismatch for len={} level={:?}", data.len(), - level as u8, + level, ); // Confirm the FCS field is actually present in the header // (not just the decoder returning 0 for absent FCS). @@ -485,7 +495,7 @@ mod tests { 0, "FCS field must be present for len={} level={:?}", data.len(), - level as u8, + level, ); // Verify C zstd can decompress let mut decoded = Vec::new(); @@ -883,8 +893,10 @@ mod tests { crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec()) .expect("raw dictionary should be valid"); - let payload = b"abcdefgh".repeat(512); - let matcher = MatchGeneratorDriver::new(8, 1); + // Payload must exceed the encoder's advertised window (128 KiB for + // Fastest) so the test actually exercises cross-window-boundary behavior. + let payload = b"abcdefgh".repeat(128 * 1024 / 8 + 64); + let matcher = MatchGeneratorDriver::new(1024, 1); let mut no_dict_output = Vec::new(); let mut no_dict_compressor = @@ -900,7 +912,7 @@ mod tests { .expect("window size should be present"); let mut output = Vec::new(); - let matcher = MatchGeneratorDriver::new(8, 1); + let matcher = MatchGeneratorDriver::new(1024, 1); let mut compressor = FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest); compressor diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 7eaae2a5..e303160c 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -23,8 +23,6 @@ const DFAST_TARGET_LEN: usize = 48; // measurements show we can shrink them without regressing acceptance tests. 
const DFAST_HASH_BITS: usize = 20; const DFAST_SEARCH_DEPTH: usize = 4; -const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; -const BETTER_DEFAULT_WINDOW_SIZE: usize = 1 << 23; const DFAST_EMPTY_SLOT: usize = usize::MAX; const HC_HASH_LOG: usize = 20; @@ -36,7 +34,6 @@ const HC_TARGET_LEN: usize = 48; // that can never collide with any valid position, even at the 4 GiB boundary. const HC_EMPTY: u32 = 0; -const BEST_DEFAULT_WINDOW_SIZE: usize = 1 << 24; // Maximum search depth across all HC-based levels. Used to size the // fixed-length candidate array returned by chain_candidates(). const MAX_HC_SEARCH_DEPTH: usize = 32; @@ -66,6 +63,141 @@ const BEST_HC_CONFIG: HcConfig = HcConfig { target_len: 128, }; +/// Resolved tuning parameters for a compression level. +#[derive(Copy, Clone)] +struct LevelParams { + backend: MatcherBackend, + window_log: u8, + hash_fill_step: usize, + lazy_depth: u8, + hc: HcConfig, +} + +fn dfast_hash_bits_for_window(max_window_size: usize) -> usize { + let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize; + window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS) +} + +/// Parameter table for numeric compression levels 1–22. +/// +/// Each entry maps a zstd compression level to the best-available matcher +/// backend and tuning knobs. Levels that require strategies this crate does +/// not implement (greedy, btopt, btultra) are approximated with the closest +/// available backend. +/// +/// Index 0 = level 1, index 21 = level 22. 
+#[rustfmt::skip] +const LEVEL_TABLE: [LevelParams; 22] = [ + // Lvl Strategy wlog step lazy HC config + // --- -------------- ---- ---- ---- ------------------------------------------ + /* 1 */ LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG }, + /* 2 */ LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG }, + /* 3 */ LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG }, + /* 4 */ LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG }, + /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 } }, + /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 } }, + /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 } }, + /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 } }, + /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 } }, + /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 } }, + /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG }, + /*12 */ LevelParams { backend: 
MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 } }, + /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 } }, + /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 } }, + /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 } }, + /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 } }, + /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, + /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, + /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, + /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, + /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, + /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, 
lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } }, +]; + +/// Smallest window_log the encoder will use regardless of source size. +const MIN_WINDOW_LOG: u8 = 10; + +/// Adjust level parameters for a known source size. +/// +/// This derives a cap from `ceil(log2(src_size))`, then clamps it to +/// [`MIN_WINDOW_LOG`]. A zero-byte size hint is treated as +/// [`MIN_WINDOW_LOG`]. This keeps tables bounded for +/// small inputs while preserving the encoder's minimum supported window. +/// For the HC backend, `hash_log` and `chain_log` are reduced +/// proportionally. +fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams { + // Derive a source-size-based cap from ceil(log2(src_size)), then + // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB (or zero) we keep the + // 1 KiB minimum window instead of shrinking below that floor. + let src_log = if src_size == 0 { + MIN_WINDOW_LOG + } else { + (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2 + }; + let src_log = src_log.max(MIN_WINDOW_LOG); + if src_log < params.window_log { + params.window_log = src_log; + } + // For HC backend: also cap hash_log and chain_log so tables are + // proportional to the source, avoiding multi-MB allocations for + // tiny inputs. + if params.backend == MatcherBackend::HashChain { + if (src_log + 2) < params.hc.hash_log as u8 { + params.hc.hash_log = (src_log + 2) as usize; + } + if (src_log + 1) < params.hc.chain_log as u8 { + params.hc.chain_log = (src_log + 1) as usize; + } + } + params +} + +/// Resolve a [`CompressionLevel`] to internal tuning parameters, +/// optionally adjusted for a known source size. 
+fn resolve_level_params(level: CompressionLevel, source_size: Option) -> LevelParams { + let params = match level { + CompressionLevel::Uncompressed => LevelParams { + backend: MatcherBackend::Simple, + window_log: 17, + hash_fill_step: 1, + lazy_depth: 0, + hc: HC_CONFIG, + }, + CompressionLevel::Fastest => LEVEL_TABLE[0], + CompressionLevel::Default => LEVEL_TABLE[2], + CompressionLevel::Better => LEVEL_TABLE[6], + CompressionLevel::Best => LEVEL_TABLE[10], + CompressionLevel::Level(n) => { + if n > 0 { + let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1; + LEVEL_TABLE[idx] + } else if n == 0 { + // Level 0 = default, matching C zstd semantics. + LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1] + } else { + // Negative levels: ultra-fast with the Simple backend. + // Acceleration grows with magnitude, expressed as larger + // hash_fill_step (fewer positions indexed). + let acceleration = + (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize); + let step = (acceleration + 3).min(128); + LevelParams { + backend: MatcherBackend::Simple, + window_log: 17, + hash_fill_step: step, + lazy_depth: 0, + hc: HC_CONFIG, + } + } + } + }; + if let Some(size) = source_size { + adjust_params_for_source_size(params, size) + } else { + params + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum MatcherBackend { Simple, @@ -83,18 +215,21 @@ pub struct MatchGeneratorDriver { active_backend: MatcherBackend, slice_size: usize, base_slice_size: usize, - base_window_size: usize, // Frame header window size must stay at the configured live-window budget. // Dictionary retention expands internal matcher capacity only. reported_window_size: usize, // Tracks currently retained bytes that originated from primed dictionary // history and have not been evicted yet. dictionary_retained_budget: usize, + // Source size hint for next frame (set via set_source_size_hint, cleared on reset). 
+ source_size_hint: Option, } impl MatchGeneratorDriver { - /// slice_size says how big the slices should be that are allocated to work with - /// max_slices_in_window says how many slices should at most be used while looking for matches + /// `slice_size` sets the base block allocation size used for matcher input chunks. + /// `max_slices_in_window` determines the initial window capacity at construction + /// time. Effective window sizing is recalculated on every [`reset`](Self::reset) + /// from the resolved compression level and optional source-size hint. pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self { let max_window_size = max_slices_in_window * slice_size; Self { @@ -106,45 +241,14 @@ impl MatchGeneratorDriver { active_backend: MatcherBackend::Simple, slice_size, base_slice_size: slice_size, - base_window_size: max_window_size, reported_window_size: max_window_size, dictionary_retained_budget: 0, + source_size_hint: None, } } - fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) { - match level { - CompressionLevel::Uncompressed => ( - MatcherBackend::Simple, - self.base_slice_size, - self.base_window_size, - 1, - ), - CompressionLevel::Fastest => ( - MatcherBackend::Simple, - self.base_slice_size, - self.base_window_size, - FAST_HASH_FILL_STEP, - ), - CompressionLevel::Default => ( - MatcherBackend::Dfast, - self.base_slice_size, - DFAST_DEFAULT_WINDOW_SIZE, - 1, - ), - CompressionLevel::Better => ( - MatcherBackend::HashChain, - self.base_slice_size, - BETTER_DEFAULT_WINDOW_SIZE, - 1, - ), - CompressionLevel::Best => ( - MatcherBackend::HashChain, - self.base_slice_size, - BEST_DEFAULT_WINDOW_SIZE, - 1, - ), - } + fn level_params(level: CompressionLevel, source_size: Option) -> LevelParams { + resolve_level_params(level, source_size) } fn dfast_matcher(&self) -> &DfastMatchGenerator { @@ -247,10 +351,16 @@ impl Matcher for MatchGeneratorDriver { true } + fn set_source_size_hint(&mut self, size: 
u64) { + self.source_size_hint = Some(size); + } + fn reset(&mut self, level: CompressionLevel) { - let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level); + let hint = self.source_size_hint.take(); + let params = Self::level_params(level, hint); + let max_window_size = 1usize << params.window_log; self.dictionary_retained_budget = 0; - if self.active_backend != backend { + if self.active_backend != params.backend { match self.active_backend { MatcherBackend::Simple => { let vec_pool = &mut self.vec_pool; @@ -288,15 +398,15 @@ impl Matcher for MatchGeneratorDriver { } } - self.active_backend = backend; - self.slice_size = slice_size; + self.active_backend = params.backend; + self.slice_size = self.base_slice_size.min(max_window_size); self.reported_window_size = max_window_size; match self.active_backend { MatcherBackend::Simple => { let vec_pool = &mut self.vec_pool; let suffix_pool = &mut self.suffix_pool; self.match_generator.max_window_size = max_window_size; - self.match_generator.hash_fill_step = hash_fill_step; + self.match_generator.hash_fill_step = params.hash_fill_step; self.match_generator.reset(|mut data, mut suffixes| { data.resize(data.capacity(), 0); vec_pool.push(data); @@ -310,7 +420,8 @@ impl Matcher for MatchGeneratorDriver { .dfast_match_generator .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size)); dfast.max_window_size = max_window_size; - dfast.lazy_depth = 1; + dfast.lazy_depth = params.lazy_depth; + dfast.set_hash_bits(dfast_hash_bits_for_window(max_window_size)); let vec_pool = &mut self.vec_pool; dfast.reset(|mut data| { data.resize(data.capacity(), 0); @@ -322,11 +433,8 @@ impl Matcher for MatchGeneratorDriver { .hc_match_generator .get_or_insert_with(|| HcMatchGenerator::new(max_window_size)); hc.max_window_size = max_window_size; - hc.lazy_depth = 2; - match level { - CompressionLevel::Best => hc.configure(BEST_HC_CONFIG), - _ => hc.configure(HC_CONFIG), - } + hc.lazy_depth = 
params.lazy_depth; + hc.configure(params.hc); let vec_pool = &mut self.vec_pool; hc.reset(|mut data| { data.resize(data.capacity(), 0); @@ -427,11 +535,19 @@ impl Matcher for MatchGeneratorDriver { } fn get_next_space(&mut self) -> Vec { - self.vec_pool.pop().unwrap_or_else(|| { - let mut space = alloc::vec![0; self.slice_size]; - space.resize(space.capacity(), 0); - space - }) + if let Some(mut space) = self.vec_pool.pop() { + if space.len() > self.slice_size { + space.truncate(self.slice_size); + } + if space.capacity() > self.slice_size { + space.shrink_to(self.slice_size); + } + if space.len() < self.slice_size { + space.resize(self.slice_size, 0); + } + return space; + } + alloc::vec![0; self.slice_size] } fn get_last_space(&mut self) -> &[u8] { @@ -1002,6 +1118,7 @@ struct DfastMatchGenerator { offset_hist: [u32; 3], short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, + hash_bits: usize, // Lazy match lookahead depth (internal tuning parameter). lazy_depth: u8, } @@ -1025,10 +1142,20 @@ impl DfastMatchGenerator { offset_hist: [1, 4, 8], short_hash: Vec::new(), long_hash: Vec::new(), + hash_bits: DFAST_HASH_BITS, lazy_depth: 1, } } + fn set_hash_bits(&mut self, bits: usize) { + let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS); + if self.hash_bits != clamped { + self.hash_bits = clamped; + self.short_hash = Vec::new(); + self.long_hash = Vec::new(); + } + } + fn reset(&mut self, mut reuse_space: impl FnMut(Vec)) { self.window_size = 0; self.history.clear(); @@ -1138,14 +1265,13 @@ impl DfastMatchGenerator { } fn ensure_hash_tables(&mut self) { - if self.short_hash.is_empty() { + let table_len = 1usize << self.hash_bits; + if self.short_hash.len() != table_len { // This is intentionally lazy so Fastest/Uncompressed never pay the // ~dfast-level memory cost. The current size tracks the issue's // zstd level-3 style parameters rather than a generic low-memory preset. 
- self.short_hash = - alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS]; - self.long_hash = - alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS]; + self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len]; + self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len]; } } @@ -1341,7 +1467,7 @@ impl DfastMatchGenerator { let idx = pos - self.history_abs_start; let short = { let concat = self.live_history(); - (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..])) + (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..])) }; if let Some(short) = short { let bucket = &mut self.short_hash[short]; @@ -1353,7 +1479,7 @@ impl DfastMatchGenerator { let long = { let concat = self.live_history(); - (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..])) + (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..])) }; if let Some(long) = long { let bucket = &mut self.long_hash[long]; @@ -1368,7 +1494,7 @@ impl DfastMatchGenerator { let concat = self.live_history(); let idx = pos - self.history_abs_start; (idx + 4 <= concat.len()) - .then(|| self.short_hash[Self::hash4(&concat[idx..])]) + .then(|| self.short_hash[self.hash4(&concat[idx..])]) .into_iter() .flatten() .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) @@ -1378,25 +1504,25 @@ impl DfastMatchGenerator { let concat = self.live_history(); let idx = pos - self.history_abs_start; (idx + 8 <= concat.len()) - .then(|| self.long_hash[Self::hash8(&concat[idx..])]) + .then(|| self.long_hash[self.hash8(&concat[idx..])]) .into_iter() .flatten() .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) } - fn hash4(data: &[u8]) -> usize { + fn hash4(&self, data: &[u8]) -> usize { let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; - Self::hash_bits(value) + self.hash_bits(value) } - fn hash8(data: &[u8]) -> usize { + fn hash8(&self, data: &[u8]) -> usize { let value = 
u64::from_le_bytes(data[..8].try_into().unwrap()); - Self::hash_bits(value) + self.hash_bits(value) } - fn hash_bits(value: u64) -> usize { + fn hash_bits(&self, value: u64) -> usize { const PRIME: u64 = 0x9E37_79B1_85EB_CA87; - ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize + ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize } } @@ -1975,7 +2101,7 @@ fn dfast_matches_roundtrip_multi_block_pattern() { let first_block: Vec = pattern.iter().copied().cycle().take(128 * 1024).collect(); let second_block: Vec = pattern.iter().copied().cycle().take(128 * 1024).collect(); - let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE); + let mut matcher = DfastMatchGenerator::new(1 << 22); let replay_sequence = |decoded: &mut Vec, seq: Sequence<'_>| match seq { Sequence::Literals { literals } => decoded.extend_from_slice(literals), Sequence::Triple { @@ -2009,7 +2135,7 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() { let mut driver = MatchGeneratorDriver::new(32, 2); driver.reset(CompressionLevel::Default); - assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.window_size(), (1u64 << 22)); let mut first = driver.get_next_space(); first[..12].copy_from_slice(b"abcabcabcabc"); @@ -2042,7 +2168,51 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() { assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc"); driver.reset(CompressionLevel::Fastest); - assert_eq!(driver.window_size(), 64); + assert_eq!(driver.window_size(), (1u64 << 17)); +} + +#[test] +fn driver_small_source_hint_shrinks_dfast_hash_tables() { + let mut driver = MatchGeneratorDriver::new(32, 2); + + driver.reset(CompressionLevel::Default); + let mut space = driver.get_next_space(); + space[..12].copy_from_slice(b"abcabcabcabc"); + space.truncate(12); + driver.commit_space(space); + driver.skip_matching(); + let full_tables = driver.dfast_matcher().short_hash.len(); + assert_eq!(full_tables, 1 << 
DFAST_HASH_BITS); + + driver.set_source_size_hint(1024); + driver.reset(CompressionLevel::Default); + let mut space = driver.get_next_space(); + space[..12].copy_from_slice(b"xyzxyzxyzxyz"); + space.truncate(12); + driver.commit_space(space); + driver.skip_matching(); + let hinted_tables = driver.dfast_matcher().short_hash.len(); + + assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG); + assert!( + hinted_tables < full_tables, + "tiny source hint should reduce dfast table footprint" + ); +} + +#[test] +fn source_hint_clamps_driver_slice_size_to_window() { + let mut driver = MatchGeneratorDriver::new(128 * 1024, 2); + driver.set_source_size_hint(1024); + driver.reset(CompressionLevel::Default); + + let window = driver.window_size() as usize; + assert_eq!(window, 1024); + assert_eq!(driver.slice_size, window); + + let space = driver.get_next_space(); + assert_eq!(space.len(), window); + driver.commit_space(space); } #[test] @@ -2051,7 +2221,7 @@ fn driver_best_to_fastest_releases_oversized_hc_tables() { // Initialize at Best — allocates large HC tables (2M hash, 1M chain). driver.reset(CompressionLevel::Best); - assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.window_size(), (1u64 << 24)); // Feed data so tables are actually allocated via ensure_tables(). let mut space = driver.get_next_space(); @@ -2062,7 +2232,7 @@ fn driver_best_to_fastest_releases_oversized_hc_tables() { // Switch to Fastest — must release HC tables. driver.reset(CompressionLevel::Fastest); - assert_eq!(driver.window_size(), 64); + assert_eq!(driver.window_size(), (1u64 << 17)); // HC matcher should have empty tables after backend switch. let hc = driver.hc_match_generator.as_ref().unwrap(); @@ -2082,7 +2252,7 @@ fn driver_better_to_best_resizes_hc_tables() { // Initialize at Better — allocates small HC tables (1M hash, 512K chain). 
driver.reset(CompressionLevel::Better); - assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.window_size(), (1u64 << 23)); let mut space = driver.get_next_space(); space[..12].copy_from_slice(b"abcabcabcabc"); @@ -2096,7 +2266,7 @@ fn driver_better_to_best_resizes_hc_tables() { // Switch to Best — must resize to larger tables. driver.reset(CompressionLevel::Best); - assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.window_size(), (1u64 << 24)); // Feed data to trigger ensure_tables with new sizes. let mut space = driver.get_next_space(); @@ -2300,6 +2470,10 @@ fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() { fn prime_with_dictionary_budget_shrinks_after_simple_eviction() { let mut driver = MatchGeneratorDriver::new(8, 1); driver.reset(CompressionLevel::Fastest); + // Use a small live window so dictionary-primed slices are evicted + // quickly and budget retirement can be asserted deterministically. + driver.match_generator.max_window_size = 8; + driver.reported_window_size = 8; let base_window = driver.match_generator.max_window_size; driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]); @@ -2443,7 +2617,7 @@ fn fastest_reset_uses_interleaved_hash_fill_step() { // happened and the lazy_depth is configured correctly. 
driver.reset(CompressionLevel::Better); assert_eq!(driver.active_backend, MatcherBackend::HashChain); - assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.window_size(), (1u64 << 23)); assert_eq!(driver.hc_matcher().lazy_depth, 2); } @@ -2723,7 +2897,7 @@ fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() { #[test] fn dfast_inserts_tail_positions_for_next_block_matching() { - let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE); + let mut matcher = DfastMatchGenerator::new(1 << 22); matcher.add_data(b"012345bcdea".to_vec(), |_| {}); let mut history = Vec::new(); diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index d075b9bc..976d28fd 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -45,7 +45,7 @@ pub fn compress_to_vec(source: R, level: CompressionLevel) -> Vec { /// The compression mode used impacts the speed of compression, /// and resulting compression ratios. Faster compression will result /// in worse compression ratios, and vice versa. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub enum CompressionLevel { /// This level does not compress the data at all, and simply wraps /// it in a Zstandard frame. @@ -88,6 +88,54 @@ pub enum CompressionLevel { /// Prefer [`CompressionLevel::Default`] for very large single-frame /// streams until table rebasing is implemented. Best, + /// Numeric compression level. + /// + /// Levels 1–22 correspond to the C zstd level numbering. Higher values + /// produce smaller output at the cost of more CPU time. Negative values + /// select ultra-fast modes that trade ratio for speed. Level 0 is + /// treated as [`DEFAULT_LEVEL`](Self::DEFAULT_LEVEL), matching C zstd + /// semantics. + /// + /// Named variants map to specific numeric levels: + /// [`Fastest`](Self::Fastest) = 1, [`Default`](Self::Default) = 3, + /// [`Better`](Self::Better) = 7, [`Best`](Self::Best) = 11. 
+ /// + /// Levels above 11 use progressively larger windows and deeper search + /// with the lazy2 hash-chain backend. Levels that require strategies + /// this crate has not yet implemented (btopt, btultra) are approximated + /// with the closest available matcher. + /// + /// Semver note: this variant was added after the initial enum shape and + /// is a breaking API change for downstream crates that exhaustively + /// `match` on [`CompressionLevel`] without a wildcard arm. + Level(i32), +} + +impl CompressionLevel { + /// The minimum supported numeric compression level (ultra-fast mode). + pub const MIN_LEVEL: i32 = -131072; + /// The maximum supported numeric compression level. + pub const MAX_LEVEL: i32 = 22; + /// The default numeric compression level (equivalent to [`Default`](Self::Default)). + pub const DEFAULT_LEVEL: i32 = 3; + + /// Create a compression level from a numeric value. + /// + /// Returns named variants for canonical levels (`0`/`3`, `1`, `7`, `11`) + /// and [`Level`](Self::Level) for all other values. + /// + /// With the default matcher backend (`MatchGeneratorDriver`), values + /// outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL) + /// are silently clamped during built-in level parameter resolution. + pub const fn from_level(level: i32) -> Self { + match level { + 0 | Self::DEFAULT_LEVEL => Self::Default, + 1 => Self::Fastest, + 7 => Self::Better, + 11 => Self::Best, + _ => Self::Level(level), + } + } } /// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm @@ -118,6 +166,17 @@ pub trait Matcher { fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>)); /// Reset this matcher so it can be used for the next new frame fn reset(&mut self, level: CompressionLevel); + /// Provide a hint about the total uncompressed size for the next frame. 
+ /// + /// Implementations may use this to select smaller hash tables and windows + /// for small inputs, matching the C zstd source-size-class behavior. + /// Called before [`reset`](Self::reset) when the caller knows the input + /// size (e.g. from pledged content size or file metadata). + /// + /// The default implementation is a no-op for custom matchers and + /// test stubs. The built-in runtime matcher (`MatchGeneratorDriver`) + /// overrides this hook and applies the hint during level resolution. + fn set_source_size_hint(&mut self, _size: u64) {} /// Prime matcher state with dictionary history before compressing the next frame. /// Default implementation is a no-op for custom matchers that do not support this. fn prime_with_dictionary(&mut self, _dict_content: &[u8], _offset_hist: [u32; 3]) {} diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs index 6f2e3f0a..afd3db60 100644 --- a/zstd/src/encoding/streaming_encoder.rs +++ b/zstd/src/encoding/streaming_encoder.rs @@ -81,6 +81,8 @@ impl StreamingEncoder { /// /// When set, the frame header will include a `Frame_Content_Size` field. /// This enables decoders to pre-allocate output buffers. + /// The pledged size is also forwarded as a source-size hint to the + /// matcher so small inputs can use smaller matching tables. /// /// Must be called **before** the first [`write`](Write::write) call; /// calling it after the frame header has already been emitted returns an @@ -93,6 +95,26 @@ impl StreamingEncoder { )); } self.pledged_content_size = Some(size); + // Also use pledged size as source-size hint so the matcher + // can select smaller tables for small inputs. + self.state.matcher.set_source_size_hint(size); + Ok(()) + } + + /// Provide a hint about the total uncompressed size for the next frame. 
+ /// + /// Unlike [`set_pledged_content_size`](Self::set_pledged_content_size), + /// this does **not** enforce that exactly `size` bytes are written; it + /// only optimises matcher parameters for small inputs. Must be called + /// before the first [`write`](Write::write). + pub fn set_source_size_hint(&mut self, size: u64) -> Result<(), Error> { + self.ensure_open()?; + if self.frame_started { + return Err(invalid_input_error( + "source size hint must be set before the first write", + )); + } + self.state.matcher.set_source_size_hint(size); Ok(()) } @@ -246,8 +268,9 @@ impl StreamingEncoder { CompressionLevel::Fastest | CompressionLevel::Default | CompressionLevel::Better - | CompressionLevel::Best => self.state.matcher.get_next_space(), - _ => Vec::new(), + | CompressionLevel::Best + | CompressionLevel::Level(_) => self.state.matcher.get_next_space(), + CompressionLevel::Uncompressed => Vec::new(), }; space.clear(); if space.capacity() > block_capacity { @@ -303,7 +326,8 @@ impl StreamingEncoder { | CompressionLevel::Fastest | CompressionLevel::Default | CompressionLevel::Better - | CompressionLevel::Best => Ok(()), + | CompressionLevel::Best + | CompressionLevel::Level(_) => Ok(()), } } @@ -338,7 +362,8 @@ impl StreamingEncoder { CompressionLevel::Fastest | CompressionLevel::Default | CompressionLevel::Better - | CompressionLevel::Best => { + | CompressionLevel::Best + | CompressionLevel::Level(_) => { let block = raw_block.take().expect("raw block missing"); debug_assert!(!block.is_empty(), "empty blocks handled above"); compress_block_encoded(&mut self.state, last_block, block, &mut encoded); @@ -991,6 +1016,41 @@ mod tests { assert_eq!(err.kind(), ErrorKind::InvalidInput); } + #[test] + fn source_size_hint_directly_reduces_window_header() { + let payload = b"streaming-source-size-hint".repeat(64); + + let mut no_hint = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11)); + no_hint.write_all(payload.as_slice()).unwrap(); + let no_hint_frame = 
no_hint.finish().unwrap(); + let no_hint_header = crate::decoding::frame::read_frame_header(no_hint_frame.as_slice()) + .unwrap() + .0; + let no_hint_window = no_hint_header.window_size().unwrap(); + + let mut with_hint = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11)); + with_hint + .set_source_size_hint(payload.len() as u64) + .unwrap(); + with_hint.write_all(payload.as_slice()).unwrap(); + let with_hint_frame = with_hint.finish().unwrap(); + let with_hint_header = + crate::decoding::frame::read_frame_header(with_hint_frame.as_slice()) + .unwrap() + .0; + let with_hint_window = with_hint_header.window_size().unwrap(); + + assert!( + with_hint_window <= no_hint_window, + "source size hint should not increase advertised window" + ); + + let mut decoder = StreamingDecoder::new(with_hint_frame.as_slice()).unwrap(); + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, payload); + } + #[cfg(feature = "std")] #[test] fn pledged_content_size_c_zstd_compatible() { diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs index 29e9d8f3..e2e39a0a 100644 --- a/zstd/src/tests/roundtrip_integrity.rs +++ b/zstd/src/tests/roundtrip_integrity.rs @@ -544,3 +544,325 @@ fn roundtrip_best_level_streaming_multi_block() { let data = generate_compressible(5555, 512 * 1024); assert_eq!(roundtrip_best_streaming(&data), data); } + +// ─── Numeric compression levels (CompressionLevel::Level) ───────── + +/// Canonical numeric levels should map to named enum variants for pattern/equality checks. 
+#[test] +fn numeric_levels_map_to_named_variants() { + assert!(matches!( + CompressionLevel::from_level(0), + CompressionLevel::Default + )); + assert!(matches!( + CompressionLevel::from_level(3), + CompressionLevel::Default + )); + assert!(matches!( + CompressionLevel::from_level(1), + CompressionLevel::Fastest + )); + assert!(matches!( + CompressionLevel::from_level(7), + CompressionLevel::Better + )); + assert!(matches!( + CompressionLevel::from_level(11), + CompressionLevel::Best + )); + assert!(matches!( + CompressionLevel::from_level(2), + CompressionLevel::Level(2) + )); +} + +/// `from_level(3)` and direct `Level(3)` must be equivalent to `Default`. +#[test] +fn numeric_level_3_matches_default() { + let data = generate_compressible(9000, 64 * 1024); + let default = compress_to_vec(&data[..], CompressionLevel::Default); + let from_level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3)); + let direct_level_3 = compress_to_vec(&data[..], CompressionLevel::Level(3)); + assert_eq!( + default, from_level_3, + "from_level(3) output must be identical to Default" + ); + assert_eq!( + default, direct_level_3, + "direct Level(3) output must be identical to Default" + ); +} + +/// `from_level(1)` must be equivalent to `Fastest`. +#[test] +fn numeric_level_1_matches_fastest() { + let data = generate_compressible(9001, 64 * 1024); + let fastest = compress_to_vec(&data[..], CompressionLevel::Fastest); + let level_1 = compress_to_vec(&data[..], CompressionLevel::from_level(1)); + assert_eq!( + fastest, level_1, + "Level(1) output must be identical to Fastest" + ); +} + +/// `from_level(7)` must be equivalent to `Better`. 
+#[test] +fn numeric_level_7_matches_better() { + let data = generate_compressible(9002, 64 * 1024); + let better = compress_to_vec(&data[..], CompressionLevel::Better); + let level_7 = compress_to_vec(&data[..], CompressionLevel::from_level(7)); + assert_eq!( + better, level_7, + "Level(7) output must be identical to Better" + ); +} + +/// `from_level(11)` must be equivalent to `Best`. +#[test] +fn numeric_level_11_matches_best() { + let data = generate_compressible(9003, 64 * 1024); + let best = compress_to_vec(&data[..], CompressionLevel::Best); + let level_11 = compress_to_vec(&data[..], CompressionLevel::from_level(11)); + assert_eq!(best, level_11, "Level(11) output must be identical to Best"); +} + +/// `from_level(0)` and direct `Level(0)` map to default compression (level 3). +#[test] +fn numeric_level_0_is_default_compression() { + let data = generate_compressible(9004, 64 * 1024); + let from_level_0 = compress_to_vec(&data[..], CompressionLevel::from_level(0)); + let direct_level_0 = compress_to_vec(&data[..], CompressionLevel::Level(0)); + let level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3)); + assert_eq!( + from_level_0, level_3, + "from_level(0) should map to default (level 3)" + ); + assert_eq!( + direct_level_0, level_3, + "direct Level(0) should map to default (level 3)" + ); +} + +/// All 22 positive levels produce valid output that round-trips correctly. 
+#[test] +fn all_22_levels_roundtrip() { + let data = generate_compressible(9100, 32 * 1024); + for level in 1..=22 { + let compressed = { + let mut compressor = FrameCompressor::new(CompressionLevel::from_level(level)); + compressor.set_source_size_hint(data.len() as u64); + compressor.set_source(data.as_slice()); + let mut out = Vec::new(); + compressor.set_drain(&mut out); + compressor.compress(); + out + }; + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + assert_eq!(data, result, "Roundtrip failed for Level({level})"); + } +} + +/// Negative levels produce valid compressed output (ultra-fast mode). +#[test] +fn negative_levels_roundtrip() { + let data = generate_compressible(9200, 32 * 1024); + for level in [-1, -2, -3, -5] { + let result = roundtrip_at_level(&data, CompressionLevel::from_level(level)); + assert_eq!(data, result, "Roundtrip failed for Level({level})"); + } +} + +/// Sampled numeric levels should produce valid compressed output and preserve +/// data through a full compress/decompress roundtrip. +#[test] +fn sampled_levels_roundtrip_validity() { + let data = generate_compressible(9300, 64 * 1024); + for level in [1, 3, 7, 11] { + let compressed = compress_to_vec(&data[..], CompressionLevel::from_level(level)); + assert!( + !compressed.is_empty(), + "Level {level} produced empty compressed output" + ); + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + assert_eq!( + data, result, + "Roundtrip failed for sampled compression level {level}" + ); + } +} + +/// Numeric levels work with the streaming encoder. 
+#[test]
+fn numeric_level_streaming_roundtrip() {
+    use crate::encoding::StreamingEncoder;
+    use crate::io::Write;
+
+    let data = generate_compressible(9400, 200 * 1024);
+    for level in [1, 3, 5, 7, 9, 11, -1] {
+        let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(level));
+        for chunk in data.chunks(4096) {
+            encoder.write_all(chunk).unwrap();
+        }
+        let compressed = encoder.finish().unwrap();
+        let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+        let mut result = Vec::new();
+        decoder.read_to_end(&mut result).unwrap();
+        assert_eq!(
+            data, result,
+            "Streaming roundtrip failed for Level({level})"
+        );
+    }
+}
+
+/// Values outside the `MIN_LEVEL..=MAX_LEVEL` range (both above and below) are clamped — they must still produce valid output.
+#[test]
+fn out_of_range_level_clamped() {
+    let data = generate_compressible(9500, 16 * 1024);
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(100));
+    assert_eq!(data, result, "Clamped Level(100) must still roundtrip");
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(-200000));
+    assert_eq!(data, result, "Clamped Level(-200000) must still roundtrip");
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(i32::MIN));
+    assert_eq!(data, result, "Clamped Level(i32::MIN) must still roundtrip");
+}
+
+// ─── Source-size-aware selection ───────────────────────────────────
+
+/// Small input with source size hint should produce valid output.
+#[test] +fn source_size_hint_small_input_roundtrip() { + let data = generate_compressible(9600, 4 * 1024); // 4 KiB + let compressed = { + let mut compressor = FrameCompressor::new(CompressionLevel::from_level(7)); + compressor.set_source_size_hint(data.len() as u64); + compressor.set_source(data.as_slice()); + let mut out = Vec::new(); + compressor.set_drain(&mut out); + compressor.compress(); + out + }; + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + assert_eq!(data, result, "Small input with size hint must roundtrip"); +} + +/// Source size hint should reduce compressed output overhead for small inputs +/// by avoiding oversized windows/tables. +#[test] +fn source_size_hint_reduces_window_for_small_input() { + let data = generate_compressible(9601, 1024); // 1 KiB + // Without hint: uses full level-11 window (16 MiB) + let no_hint = compress_to_vec(&data[..], CompressionLevel::from_level(11)); + let no_hint_header = crate::decoding::frame::read_frame_header(no_hint.as_slice()) + .unwrap() + .0 + .window_size() + .unwrap(); + // With hint: should use smaller window + let with_hint = { + let mut compressor = FrameCompressor::new(CompressionLevel::from_level(11)); + compressor.set_source_size_hint(data.len() as u64); + compressor.set_source(data.as_slice()); + let mut out = Vec::new(); + compressor.set_drain(&mut out); + compressor.compress(); + out + }; + let with_hint_header = crate::decoding::frame::read_frame_header(with_hint.as_slice()) + .unwrap() + .0 + .window_size() + .unwrap(); + // Both must decompress correctly + let mut decoder = StreamingDecoder::new(no_hint.as_slice()).unwrap(); + let mut r = Vec::new(); + decoder.read_to_end(&mut r).unwrap(); + assert_eq!(data, r); + + let mut decoder = StreamingDecoder::new(with_hint.as_slice()).unwrap(); + let mut r = Vec::new(); + decoder.read_to_end(&mut r).unwrap(); + assert_eq!(data, r); + + assert!( + 
with_hint_header <= no_hint_header, + "size hint should not increase frame window size: hint={} no_hint={}", + with_hint_header, + no_hint_header + ); + assert!( + with_hint_header < (16 * 1024 * 1024), + "hinted level-11 frame should advertise smaller-than-default window, got {}", + with_hint_header + ); +} + +/// Streaming encoder with pledged content size automatically uses source size hint. +#[test] +fn streaming_pledged_size_uses_source_hint() { + use crate::encoding::StreamingEncoder; + use crate::io::Write; + + let data = generate_compressible(9602, 2 * 1024); // 2 KiB + let no_hint = compress_to_vec(&data[..], CompressionLevel::from_level(11)); + let no_hint_header = crate::decoding::frame::read_frame_header(no_hint.as_slice()) + .unwrap() + .0 + .window_size() + .unwrap(); + + let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11)); + encoder.set_pledged_content_size(data.len() as u64).unwrap(); + encoder.write_all(&data).unwrap(); + let compressed = encoder.finish().unwrap(); + let hinted_header = crate::decoding::frame::read_frame_header(compressed.as_slice()) + .unwrap() + .0 + .window_size() + .unwrap(); + + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + assert_eq!(data, result, "Pledged-size streaming must roundtrip"); + assert!( + hinted_header <= no_hint_header, + "pledged source hint should not increase window size: hinted={} no_hint={}", + hinted_header, + no_hint_header + ); + assert!( + hinted_header < (16 * 1024 * 1024), + "pledged source hint should reduce level-11 advertised window, got {}", + hinted_header + ); +} + +/// All 22 levels produce valid output for a tiny (256 byte) input with size hint. 
+#[test] +fn all_levels_tiny_input_with_hint() { + let data = generate_compressible(9603, 256); + for level in 1..=22 { + let compressed = { + let mut compressor = FrameCompressor::new(CompressionLevel::from_level(level)); + compressor.set_source_size_hint(data.len() as u64); + compressor.set_source(data.as_slice()); + let mut out = Vec::new(); + compressor.set_drain(&mut out); + compressor.compress(); + out + }; + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + assert_eq!( + data, result, + "Tiny input with hint failed for Level({level})" + ); + } +} From 5ec3b5a7bf49719d877527ca11a25c560aeef985 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 00:20:59 +0300 Subject: [PATCH 02/11] fix(encoding): clamp source-size hint to dictionary history --- zstd/src/encoding/frame_compressor.rs | 51 ++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index bd5ad612..18eb9dfe 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -41,6 +41,7 @@ pub struct FrameCompressor { compression_level: CompressionLevel, dictionary: Option, dictionary_entropy_cache: Option, + source_size_hint: Option, state: CompressState, #[cfg(feature = "hash")] hasher: XxHash64, @@ -111,6 +112,7 @@ impl FrameCompressor { compression_level, dictionary: None, dictionary_entropy_cache: None, + source_size_hint: None, state: CompressState { matcher: MatchGeneratorDriver::new(1024 * 128, 1), last_huff_table: None, @@ -131,6 +133,7 @@ impl FrameCompressor { compressed_data: None, dictionary: None, dictionary_entropy_cache: None, + source_size_hint: None, state: CompressState { matcher, last_huff_table: None, @@ -163,7 +166,7 @@ impl FrameCompressor { /// small inputs, matching the C zstd source-size-class behavior. 
/// Must be called before [`compress`](Self::compress). pub fn set_source_size_hint(&mut self, size: u64) { - self.state.matcher.set_source_size_hint(size); + self.source_size_hint = Some(size); } /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain @@ -177,12 +180,25 @@ impl FrameCompressor { /// To avoid endlessly encoding from a potentially endless source (like a network socket) you can use the /// [Read::take] function pub fn compress(&mut self) { - // Clearing buffers to allow re-using of the compressor - self.state.matcher.reset(self.compression_level); - self.state.offset_hist = [1, 4, 8]; let use_dictionary_state = !matches!(self.compression_level, CompressionLevel::Uncompressed) && self.state.matcher.supports_dictionary_priming(); + if let Some(size_hint) = self.source_size_hint.take() { + let effective_hint = if use_dictionary_state { + let dict_floor = self + .dictionary + .as_ref() + .map(|dict| dict.dict_content.len() as u64) + .unwrap_or(0); + size_hint.max(dict_floor) + } else { + size_hint + }; + self.state.matcher.set_source_size_hint(effective_hint); + } + // Clearing buffers to allow re-using of the compressor + self.state.matcher.reset(self.compression_level); + self.state.offset_hist = [1, 4, 8]; let cached_entropy = if use_dictionary_state { self.dictionary_entropy_cache.as_ref() } else { @@ -943,6 +959,33 @@ mod tests { assert_eq!(decoded, payload); } + #[test] + fn source_size_hint_does_not_shrink_window_below_dictionary_history() { + let dict_id = 0xABCD_0004; + let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history + let dict_len = dict_content.len() as u64; + let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap(); + let payload = b"abcdabcdabcdabcd".repeat(128); + + let mut output = Vec::new(); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest); + compressor.set_dictionary(dict).unwrap(); + 
compressor.set_source_size_hint(1); // would clamp to MIN_WINDOW_LOG without dictionary guard + compressor.set_source(payload.as_slice()); + compressor.set_drain(&mut output); + compressor.compress(); + + let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice()) + .expect("encoded frame should have a header"); + let advertised_window = frame_header + .window_size() + .expect("window size should be present"); + assert!( + advertised_window >= dict_len, + "window_size ({advertised_window}) must cover dictionary history ({dict_len})", + ); + } + #[test] fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() { let dict_id = 0xABCD_0003; From a2ec7471d3b0750bca356ddff834634c99bc91ed Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 00:47:42 +0300 Subject: [PATCH 03/11] fix(encoding): avoid shrinking pooled matcher buffers --- zstd/src/encoding/match_generator.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index e303160c..89afd452 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -539,9 +539,6 @@ impl Matcher for MatchGeneratorDriver { if space.len() > self.slice_size { space.truncate(self.slice_size); } - if space.capacity() > self.slice_size { - space.shrink_to(self.slice_size); - } if space.len() < self.slice_size { space.resize(self.slice_size, 0); } @@ -2215,6 +2212,27 @@ fn source_hint_clamps_driver_slice_size_to_window() { driver.commit_space(space); } +#[test] +fn pooled_space_keeps_capacity_when_slice_size_shrinks() { + let mut driver = MatchGeneratorDriver::new(128 * 1024, 2); + driver.reset(CompressionLevel::Default); + + let large = driver.get_next_space(); + let large_capacity = large.capacity(); + assert!(large_capacity >= 128 * 1024); + driver.commit_space(large); + + driver.set_source_size_hint(1024); + 
driver.reset(CompressionLevel::Default); + + let small = driver.get_next_space(); + assert_eq!(small.len(), 1024); + assert!( + small.capacity() >= large_capacity, + "pooled buffer capacity should be preserved to avoid shrink/grow churn" + ); +} + #[test] fn driver_best_to_fastest_releases_oversized_hc_tables() { let mut driver = MatchGeneratorDriver::new(32, 2); From f051085cf38566b06bc3b3d43b9e7a9c78f8cb5d Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 01:50:01 +0300 Subject: [PATCH 04/11] docs(readme): clarify numeric-level compatibility scope --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3f5e783b..39b1e91e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi **Fork goals:** - Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree) - Performance parity with C zstd for decompression (currently 1.4-3.5x slower) -- Full numeric compression levels (0 = default, 1–22 plus negative ultra-fast, C zstd compatible) +- Full numeric compression levels (0 = default, 1–22 plus negative ultra-fast, with C zstd-compatible level numbering/API; not exact strategy/ratio parity at every level) - No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance) **Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements. 
From f0593eecd4bc0825ce91381b7e02a8bb2369f8ed Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 02:55:54 +0300 Subject: [PATCH 05/11] fix(encoding): tighten level hint behavior and docs --- README.md | 1 - cli/src/main.rs | 4 +- zstd/src/encoding/match_generator.rs | 60 +++++++++++++++++++++++++--- zstd/src/encoding/mod.rs | 10 +++++ 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 39b1e91e..8dcad430 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,6 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe - [x] Better (roughly level 7) - [x] Best (roughly level 11) - [x] Numeric levels `0` (default), `1–22`, and negative ultra-fast levels via `CompressionLevel::from_level(n)` (C zstd compatible numbering) -- [x] Negative levels for ultra-fast compression - [x] Checksums - [x] Frame Content Size — `FrameCompressor` writes FCS automatically; `StreamingEncoder` requires `set_pledged_content_size()` before first write - [x] Dictionary compression diff --git a/cli/src/main.rs b/cli/src/main.rs index 239074d6..2838bfe8 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -144,8 +144,8 @@ fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_e let mut encoder = structured_zstd::encoding::StreamingEncoder::new(temporary_output, compression_level); encoder - .set_source_size_hint(source_size as u64) - .wrap_err("failed to configure source size hint")?; + .set_pledged_content_size(source_size as u64) + .wrap_err("failed to configure pledged content size")?; std::io::copy(&mut encoder_input, &mut encoder).wrap_err("streaming compression failed")?; encoder.finish().wrap_err("failed to finalize zstd frame") })(); diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 89afd452..bb6ba0f3 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -357,6 +357,7 @@ impl Matcher for 
MatchGeneratorDriver { fn reset(&mut self, level: CompressionLevel) { let hint = self.source_size_hint.take(); + let hinted = hint.is_some(); let params = Self::level_params(level, hint); let max_window_size = 1usize << params.window_log; self.dictionary_retained_budget = 0; @@ -421,7 +422,11 @@ impl Matcher for MatchGeneratorDriver { .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size)); dfast.max_window_size = max_window_size; dfast.lazy_depth = params.lazy_depth; - dfast.set_hash_bits(dfast_hash_bits_for_window(max_window_size)); + dfast.set_hash_bits(if hinted { + dfast_hash_bits_for_window(max_window_size) + } else { + DFAST_HASH_BITS + }); let vec_pool = &mut self.vec_pool; dfast.reset(|mut data| { data.resize(data.capacity(), 0); @@ -560,10 +565,10 @@ impl Matcher for MatchGeneratorDriver { MatcherBackend::Simple => { let vec_pool = &mut self.vec_pool; let mut evicted_bytes = 0usize; - let suffixes = self - .suffix_pool - .pop() - .unwrap_or_else(|| SuffixStore::with_capacity(space.len())); + let suffixes = match self.suffix_pool.pop() { + Some(store) if store.slots.len() >= space.len() => store, + _ => SuffixStore::with_capacity(space.len()), + }; let suffix_pool = &mut self.suffix_pool; self.match_generator .add_data(space, suffixes, |mut data, mut suffixes| { @@ -2197,6 +2202,51 @@ fn driver_small_source_hint_shrinks_dfast_hash_tables() { ); } +#[test] +fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() { + let mut driver = MatchGeneratorDriver::new(32, 2); + + driver.reset(CompressionLevel::Level(2)); + let mut space = driver.get_next_space(); + space[..12].copy_from_slice(b"abcabcabcabc"); + space.truncate(12); + driver.commit_space(space); + driver.skip_matching(); + + let table_len = driver.dfast_matcher().short_hash.len(); + assert_eq!( + table_len, + 1 << DFAST_HASH_BITS, + "unhinted Level(2) should keep default dfast table size" + ); +} + +#[test] +fn simple_backend_rejects_undersized_pooled_suffix_store() { + let mut 
driver = MatchGeneratorDriver::new(128 * 1024, 2); + driver.reset(CompressionLevel::Fastest); + + driver.suffix_pool.push(SuffixStore::with_capacity(1024)); + + let mut space = driver.get_next_space(); + space.clear(); + space.resize(4096, 0xAB); + driver.commit_space(space); + + let last_suffix_slots = driver + .match_generator + .window + .last() + .expect("window entry must exist after commit") + .suffixes + .slots + .len(); + assert!( + last_suffix_slots >= 4096, + "undersized pooled suffix store must not be reused for larger blocks" + ); +} + #[test] fn source_hint_clamps_driver_slice_size_to_window() { let mut driver = MatchGeneratorDriver::new(128 * 1024, 2); diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index 976d28fd..1fa89360 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -99,12 +99,22 @@ pub enum CompressionLevel { /// Named variants map to specific numeric levels: /// [`Fastest`](Self::Fastest) = 1, [`Default`](Self::Default) = 3, /// [`Better`](Self::Better) = 7, [`Best`](Self::Best) = 11. + /// [`Best`](Self::Best) remains the highest-ratio named preset, but + /// [`Level`](Self::Level) values above 11 can target stronger (slower) + /// tuning than the named hierarchy. /// /// Levels above 11 use progressively larger windows and deeper search /// with the lazy2 hash-chain backend. Levels that require strategies /// this crate has not yet implemented (btopt, btultra) are approximated /// with the closest available matcher. /// + /// **Limitation:** large hash-chain levels still use 32-bit positions. + /// For single-frame inputs exceeding ~4 GiB, matches can still be found + /// for roughly one window past that point; once all in-window positions + /// exceed `u32::MAX` (≈4 GiB + window size), matching becomes effectively + /// repcode-only. Prefer [`CompressionLevel::Default`] for very large + /// single-frame streams until table rebasing is implemented. 
+ /// /// Semver note: this variant was added after the initial enum shape and /// is a breaking API change for downstream crates that exhaustively /// `match` on [`CompressionLevel`] without a wildcard arm. From 8265e691f66b4f15f4ed2bb8034fbb5246d65a5d Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 03:25:05 +0300 Subject: [PATCH 06/11] fix(encoding): account payload with dictionary size hints --- zstd/src/encoding/frame_compressor.rs | 32 ++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 18eb9dfe..2f7bc2df 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -190,7 +190,9 @@ impl FrameCompressor { .as_ref() .map(|dict| dict.dict_content.len() as u64) .unwrap_or(0); - size_hint.max(dict_floor) + // Dictionary priming extends matcher history beyond payload bytes, + // so the advertised hint must cover both dictionary and payload. 
+ size_hint.saturating_add(dict_floor) } else { size_hint }; @@ -986,6 +988,34 @@ mod tests { ); } + #[test] + fn source_size_hint_with_dictionary_covers_dictionary_plus_payload() { + let dict_id = 0xABCD_0005; + let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history + let dict_len = dict_content.len() as u64; + let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap(); + let payload = b"abcd".repeat(1024); // 4 KiB payload + let payload_len = payload.len() as u64; + + let mut output = Vec::new(); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest); + compressor.set_dictionary(dict).unwrap(); + compressor.set_source_size_hint(payload_len); + compressor.set_source(payload.as_slice()); + compressor.set_drain(&mut output); + compressor.compress(); + + let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice()) + .expect("encoded frame should have a header"); + let advertised_window = frame_header + .window_size() + .expect("window size should be present"); + assert!( + advertised_window >= dict_len + payload_len, + "window_size ({advertised_window}) must cover dictionary ({dict_len}) + payload ({payload_len})" + ); + } + #[test] fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() { let dict_id = 0xABCD_0003; From 61ea66a92f690037dcf12eeedf8f2a9e63623d03 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 03:44:12 +0300 Subject: [PATCH 07/11] fix(encoding): clarify and verify dictionary size hints --- zstd/src/encoding/frame_compressor.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 2f7bc2df..bb765174 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -164,6 +164,10 @@ impl FrameCompressor { /// /// When set, the encoder selects smaller hash tables and windows for /// small inputs, 
matching the C zstd source-size-class behavior. + /// + /// When dictionary priming is active, the effective matcher hint also + /// includes primed dictionary history bytes in addition to frame payload + /// bytes so the advertised window can cover both. /// Must be called before [`compress`](Self::compress). pub fn set_source_size_hint(&mut self, size: u64) { self.source_size_hint = Some(size); @@ -994,6 +998,8 @@ mod tests { let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history let dict_len = dict_content.len() as u64; let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap(); + let dict_for_decoder = + crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap(); let payload = b"abcd".repeat(1024); // 4 KiB payload let payload_len = payload.len() as u64; @@ -1014,6 +1020,12 @@ mod tests { advertised_window >= dict_len + payload_len, "window_size ({advertised_window}) must cover dictionary ({dict_len}) + payload ({payload_len})" ); + + let mut decoder = FrameDecoder::new(); + decoder.add_dict(dict_for_decoder).unwrap(); + let mut decoded = Vec::with_capacity(payload.len()); + decoder.decode_all_to_vec(&output, &mut decoded).unwrap(); + assert_eq!(decoded, payload); } #[test] From 8f5e59b41237168e7dcc770986e32153bd116eb8 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 03:59:38 +0300 Subject: [PATCH 08/11] test(encoding): add decode roundtrip for dictionary hint floor --- zstd/src/encoding/frame_compressor.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index bb765174..14b61655 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -971,6 +971,8 @@ mod tests { let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history let dict_len = dict_content.len() as u64; let dict = crate::decoding::Dictionary::from_raw_content(dict_id, 
dict_content).unwrap(); + let dict_for_decoder = + crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap(); let payload = b"abcdabcdabcdabcd".repeat(128); let mut output = Vec::new(); @@ -990,6 +992,12 @@ mod tests { advertised_window >= dict_len, "window_size ({advertised_window}) must cover dictionary history ({dict_len})", ); + + let mut decoder = FrameDecoder::new(); + decoder.add_dict(dict_for_decoder).unwrap(); + let mut decoded = Vec::with_capacity(payload.len()); + decoder.decode_all_to_vec(&output, &mut decoded).unwrap(); + assert_eq!(decoded, payload); } #[test] From eb362d04010279234461b97b84574cfc5b0a2439 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 10:18:23 +0300 Subject: [PATCH 09/11] fix(encoding): align size-hint handling and regressions - use source_size_hint in CLI instead of hard pledged size from metadata - rename dfast hash helper to hash_index for clearer field/method separation - pin late set_source_size_hint InvalidInput behavior after write --- cli/src/main.rs | 4 ++-- zstd/src/encoding/match_generator.rs | 6 +++--- zstd/src/encoding/streaming_encoder.rs | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 2838bfe8..239074d6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -144,8 +144,8 @@ fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_e let mut encoder = structured_zstd::encoding::StreamingEncoder::new(temporary_output, compression_level); encoder - .set_pledged_content_size(source_size as u64) - .wrap_err("failed to configure pledged content size")?; + .set_source_size_hint(source_size as u64) + .wrap_err("failed to configure source size hint")?; std::io::copy(&mut encoder_input, &mut encoder).wrap_err("streaming compression failed")?; encoder.finish().wrap_err("failed to finalize zstd frame") })(); diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs 
index bb6ba0f3..cef319c6 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -1514,15 +1514,15 @@ impl DfastMatchGenerator { fn hash4(&self, data: &[u8]) -> usize { let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; - self.hash_bits(value) + self.hash_index(value) } fn hash8(&self, data: &[u8]) -> usize { let value = u64::from_le_bytes(data[..8].try_into().unwrap()); - self.hash_bits(value) + self.hash_index(value) } - fn hash_bits(&self, value: u64) -> usize { + fn hash_index(&self, value: u64) -> usize { const PRIME: u64 = 0x9E37_79B1_85EB_CA87; ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize } diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs index afd3db60..ae7e7a0e 100644 --- a/zstd/src/encoding/streaming_encoder.rs +++ b/zstd/src/encoding/streaming_encoder.rs @@ -1033,6 +1033,10 @@ mod tests { .set_source_size_hint(payload.len() as u64) .unwrap(); with_hint.write_all(payload.as_slice()).unwrap(); + let late_hint_err = with_hint + .set_source_size_hint(payload.len() as u64) + .unwrap_err(); + assert_eq!(late_hint_err.kind(), ErrorKind::InvalidInput); let with_hint_frame = with_hint.finish().unwrap(); let with_hint_header = crate::decoding::frame::read_frame_header(with_hint_frame.as_slice()) From 88f0576c10c4b67e9190f01765a62146a7612a8a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 10:40:19 +0300 Subject: [PATCH 10/11] fix(encoding): tighten source-hint semantics and tests - keep dictionary source-size hint scoped to payload bytes in FrameCompressor - relax dictionary window assertions to roundtrip + nonincreasing-window guarantees - pin dfast hinted table size and document visible set_source_size_hint effects --- zstd/src/encoding/frame_compressor.rs | 112 +++++++++++++++---------- zstd/src/encoding/match_generator.rs | 1 + zstd/src/encoding/streaming_encoder.rs | 5 +- 3 files changed, 73 insertions(+), 45 
deletions(-) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 14b61655..14235dba 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -188,19 +188,9 @@ impl FrameCompressor { !matches!(self.compression_level, CompressionLevel::Uncompressed) && self.state.matcher.supports_dictionary_priming(); if let Some(size_hint) = self.source_size_hint.take() { - let effective_hint = if use_dictionary_state { - let dict_floor = self - .dictionary - .as_ref() - .map(|dict| dict.dict_content.len() as u64) - .unwrap_or(0); - // Dictionary priming extends matcher history beyond payload bytes, - // so the advertised hint must cover both dictionary and payload. - size_hint.saturating_add(dict_floor) - } else { - size_hint - }; - self.state.matcher.set_source_size_hint(effective_hint); + // Keep source-size hint scoped to payload bytes; dictionary priming + // is applied separately and should not force larger matcher sizing. 
+ self.state.matcher.set_source_size_hint(size_hint); } // Clearing buffers to allow re-using of the compressor self.state.matcher.reset(self.compression_level); @@ -966,73 +956,109 @@ mod tests { } #[test] - fn source_size_hint_does_not_shrink_window_below_dictionary_history() { + fn source_size_hint_with_dictionary_keeps_roundtrip_and_nonincreasing_window() { let dict_id = 0xABCD_0004; let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history - let dict_len = dict_content.len() as u64; let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap(); let dict_for_decoder = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap(); let payload = b"abcdabcdabcdabcd".repeat(128); - let mut output = Vec::new(); - let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest); - compressor.set_dictionary(dict).unwrap(); - compressor.set_source_size_hint(1); // would clamp to MIN_WINDOW_LOG without dictionary guard - compressor.set_source(payload.as_slice()); - compressor.set_drain(&mut output); - compressor.compress(); + let mut hinted_output = Vec::new(); + let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest); + hinted.set_dictionary(dict).unwrap(); + hinted.set_source_size_hint(1); + hinted.set_source(payload.as_slice()); + hinted.set_drain(&mut hinted_output); + hinted.compress(); + + let mut no_hint_output = Vec::new(); + let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest); + no_hint + .set_dictionary( + crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)) + .unwrap(), + ) + .unwrap(); + no_hint.set_source(payload.as_slice()); + no_hint.set_drain(&mut no_hint_output); + no_hint.compress(); - let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice()) - .expect("encoded frame should have a header"); - let advertised_window = frame_header + let hinted_window = 
crate::decoding::frame::read_frame_header(hinted_output.as_slice()) + .expect("encoded frame should have a header") + .0 + .window_size() + .expect("window size should be present"); + let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice()) + .expect("encoded frame should have a header") + .0 .window_size() .expect("window size should be present"); assert!( - advertised_window >= dict_len, - "window_size ({advertised_window}) must cover dictionary history ({dict_len})", + hinted_window <= no_hint_window, + "source-size hint should not increase advertised window with dictionary priming", ); let mut decoder = FrameDecoder::new(); decoder.add_dict(dict_for_decoder).unwrap(); let mut decoded = Vec::with_capacity(payload.len()); - decoder.decode_all_to_vec(&output, &mut decoded).unwrap(); + decoder + .decode_all_to_vec(&hinted_output, &mut decoded) + .unwrap(); assert_eq!(decoded, payload); } #[test] - fn source_size_hint_with_dictionary_covers_dictionary_plus_payload() { + fn source_size_hint_with_dictionary_keeps_roundtrip_for_larger_payload() { let dict_id = 0xABCD_0005; let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history - let dict_len = dict_content.len() as u64; let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap(); let dict_for_decoder = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap(); let payload = b"abcd".repeat(1024); // 4 KiB payload let payload_len = payload.len() as u64; - let mut output = Vec::new(); - let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest); - compressor.set_dictionary(dict).unwrap(); - compressor.set_source_size_hint(payload_len); - compressor.set_source(payload.as_slice()); - compressor.set_drain(&mut output); - compressor.compress(); + let mut hinted_output = Vec::new(); + let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest); + hinted.set_dictionary(dict).unwrap(); + 
hinted.set_source_size_hint(payload_len); + hinted.set_source(payload.as_slice()); + hinted.set_drain(&mut hinted_output); + hinted.compress(); + + let mut no_hint_output = Vec::new(); + let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest); + no_hint + .set_dictionary( + crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)) + .unwrap(), + ) + .unwrap(); + no_hint.set_source(payload.as_slice()); + no_hint.set_drain(&mut no_hint_output); + no_hint.compress(); - let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice()) - .expect("encoded frame should have a header"); - let advertised_window = frame_header + let hinted_window = crate::decoding::frame::read_frame_header(hinted_output.as_slice()) + .expect("encoded frame should have a header") + .0 + .window_size() + .expect("window size should be present"); + let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice()) + .expect("encoded frame should have a header") + .0 .window_size() .expect("window size should be present"); assert!( - advertised_window >= dict_len + payload_len, - "window_size ({advertised_window}) must cover dictionary ({dict_len}) + payload ({payload_len})" + hinted_window <= no_hint_window, + "source-size hint should not increase advertised window with dictionary priming", ); let mut decoder = FrameDecoder::new(); decoder.add_dict(dict_for_decoder).unwrap(); let mut decoded = Vec::with_capacity(payload.len()); - decoder.decode_all_to_vec(&output, &mut decoded).unwrap(); + decoder + .decode_all_to_vec(&hinted_output, &mut decoded) + .unwrap(); assert_eq!(decoded, payload); } diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index cef319c6..f223b820 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -2196,6 +2196,7 @@ fn driver_small_source_hint_shrinks_dfast_hash_tables() { let hinted_tables = 
driver.dfast_matcher().short_hash.len(); assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG); + assert_eq!(hinted_tables, 1 << MIN_WINDOW_LOG); assert!( hinted_tables < full_tables, "tiny source hint should reduce dfast table footprint" diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs index ae7e7a0e..c140bc38 100644 --- a/zstd/src/encoding/streaming_encoder.rs +++ b/zstd/src/encoding/streaming_encoder.rs @@ -105,8 +105,9 @@ impl StreamingEncoder { /// /// Unlike [`set_pledged_content_size`](Self::set_pledged_content_size), /// this does **not** enforce that exactly `size` bytes are written; it - /// only optimises matcher parameters for small inputs. Must be called - /// before the first [`write`](Write::write). + /// may reduce matcher tables, advertised frame window, and block sizing + /// for small inputs. Must be called before the first + /// [`write`](Write::write). pub fn set_source_size_hint(&mut self, size: u64) -> Result<(), Error> { self.ensure_open()?; if self.frame_started { From 03b33e22461f553dd01f6e8f1eba6a5babd8f1dd Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 5 Apr 2026 11:17:21 +0300 Subject: [PATCH 11/11] docs(encoding): align source size hint contract - update FrameCompressor::set_source_size_hint docs to payload-only semantics - document that dictionary priming does not inflate advertised window --- zstd/src/encoding/frame_compressor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 14235dba..c6c11c5a 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -165,9 +165,9 @@ impl FrameCompressor { /// When set, the encoder selects smaller hash tables and windows for /// small inputs, matching the C zstd source-size-class behavior. 
/// - /// When dictionary priming is active, the effective matcher hint also - /// includes primed dictionary history bytes in addition to frame payload - /// bytes so the advertised window can cover both. + /// This hint applies only to frame payload bytes (`size`). Dictionary + /// history is primed separately and does not inflate the hinted size or + /// advertised frame window. /// Must be called before [`compress`](Self::compress). pub fn set_source_size_hint(&mut self, size: u64) { self.source_size_hint = Some(size);