Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi
**Fork goals:**
- Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree)
- Performance parity with C zstd for decompression (currently 1.4-3.5x slower)
- Additional compression levels (Fastest, Default, Better, and Best are all implemented)
- Full numeric compression levels (0 = default, 1–22, plus negative ultra-fast levels) with C zstd-compatible level numbering and API; exact strategy/ratio parity with C zstd is not guaranteed at every level
- No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance)

**Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements.
Expand All @@ -46,6 +46,7 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe
- [x] Default (roughly level 3)
- [x] Better (roughly level 7)
- [x] Best (roughly level 11)
- [x] Numeric levels `0` (default), `1–22`, and negative ultra-fast levels via `CompressionLevel::from_level(n)` (C zstd compatible numbering)
- [x] Checksums
- [x] Frame Content Size — `FrameCompressor` writes FCS automatically; `StreamingEncoder` requires `set_pledged_content_size()` before first write
- [x] Dictionary compression
Expand All @@ -67,7 +68,10 @@ Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares
use structured_zstd::encoding::{compress, compress_to_vec, CompressionLevel};

let data: &[u8] = b"hello world";
// Named level
let compressed = compress_to_vec(data, CompressionLevel::Fastest);
// Numeric level (C zstd compatible: 0 = default, 1-22, negative for ultra-fast)
let compressed = compress_to_vec(data, CompressionLevel::from_level(7));
```

```rust,no_run
Expand Down
113 changes: 87 additions & 26 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,40 @@ enum Commands {
/// Where the compressed file is written
/// [default: <INPUT_FILE>.zst]
output_file: Option<PathBuf>,
/// How thoroughly the file should be compressed. A higher level will take
/// more time to compress but result in a smaller file, and vice versa.
/// Compression level (higher = smaller, slower).
///
/// - 0: Uncompressed
/// - 1: Fastest
/// - 2: Default
/// - 3: Better (lazy2, ~zstd level 7)
/// - 4: Best (deep lazy2, ~zstd level 11)
/// Numeric levels follow the zstd convention where 0 means
/// "use the default level" (currently 3).
///
/// - 0: Default (same as 3)
/// - 1: Fastest (fast hash, ~zstd level 1)
/// - 3: Default (dfast, ~zstd level 3)
/// - 7: Better (lazy2, ~zstd level 7)
/// - 11: Best (deep lazy2, ~zstd level 11)
/// - Negative: ultra-fast modes (less compression, more speed)
/// - 12-22: progressively higher ratio (capped at lazy2 backend)
///
/// Use --store to write an uncompressed zstd frame.
#[arg(
short,
long,
value_name = "COMPRESSION_LEVEL",
default_value_t = 2,
value_parser = clap::value_parser!(u8).range(0..=4),
verbatim_doc_comment
value_name = "LEVEL",
default_value_t = CompressionLevel::DEFAULT_LEVEL,
// clap's ranged parser expects i64 bounds here (RangedI64ValueParser),
// even though the target value type is i32.
value_parser = clap::value_parser!(i32).range(
(CompressionLevel::MIN_LEVEL as i64)..=(CompressionLevel::MAX_LEVEL as i64)
),
verbatim_doc_comment,
allow_hyphen_values = true,
)]
level: u8,
level: i32,
/// Write an uncompressed zstd frame (no compression).
///
/// When set, compression itself ignores `--level` and writes a raw
/// zstd frame. The CLI still validates `--level` range at parse time.
#[arg(long)]
store: bool,
},
Decompress {
/// .zst archive to decompress
Expand Down Expand Up @@ -81,9 +98,10 @@ fn main() -> color_eyre::Result<()> {
input_file,
output_file,
level,
store,
} => {
let output_file = output_file.unwrap_or_else(|| add_extension(&input_file, ".zst"));
compress(input_file, output_file, level)?;
compress(input_file, output_file, level, store)?;
}
Commands::Decompress {
input_file,
Expand All @@ -101,15 +119,12 @@ fn main() -> color_eyre::Result<()> {
Ok(())
}

fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<()> {
fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_eyre::Result<()> {
info!("compressing {input:?} to {output:?}");
let compression_level: structured_zstd::encoding::CompressionLevel = match level {
0 => CompressionLevel::Uncompressed,
1 => CompressionLevel::Fastest,
2 => CompressionLevel::Default,
3 => CompressionLevel::Better,
4 => CompressionLevel::Best,
_ => return Err(eyre!("unsupported compression level: {level}")),
let compression_level = if store {
CompressionLevel::Uncompressed
} else {
CompressionLevel::from_level(level)
};
ensure_distinct_paths(&input, &output)?;
ensure_regular_output_destination(&output)?;
Expand All @@ -128,6 +143,9 @@ fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<()
let compression_result: color_eyre::Result<File> = (|| {
let mut encoder =
structured_zstd::encoding::StreamingEncoder::new(temporary_output, compression_level);
encoder
.set_source_size_hint(source_size as u64)
.wrap_err("failed to configure source size hint")?;
std::io::copy(&mut encoder_input, &mut encoder).wrap_err("streaming compression failed")?;
encoder.finish().wrap_err("failed to finalize zstd frame")
})();
Expand Down Expand Up @@ -402,7 +420,50 @@ mod tests {

/// `--level` values above `CompressionLevel::MAX_LEVEL` must be rejected while
/// the command line is being parsed.
#[test]
fn cli_rejects_unsupported_compression_level_at_parse_time() {
    // Derive the probe value from MAX_LEVEL instead of hard-coding a number, so
    // the test always targets the first level past the accepted range.
    let too_high =
        (structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64 + 1).to_string();
    let parse = Cli::try_parse_from([
        "structured-zstd",
        "compress",
        "in.bin",
        "--level",
        too_high.as_str(),
    ]);
    assert!(parse.is_err());
}

/// A negative `--level` argument (here `-3`) is expected to parse as a value
/// rather than being mistaken for an unknown flag.
#[test]
fn cli_accepts_negative_compression_level() {
    let argv = ["structured-zstd", "compress", "in.bin", "--level", "-3"];
    let parse = Cli::try_parse_from(argv);
    assert!(parse.is_ok());
}

/// A `--level` one step below `CompressionLevel::MIN_LEVEL` must fail argument
/// parsing.
#[test]
fn cli_rejects_too_negative_compression_level() {
    // Widen to i64 before subtracting so the probe value cannot wrap.
    let min_level = structured_zstd::encoding::CompressionLevel::MIN_LEVEL as i64;
    let too_low = (min_level - 1).to_string();
    let argv = [
        "structured-zstd",
        "compress",
        "in.bin",
        "--level",
        too_low.as_str(),
    ];
    let parse = Cli::try_parse_from(argv);
    assert!(parse.is_err());
}

/// Passing `--store` does not relax the `--level` range check: a level one
/// step above `CompressionLevel::MAX_LEVEL` must still fail argument parsing.
#[test]
fn cli_store_still_validates_level_range_at_parse_time() {
    // Widen to i64 before adding so the probe value cannot wrap.
    let max_level = structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64;
    let too_high = (max_level + 1).to_string();
    let argv = [
        "structured-zstd",
        "compress",
        "in.bin",
        "--store",
        "--level",
        too_high.as_str(),
    ];
    let parse = Cli::try_parse_from(argv);
    assert!(parse.is_err());
}

Expand All @@ -415,7 +476,7 @@ mod tests {
let input = std::env::temp_dir().join(format!("structured-zstd-cli-alias-{unique}.txt"));
fs::write(&input, b"streaming-cli-alias-check").unwrap();

let err = compress(input.clone(), input.clone(), 2).unwrap_err();
let err = compress(input.clone(), input.clone(), 3, false).unwrap_err();
let message = format!("{err:#}");
assert!(
message.contains("input and output"),
Expand All @@ -434,7 +495,7 @@ mod tests {
fs::write(&input, b"streaming-cli-hardlink-check").unwrap();
fs::hard_link(&input, &output).unwrap();

let err = compress(input.clone(), output.clone(), 2).unwrap_err();
let err = compress(input.clone(), output.clone(), 3, false).unwrap_err();
let message = format!("{err:#}");
assert!(
message.contains("input and output"),
Expand All @@ -455,7 +516,7 @@ mod tests {
let output =
std::env::temp_dir().join(format!("structured-zstd-cli-missing-output-{unique}.zst"));

let err = compress(missing_input, output.clone(), 2).unwrap_err();
let err = compress(missing_input, output.clone(), 3, false).unwrap_err();
let message = format!("{err:#}");
assert!(
message.contains("failed to open input file"),
Expand All @@ -473,7 +534,7 @@ mod tests {
let output = dir.join("existing-dir");
fs::create_dir(&output).unwrap();

let err = compress(input, output.clone(), 2).unwrap_err();
let err = compress(input, output.clone(), 3, false).unwrap_err();
let message = format!("{err:#}");
assert!(
message.contains("not a regular file"),
Expand Down
Loading
Loading