From 0d476ddb1721df91be3424c4bce2244cf5ce2ad7 Mon Sep 17 00:00:00 2001
From: Chris Lundquist <rampantdurandal@gmail.com>
Date: Tue, 10 Mar 2026 01:58:50 -0700
Subject: [PATCH] refactor: remove Deflate pipeline and default to Lzf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Deflate pipeline (LZ77 + Huffman, ID 0) was neither gzip-compatible
nor competitive with Lzf (LZ77 + FSE). Tombstone ID 0, following the
established pattern of prior removals (Lz78R ID 7, Parlz ID 11).

- Remove Pipeline::Deflate enum variant, tombstone ID 0 in TryFrom<u8>
- Change default pipeline to Lzf in CLI, FFI, auto-selection, and scripts
- Rename DEFLATE_MAX_MATCH → LZ77_MAX_MATCH (used broadly as default cap)
- Delete throughput_deflate and stages_deflate_webgpu_chained benchmarks
- Keep src/deflate.rs (RFC 1951 inflate for .gz decompression)
- Keep src/huffman/ (used by LzSeqH pipeline)
- Drop the Lzr and Deflate GPU stage-chaining docs from ARCHITECTURE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ARCHITECTURE.md                              |  48 ++----
 Cargo.toml                                   |   8 -
 README.md                                    |   5 +-
 benches/stages_deflate_webgpu_chained.rs     |  70 ---------
 benches/stages_lz77.rs                       |  12 +-
 benches/stages_match_finders.rs              |   8 +-
 benches/throughput_deflate.rs                |  23 ---
 docs/DESIGN.md                               |   8 +-
 examples/block_size_experiment.rs            |   2 +-
 examples/decode_profile.rs                   |   6 +-
 examples/explore_pipelines.rs                |   4 -
 examples/pipeline_comparison.rs              |   1 -
 examples/profile.rs                          |   9 +-
 examples/profile_decode.rs                   |   8 +-
 examples/sortlz_lzseq_eval.rs                |   3 +-
 examples/webgpu_profile.rs                   |  32 ++--
 fuzz/fuzz_targets/fuzz_pipeline_roundtrip.rs |   1 -
 scripts/analyze-ratio.sh                     |   4 +-
 scripts/bench.sh                             |  18 +--
 scripts/gpu-experiment-bench.sh              |   1 -
 scripts/perf-gate.sh                         |   4 +-
 scripts/profile.sh                           |   8 +-
 scripts/test-targets.sh                      |   2 +-
 scripts/trace-pipeline.sh                    |  21 +--
 src/bin/pz.rs                                |  16 +-
 src/ffi.rs                                   |  12 +-
 src/lz77/mod.rs                              |  14 +-
 src/lz77/tests.rs                            |   6 +-
 src/lz_token.rs                              |   2 +-
 src/lzseq/mod.rs                             |   4 +-
 src/optimal.rs                               |   8 +-
 src/pipeline/blocks.rs                       |  16 +-
 src/pipeline/demux.rs                        |   2 +-
 src/pipeline/mod.rs                          |  58 +++-----
 src/pipeline/parallel.rs                     |  11 +-
 src/pipeline/parallel_tests.rs               |  23 +--
 src/pipeline/stages.rs                       |   2 -
 src/pipeline/tests.rs                        | 149 ++++++-------------
 src/sortlz.rs                                |  22 +--
 src/streaming.rs                             |  10 +-
 src/validation.rs                            | 130 ++--------------
 src/webgpu/tests/pipelines.rs                |  68 +--------
 42 files changed, 203 insertions(+), 656 deletions(-)
 delete mode 100644 benches/stages_deflate_webgpu_chained.rs
 delete mode 100644 benches/throughput_deflate.rs
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 7c570b8..4841764 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -5,13 +5,13 @@ For day-to-day development instructions, see `CLAUDE.md`.
 
 ## Completed milestones (12/12)
 - **Algorithms:** LZ77 (brute, hashchain, lazy, parallel), LzSeq (code+extra-bits, repeat offsets, 128KB window), Huffman, BWT (SA-IS), MTF, RLE, FSE, rANS
-- **Pipelines:** Deflate (LZ77+Huffman), Bw (BWT+MTF+RLE+FSE), Lzr (LZ77+rANS), Lzf (LZ77+FSE), LzSeqR (LzSeq+rANS), LzSeqH (LzSeq+Huffman), SortLz (sort-LZ77+FSE) — Deflate, Lzr, and Lzf use multi-stream entropy coding for ~16-18% better compression; LzSeqR/LzSeqH use zstd-style code+extra-bits encoding with 6-stream demux; SortLz uses sort-based match finding (GPU-accelerated)
+- **Pipelines:** Bw (BWT+MTF+RLE+FSE), Lzf (LzSeq+FSE), LzSeqR (LzSeq+rANS), LzSeqH (LzSeq+Huffman), SortLz (sort-LZ77+FSE) — Lzf and LzSeqR/LzSeqH use multi-stream entropy coding for ~16-18% better compression; LzSeqR/LzSeqH use zstd-style code+extra-bits encoding with 6-stream demux; SortLz uses sort-based match finding (GPU-accelerated)
 - **Auto-selection:** Heuristic (`select_pipeline`) and trial-based (`select_pipeline_trial`) pipeline selection using data analysis (entropy, match density, run ratio, autocorrelation); LzSeqR included in trial candidates
 - **Data analysis:** `src/analysis.rs` — statistical profiling (Shannon entropy, autocorrelation, run ratio, match density, distribution shape) with sampling support
 - **Optimal parsing:** GPU top-K match table → CPU backward DP (4-6% better compression)
 - **Multi-threading:** Block-parallel and pipeline-parallel via V2 container format; within-block parallel LZ77 match finding (`compress_lazy_parallel`)
-- **SortLZ:** Sort-based match finder — standalone pipeline (ID 10) and pluggable `MatchFinder::SortLz` for Deflate/Lzr/Lzf/LzSeqR/LzSeqH; GPU radix sort batched (single submit); adaptive `select_match_finder()` heuristic; u64-optimized `extend_match`; 39.6% ratio (beats Deflate 43.4%)
-- **GPU kernels:** LZ77 hash-table (fast), LZ77 batch/per-position (legacy), LZ77 top-K, BWT radix sort + parallel rank assignment, SortLZ radix sort + match verification, Huffman encode (two-pass with Blelloch prefix sum), GPU Deflate chaining (LZ77→Huffman on device)
+- **SortLZ:** Sort-based match finder — standalone pipeline (ID 10) and pluggable `MatchFinder::SortLz` for Lzf/LzSeqR/LzSeqH; GPU radix sort batched (single submit); adaptive `select_match_finder()` heuristic; u64-optimized `extend_match`; 39.6% ratio
+- **GPU kernels:** LZ77 hash-table (fast), LZ77 batch/per-position (legacy), LZ77 top-K, BWT radix sort + parallel rank assignment, SortLZ radix sort + match verification, Huffman encode (two-pass with Blelloch prefix sum)
 - **Tooling:** CLI (`pz` with `-a`/`--auto` and `--trial` flags), C FFI, Criterion benchmarks, CI (3 OS)
 - **Fuzz testing (M5.3):** `cargo-fuzz` infrastructure with 12 targets covering all algorithms and pipelines (roundtrip + crash resistance)
 
@@ -23,9 +23,9 @@ For day-to-day development instructions, see `CLAUDE.md`.
 - **CPU:** Uses SA-IS (Suffix Array by Induced Sorting) — O(n) linear time via doubled-text-with-sentinel strategy.
 - **GPU:** Uses LSB-first 8-bit radix sort with prefix-doubling for suffix array construction. Replaced earlier bitonic sort (PR #21). Features adaptive key width (skip zero-digit radix passes) and event chain batching (one host sync per doubling step). Rank assignment runs on GPU via Blelloch prefix sum + scatter. Still slower than CPU SA-IS at all sizes but dramatically improved from bitonic sort (7-14x faster). The GPU uses circular comparison `(sa[i]+k) % n` vs CPU SA-IS's doubled-text approach — both produce valid BWTs that round-trip correctly.
 
-## Multi-stream Deflate
+## Multi-stream entropy coding
 
-The Deflate, Lzr, and Lzf pipelines use **multi-stream entropy coding** to improve
+The Lzf and LzSeqR pipelines use **multi-stream entropy coding** to improve
 compression ratio by separating LZ77 output into independent byte streams with
 tighter symbol distributions. Instead of feeding one mixed stream to the entropy
 coder, the encoder deinterleaves tokens into three streams:
@@ -36,7 +36,7 @@ coder, the encoder deinterleaves tokens into three streams:
 | **Lengths** | Match lengths (capped to u8) | Length distribution is highly skewed (short matches dominate) |
 | **Literals** | Literal bytes + low offset bytes + next bytes | Natural-language / binary byte distribution |
 
-Each stream gets its own Huffman tree (Deflate), FSE table (Lzf), or rANS context (Lzr),
+Each stream gets its own FSE table (Lzf) or rANS context (LzSeqR),
 yielding lower per-stream entropy than a single combined stream.
 
 ### Encoding format
@@ -65,14 +65,12 @@ Comparison on Canterbury + Large corpus (14 files, 13.3 MB total), averaged over
 
 | Pipeline | Before (bytes) | After (bytes) | Size delta | Throughput delta |
 |----------|---------------|--------------|------------|-----------------|
-| Deflate  | 6,319,168     | 5,301,184    | **-16.1%** | +1.6% faster    |
 | Lzf      | 6,199,044     | 5,107,601    | **-17.6%** | +2.8% faster    |
 
 **Decompression throughput:**
 
 | Pipeline | Throughput delta |
 |----------|-----------------|
-| Deflate  | **+8.4%** faster |
 | Lzf      | **+2.4%** faster |
 
 Multi-stream is a pure win: better compression **and** faster speed. The largest
@@ -132,16 +130,9 @@ Every symbol present in the input gets at least frequency 1. Excess is trimmed
 from the most-frequent symbol; deficit is added to it. The normalization code
 is shared conceptually with `src/fse.rs` (both operate on power-of-2 tables).
 
-### Pipeline: Lzr (LZ77 + rANS)
-
-`Pipeline::Lzr` (ID 3) reuses the existing multi-stream LZ77 architecture
-(offsets, lengths, literals) with rANS as the entropy coder instead of Huffman
-(Deflate) or FSE (Lzf). It participates in auto-selection via
-`select_pipeline_trial()`.
-
 ### Forward TODOs
 
-See `docs/exec-plans/tech-debt-tracker.md` for rANS SIMD decode and reciprocal multiplication work items. Benchmark integration (rANS/Lzr in `benches/throughput.rs` and `benches/stages.rs`) is also pending.
+See `docs/exec-plans/tech-debt-tracker.md` for rANS SIMD decode and reciprocal multiplication work items.
 
 ## SIMD acceleration
 `src/simd.rs` provides runtime-dispatched SIMD for CPU hot paths:
@@ -170,8 +161,7 @@ match verification. Zero atomics, fully deterministic — ideal for GPU executio
 | **`MatchFinder::SortLz`** | Pluggable match finder for other pipelines | Host pipeline's format |
 
 When used as a `MatchFinder`, SortLZ is transparent to the wire format — the
-output is 100% compatible with the host pipeline (Deflate, Lzr, Lzf, LzSeqR,
-LzSeqH). The consumer and decompressor see no difference.
+output is 100% compatible with the host pipeline (Lzf, LzSeqR, LzSeqH). The consumer and decompressor see no difference.
 
 ### Pipeline::SortLz wire format (per block)
 
@@ -221,19 +211,7 @@ Uses GPU radix sort (same kernels as BWT) + GPU match verification:
 | 256KB| 131 MB/s     | 31 MB/s   | 53 MB/s   | **1.7x faster** |
 | 4MB  | 142 MB/s     | 8 MB/s    | 89 MB/s   | **10.6x faster** |
 
-SortLZ compression ratio: **39.6%** (vs hashchain+Deflate 43.4%, BWT 32.7%).
-
-## GPU stage chaining
-The Deflate GPU path chains LZ77 → Huffman on the GPU with minimized transfers:
-1. GPU: LZ77 hash-table kernel → download match array → CPU dedupe + serialize
-2. GPU: upload LZ77 bytes once → `ByteHistogram` kernel → download only 256×u32 (1KB)
-3. CPU: build Huffman tree from histogram, produce code LUT
-4. GPU: Huffman encode (reusing LZ77 buffer) with Blelloch prefix sum
-5. GPU: download final encoded bitstream
-
-The `ByteHistogram` kernel eliminates the need to scan LZ77 data on CPU for frequency counting — only 1KB of histogram data is transferred instead of the full LZ77 stream.
-
-This is activated automatically when a GPU backend is selected and input ≥ `MIN_GPU_INPUT_SIZE`.
+SortLZ compression ratio: **39.6%** (vs BWT 32.7%).
 
 ## Parallel LZ77
 `compress_lazy_parallel(input, num_threads)` pre-computes matches in parallel (each thread builds its own hash chain), then serializes sequentially with lazy evaluation. Thresholds:
@@ -262,14 +240,6 @@ This is activated automatically when a GPU backend is selected and input ≥ `MI
 
 GPU Huffman with Blelloch prefix sum crosses over ~128KB. At 256KB the GPU scan path is 3.4x faster than CPU.
 
-### Deflate chained (GPU LZ77 → GPU Huffman)
-
-| Size | CPU 1-thread | GPU chained | Speedup |
-|------|-------------|-------------|---------|
-| 64KB | 1.63ms (38 MiB/s) | 3.01ms (21 MiB/s) | CPU 1.8x faster |
-| 256KB | 6.06ms (41 MiB/s) | 4.93ms (51 MiB/s) | **GPU 1.2x faster** |
-| 1MB | 23.5ms (43 MiB/s) | 18.3ms (55 MiB/s) | **GPU 1.3x faster** |
-
 ### BWT GPU (radix sort)
 
 | Size | GPU radix | Throughput | Old bitonic | Speedup vs bitonic |
diff --git a/Cargo.toml b/Cargo.toml
index 3d862b7..d0ae0e6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,10 +21,6 @@ path = "src/bin/pz.rs"
 [dev-dependencies]
 criterion = { version = "0.5", features = ["html_reports"] }
 
-[[bench]]
-name = "throughput_deflate"
-harness = false
-
 [[bench]]
 name = "throughput_bw"
 harness = false
@@ -101,10 +97,6 @@ harness = false
 name = "stages_auto_select"
 harness = false
 
-[[bench]]
-name = "stages_deflate_webgpu_chained"
-harness = false
-
 [[bench]]
 name = "rans_decode_bench"
 harness = false
diff --git a/README.md b/README.md
index b59f7c9..dc448e8 100644
--- a/README.md
+++ b/README.md
@@ -6,12 +6,11 @@ Lossless data compression library with GPU acceleration, written in Rust.
 
 | Pipeline | Stages | Similar to |
 |----------|--------|------------|
-| **Deflate** | LZ77 + Multi-stream Huffman | gzip |
 | **BW** | BWT + MTF + RLE + FSE | bzip2 |
 | **LZR** | LZ77 + Multi-stream rANS | — |
 | **LZF** | LZ77 + Multi-stream FSE | zstd-like |
 
-Deflate, Lzr, and Lzf use **multi-stream entropy coding**: LZ77 output is split into separate offset, length, and literal streams, each with its own entropy coder. This yields ~16-18% better compression than single-stream encoding with no speed penalty. LZR uses rANS (range ANS) — a multiply-shift entropy coder designed for SIMD and GPU parallelism via interleaved decode states. LZF uses FSE (Finite State Entropy) — a fast table-driven tANS coder similar to zstd.
+Lzr and Lzf use **multi-stream entropy coding**: LZ77 output is split into separate offset, length, and literal streams, each with its own entropy coder. This yields ~16-18% better compression than single-stream encoding with no speed penalty. LZR uses rANS (range ANS) — a multiply-shift entropy coder designed for SIMD and GPU parallelism via interleaved decode states. LZF uses FSE (Finite State Entropy) — a fast table-driven tANS coder similar to zstd.
 
 Optional WebGPU support for GPU-accelerated LZ77 match finding and BWT suffix array construction.
 
@@ -77,7 +76,7 @@ Uses [criterion](https://github.com/bheisler/criterion.rs) for statistical bench
 ```
 cargo bench                         # CPU benchmarks
 cargo bench --features webgpu       # includes GPU benchmarks
-cargo bench --bench throughput_deflate
+cargo bench --bench throughput_lzf
 cargo bench --bench stages_lz77
 ```
 
diff --git a/benches/stages_deflate_webgpu_chained.rs b/benches/stages_deflate_webgpu_chained.rs
deleted file mode 100644
index 840948f..0000000
--- a/benches/stages_deflate_webgpu_chained.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-#[path = "stages_common.rs"]
-#[cfg_attr(not(feature = "webgpu"), allow(dead_code))]
-mod stages_common;
-
-use criterion::{criterion_group, criterion_main, Criterion};
-#[cfg(feature = "webgpu")]
-use criterion::{BenchmarkId, Throughput};
-#[cfg(feature = "webgpu")]
-use stages_common::{cap, get_test_data};
-
-#[cfg(feature = "webgpu")]
-fn bench_deflate_webgpu_chained(c: &mut Criterion) {
-    use pz::pipeline::CompressOptions;
-    use pz::webgpu::WebGpuEngine;
-
-    let engine = match WebGpuEngine::new() {
-        Ok(e) => std::sync::Arc::new(e),
-        Err(_) => {
-            eprintln!("stages: no WebGPU device, skipping WebGPU Deflate chained benchmarks");
-            return;
-        }
-    };
-
-    let mut group = c.benchmark_group("deflate_webgpu_chained");
-    cap(&mut group);
-    for &size in &[65536, 262_144, 1_048_576, 4_194_304, 16_777_216] {
-        let data = get_test_data(size);
-        group.throughput(Throughput::Bytes(size as u64));
-
-        group.bench_with_input(BenchmarkId::new("cpu_1t", size), &data, |b, data| {
-            let opts = CompressOptions {
-                threads: 1,
-                ..Default::default()
-            };
-            b.iter(|| {
-                pz::pipeline::compress_with_options(data, pz::pipeline::Pipeline::Deflate, &opts)
-                    .unwrap()
-            });
-        });
-
-        let eng = engine.clone();
-        group.bench_with_input(
-            BenchmarkId::new("webgpu_modular", size),
-            &data,
-            move |b, data| {
-                let opts = CompressOptions {
-                    backend: pz::pipeline::Backend::WebGpu,
-                    threads: 1,
-                    webgpu_engine: Some(eng.clone()),
-                    ..Default::default()
-                };
-                b.iter(|| {
-                    pz::pipeline::compress_with_options(
-                        data,
-                        pz::pipeline::Pipeline::Deflate,
-                        &opts,
-                    )
-                    .unwrap()
-                });
-            },
-        );
-    }
-    group.finish();
-}
-
-#[cfg(not(feature = "webgpu"))]
-fn bench_deflate_webgpu_chained(_c: &mut Criterion) {}
-
-criterion_group!(benches, bench_deflate_webgpu_chained);
-criterion_main!(benches);
diff --git a/benches/stages_lz77.rs b/benches/stages_lz77.rs
index 963b46e..7fd58c0 100644
--- a/benches/stages_lz77.rs
+++ b/benches/stages_lz77.rs
@@ -91,7 +91,7 @@ fn bench_lz77_webgpu_batched(c: &mut Criterion) {
 
         let eng = engine.clone();
         group.bench_with_input(
-            BenchmarkId::new("gpu_batched_deflate", size),
+            BenchmarkId::new("gpu_batched_lzf", size),
             &data,
             move |b, data| {
                 let opts = CompressOptions {
@@ -100,23 +100,19 @@ fn bench_lz77_webgpu_batched(c: &mut Criterion) {
                     threads: 4,
                     ..Default::default()
                 };
-                b.iter(|| {
-                    pz::pipeline::compress_with_options(data, Pipeline::Deflate, &opts).unwrap()
-                });
+                b.iter(|| pz::pipeline::compress_with_options(data, Pipeline::Lzf, &opts).unwrap());
             },
         );
 
         group.bench_with_input(
-            BenchmarkId::new("cpu_parallel_deflate", size),
+            BenchmarkId::new("cpu_parallel_lzf", size),
             &data,
             |b, data| {
                 let opts = CompressOptions {
                     threads: 4,
                     ..Default::default()
                 };
-                b.iter(|| {
-                    pz::pipeline::compress_with_options(data, Pipeline::Deflate, &opts).unwrap()
-                });
+                b.iter(|| pz::pipeline::compress_with_options(data, Pipeline::Lzf, &opts).unwrap());
             },
         );
     }
diff --git a/benches/stages_match_finders.rs b/benches/stages_match_finders.rs
index 3476102..ae9b919 100644
--- a/benches/stages_match_finders.rs
+++ b/benches/stages_match_finders.rs
@@ -203,7 +203,7 @@ fn bench_gpu_match_finding(c: &mut Criterion) {
     }
     group.finish();
 
-    // --- Part 3: Cross-pipeline GPU sortlz (Deflate, LzSeqR, Lzf) at 256K ---
+    // --- Part 3: Cross-pipeline GPU sortlz (LzSeqR, Lzf) at 256K ---
     let mut group = c.benchmark_group("gpu_sortlz_pipelines");
     cap(&mut group);
 
@@ -211,11 +211,7 @@ fn bench_gpu_match_finding(c: &mut Criterion) {
     let data = get_test_data(size);
     group.throughput(Throughput::Bytes(size as u64));
 
-    for (name, pipeline) in [
-        ("deflate", Pipeline::Deflate),
-        ("lzseqr", Pipeline::LzSeqR),
-        ("lzf", Pipeline::Lzf),
-    ] {
+    for (name, pipeline) in [("lzseqr", Pipeline::LzSeqR), ("lzf", Pipeline::Lzf)] {
         // CPU hashchain baseline
         let opts = CompressOptions {
             parse_strategy: ParseStrategy::Lazy,
diff --git a/benches/throughput_deflate.rs b/benches/throughput_deflate.rs
deleted file mode 100644
index 1f1106b..0000000
--- a/benches/throughput_deflate.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-#[path = "throughput_common.rs"]
-mod throughput_common;
-
-use criterion::{criterion_group, criterion_main, Criterion};
-use pz::pipeline::Pipeline;
-use throughput_common::{run_throughput_benches, ThroughputBenchSpec};
-
-const SPEC: ThroughputBenchSpec = ThroughputBenchSpec {
-    id: "deflate",
-    pipeline: Pipeline::Deflate,
-    parallel: true,
-    large: true,
-    decompress_large: true,
-    webgpu: true,
-    webgpu_large: true,
-};
-
-fn bench(c: &mut Criterion) {
-    run_throughput_benches(c, &SPEC);
-}
-
-criterion_group!(benches, bench);
-criterion_main!(benches);
diff --git a/docs/DESIGN.md b/docs/DESIGN.md
index 10f9e4d..a4fb08c 100644
--- a/docs/DESIGN.md
+++ b/docs/DESIGN.md
@@ -19,15 +19,15 @@ Each algorithm (`bwt`, `huffman`, `lz77`, `fse`, `rans`, etc.) is:
 - Usable standalone or in pipelines
 - Not tied to a specific compression format
 
-Pipelines (`deflate`, `bw`, `lzr`, `lzf`) combine algorithms via the demuxer pattern:
+Pipelines (`bw`, `lzf`, `lzseqr`) combine algorithms via the demuxer pattern:
 ```rust
 // Pipeline = sequence of stages
-LZ77 → demux into streams → Huffman encode → merge streams
+LZ77 → demux into streams → FSE/rANS encode → merge streams
 ```
 
 **Why:** Flexibility. New pipelines reuse existing algorithms. New algorithms enhance all pipelines.
 
-**Example:** The same LZ77 implementation works in Deflate (LZ77+Huffman), Lzf (LZ77+FSE), and Lzr (LZ77+rANS) pipelines.
+**Example:** The same LZ77 implementation works in Lzf (LZ77+FSE) and Lzr (LZ77+rANS) pipelines.
 
 ### 2. GPU-Friendly Design Patterns
 
@@ -210,7 +210,7 @@ Below these thresholds, kernel launch overhead dominates.
 
 ### Multi-Stream Entropy Coding
 
-LZ-based pipelines (Deflate, Lzf, Lzr) use multi-stream encoding (3 independent streams per block). See `ARCHITECTURE.md` for benchmark results (16-18% better compression, 2-8% faster decompression) and `design-docs/pipeline-architecture.md` for the stream layout.
+LZ-based pipelines (Lzf, Lzr) use multi-stream encoding (3 independent streams per block). See `ARCHITECTURE.md` for benchmark results (Lzf: ~18% better compression, ~2% faster decompression) and `design-docs/pipeline-architecture.md` for the stream layout.
 
 ### Memory Management
 
diff --git a/examples/block_size_experiment.rs b/examples/block_size_experiment.rs
index 4eceb54..dec106e 100644
--- a/examples/block_size_experiment.rs
+++ b/examples/block_size_experiment.rs
@@ -35,7 +35,7 @@ const BLOCK_SIZES: &[usize] = &[
 ];
 
 /// Pipelines to test (LZ77-based, GPU-eligible).
-const PIPELINES: &[Pipeline] = &[Pipeline::Deflate, Pipeline::Lzf];
+const PIPELINES: &[Pipeline] = &[Pipeline::Lzf];
 
 fn load_data(size: usize) -> Vec<u8> {
     let manifest = Path::new(env!("CARGO_MANIFEST_DIR"));
diff --git a/examples/decode_profile.rs b/examples/decode_profile.rs
index bcd49e2..f11de35 100644
--- a/examples/decode_profile.rs
+++ b/examples/decode_profile.rs
@@ -36,11 +36,7 @@ fn main() {
 
     let iters = 20;
 
-    for &(name, pipeline) in &[
-        ("LzSeqR", Pipeline::LzSeqR),
-        ("LzSeqH", Pipeline::LzSeqH),
-        ("Deflate", Pipeline::Deflate),
-    ] {
+    for &(name, pipeline) in &[("LzSeqR", Pipeline::LzSeqR), ("LzSeqH", Pipeline::LzSeqH)] {
         let options = CompressOptions::default();
         let compressed = pipeline::compress_with_options(&data, pipeline, &options).unwrap();
         let ratio = compressed.len() as f64 / data.len() as f64 * 100.0;
diff --git a/examples/explore_pipelines.rs b/examples/explore_pipelines.rs
index 785aa55..d381b5e 100644
--- a/examples/explore_pipelines.rs
+++ b/examples/explore_pipelines.rs
@@ -307,8 +307,6 @@ fn main() {
     #[allow(unused_mut)]
     let mut pz_configs: Vec<(Pipeline, ParseStrategy, usize, Backend)> = vec![
         // Single-threaded CPU variants
-        (Pipeline::Deflate, ParseStrategy::Lazy, 1, Backend::Cpu),
-        (Pipeline::Deflate, ParseStrategy::Optimal, 1, Backend::Cpu),
         (Pipeline::Lzf, ParseStrategy::Lazy, 1, Backend::Cpu),
         (Pipeline::Lzf, ParseStrategy::Optimal, 1, Backend::Cpu),
         (Pipeline::Bw, ParseStrategy::Auto, 1, Backend::Cpu),
@@ -316,7 +314,6 @@ fn main() {
         // Experimental: LZSS pipeline
         (Pipeline::LzssR, ParseStrategy::Auto, 1, Backend::Cpu),
         // Multi-threaded CPU
-        (Pipeline::Deflate, ParseStrategy::Lazy, 0, Backend::Cpu),
         (Pipeline::Lzf, ParseStrategy::Lazy, 0, Backend::Cpu),
         (Pipeline::Bw, ParseStrategy::Auto, 0, Backend::Cpu),
         (Pipeline::Bbw, ParseStrategy::Auto, 0, Backend::Cpu),
@@ -327,7 +324,6 @@ fn main() {
     #[cfg(feature = "webgpu")]
     if has_webgpu {
         pz_configs.extend([
-            (Pipeline::Deflate, ParseStrategy::Auto, 1, Backend::WebGpu),
             (Pipeline::Lzf, ParseStrategy::Auto, 1, Backend::WebGpu),
             (Pipeline::Bw, ParseStrategy::Auto, 1, Backend::WebGpu),
         ]);
diff --git a/examples/pipeline_comparison.rs b/examples/pipeline_comparison.rs
index 94315be..3ee871d 100644
--- a/examples/pipeline_comparison.rs
+++ b/examples/pipeline_comparison.rs
@@ -17,7 +17,6 @@ fn test_pipeline_comparison(name: &str, data: Vec<u8>) {
     let pipelines = vec![
         ("Lzf (LzSeq+FSE)", Pipeline::Lzf),
         ("LzSeqR (LzSeq+rANS)", Pipeline::LzSeqR),
-        ("Deflate (LZ77+Huffman)", Pipeline::Deflate),
     ];
 
     println!(
diff --git a/examples/profile.rs b/examples/profile.rs
index 2f53e38..addfb52 100644
--- a/examples/profile.rs
+++ b/examples/profile.rs
@@ -5,7 +5,7 @@
 /// Usage:
 ///   cargo build --profile profiling --example profile
 ///   samply record ./target/profiling/examples/profile --pipeline lzf
-///   samply record ./target/profiling/examples/profile --pipeline deflate --decompress
+///   samply record ./target/profiling/examples/profile --pipeline lzseqr --decompress
 ///   samply record ./target/profiling/examples/profile --stage lz77
 ///   samply record ./target/profiling/examples/profile --stage fse --size 1048576
 use std::path::Path;
@@ -24,9 +24,7 @@ fn usage() {
     eprintln!("Usage: profile [OPTIONS]");
     eprintln!();
     eprintln!("Options:");
-    eprintln!(
-        "  --pipeline P    Pipeline: deflate, bw, bbw, lzf, lzfi, lzssr, lzseqr (default: lzf)"
-    );
+    eprintln!("  --pipeline P    Pipeline: bw, bbw, lzf, lzfi, lzssr, lzseqr (default: lzf)");
     eprintln!("  --stage S       Profile a single stage instead of full pipeline:");
     eprintln!("                    lz77, huffman, bwt, mtf, rle, fse, rans");
     eprintln!("  --decompress    Profile decompression instead of compression");
@@ -604,7 +602,6 @@ fn main() {
         profile_stage(&data, stage_name, decompress, iterations, rans_profile_opts);
     } else {
         let pipe = match pipeline_name.as_str() {
-            "deflate" => Pipeline::Deflate,
             "bw" => Pipeline::Bw,
             "bbw" => Pipeline::Bbw,
             "lzf" => Pipeline::Lzf,
@@ -613,7 +610,7 @@ fn main() {
             "lzseqr" => Pipeline::LzSeqR,
             other => {
                 eprintln!("unknown pipeline: {}", other);
-                eprintln!("valid pipelines: deflate, bw, bbw, lzf, lzfi, lzssr, lzseqr");
+                eprintln!("valid pipelines: bw, bbw, lzf, lzfi, lzssr, lzseqr");
                 std::process::exit(1);
             }
         };
diff --git a/examples/profile_decode.rs b/examples/profile_decode.rs
index 66b77de..87c0516 100644
--- a/examples/profile_decode.rs
+++ b/examples/profile_decode.rs
@@ -83,14 +83,14 @@ fn profile_lzseqr_decode(data: &[u8], label: &str) {
     );
 }
 
-fn profile_deflate_decode(data: &[u8], label: &str) {
+fn profile_lzf_decode(data: &[u8], label: &str) {
     use pz::pipeline::{self, CompressOptions, Pipeline};
 
     let opts = CompressOptions {
         threads: 1,
         ..Default::default()
     };
-    let compressed = pipeline::compress_with_options(data, Pipeline::Deflate, &opts).unwrap();
+    let compressed = pipeline::compress_with_options(data, Pipeline::Lzf, &opts).unwrap();
 
     let iters = 10;
     let mut total_ns = 0u128;
@@ -104,7 +104,7 @@ fn profile_deflate_decode(data: &[u8], label: &str) {
     let avg_ms = total_ns as f64 / iters as f64 / 1e6;
     let tp = mb / (avg_ms / 1000.0);
     println!(
-        "Deflate decode {} ({:.1} MB) — {:.1} ms ({:.1} MB/s)",
+        "Lzf decode {} ({:.1} MB) — {:.1} ms ({:.1} MB/s)",
         label, mb, avg_ms, tp
     );
 }
@@ -121,7 +121,7 @@ fn main() {
             Ok(data) => {
                 profile_bw_decode(&data, label);
                 profile_lzseqr_decode(&data, label);
-                profile_deflate_decode(&data, label);
+                profile_lzf_decode(&data, label);
                 println!();
             }
             Err(e) => println!("{}: {}", label, e),
diff --git a/examples/sortlz_lzseq_eval.rs b/examples/sortlz_lzseq_eval.rs
index 8973153..34e1553 100644
--- a/examples/sortlz_lzseq_eval.rs
+++ b/examples/sortlz_lzseq_eval.rs
@@ -1,6 +1,6 @@
 // Evaluate SortLZ+LzSeq hybrid pipeline vs alternatives.
 // Compares: LzSeqR (hashchain), LzSeqR (sortlz), LzSeqH (hashchain), LzSeqH (sortlz),
-//           SortLz (native), Deflate, Bw
+//           SortLz (native), Bw
 use std::time::Instant;
 
 use pz::pipeline::{self, CompressOptions, MatchFinder, Pipeline};
@@ -57,7 +57,6 @@ fn main() {
     ];
 
     let configs: Vec<(Pipeline, MatchFinder, &str)> = vec![
-        (Pipeline::Deflate, MatchFinder::HashChain, "deflate"),
         (Pipeline::Bw, MatchFinder::HashChain, "bw"),
         (Pipeline::LzSeqR, MatchFinder::HashChain, "lzseqr-hc"),
         (Pipeline::LzSeqR, MatchFinder::SortLz, "lzseqr-slz"),
diff --git a/examples/webgpu_profile.rs b/examples/webgpu_profile.rs
index 5f7ab6d..53f78bc 100644
--- a/examples/webgpu_profile.rs
+++ b/examples/webgpu_profile.rs
@@ -144,8 +144,8 @@ fn run() {
         cpu_huff.as_secs_f64() * 1000.0
     );
 
-    // Phase 5: Full pipeline end-to-end (deflate)
-    eprintln!("\n--- Full pipeline compress (4MB, deflate) ---");
+    // Phase 5: Full pipeline end-to-end (lzf)
+    eprintln!("\n--- Full pipeline compress (4MB, lzf) ---");
     let opts_gpu = CompressOptions {
         backend: Backend::WebGpu,
         webgpu_engine: Some(engine.clone()),
@@ -154,19 +154,19 @@ fn run() {
     let opts_cpu = CompressOptions::default();
 
     // Warmup
-    let _ = pipeline::compress_with_options(&large_data, Pipeline::Deflate, &opts_gpu).unwrap();
+    let _ = pipeline::compress_with_options(&large_data, Pipeline::Lzf, &opts_gpu).unwrap();
 
     let iters = 3;
     let t0 = Instant::now();
     for _ in 0..iters {
         let _ = std::hint::black_box(
-            pipeline::compress_with_options(&large_data, Pipeline::Deflate, &opts_gpu).unwrap(),
+            pipeline::compress_with_options(&large_data, Pipeline::Lzf, &opts_gpu).unwrap(),
         );
     }
     let gpu_full = t0.elapsed() / iters;
     let gpu_full_mbps = large_data.len() as f64 / gpu_full.as_secs_f64() / (1024.0 * 1024.0);
     eprintln!(
-        "GPU deflate: {:.0} ms ({:.1} MB/s)",
+        "GPU lzf:     {:.0} ms ({:.1} MB/s)",
         gpu_full.as_secs_f64() * 1000.0,
         gpu_full_mbps
     );
@@ -174,13 +174,13 @@ fn run() {
     let t0 = Instant::now();
     for _ in 0..iters {
         let _ = std::hint::black_box(
-            pipeline::compress_with_options(&large_data, Pipeline::Deflate, &opts_cpu).unwrap(),
+            pipeline::compress_with_options(&large_data, Pipeline::Lzf, &opts_cpu).unwrap(),
         );
     }
     let cpu_full = t0.elapsed() / iters;
     let cpu_full_mbps = large_data.len() as f64 / cpu_full.as_secs_f64() / (1024.0 * 1024.0);
     eprintln!(
-        "CPU deflate: {:.0} ms ({:.1} MB/s)",
+        "CPU lzf:     {:.0} ms ({:.1} MB/s)",
         cpu_full.as_secs_f64() * 1000.0,
         cpu_full_mbps
     );
@@ -290,11 +290,11 @@ fn run() {
 
     // Compression ratio comparison
     eprintln!("\n--- Compression ratios (4MB) ---");
-    let compressed_deflate =
-        pipeline::compress_with_options(&large_data, Pipeline::Deflate, &opts_cpu).unwrap();
+    let compressed_lzf =
+        pipeline::compress_with_options(&large_data, Pipeline::Lzf, &opts_cpu).unwrap();
     eprintln!(
-        "deflate: {:.2}%",
-        compressed_deflate.len() as f64 / large_data.len() as f64 * 100.0
+        "lzf:     {:.2}%",
+        compressed_lzf.len() as f64 / large_data.len() as f64 * 100.0
     );
     eprintln!(
         "lzfi:    {:.2}%",
@@ -318,7 +318,7 @@ fn run() {
     );
 
     // Phase 7: Single-threaded CPU comparison
-    eprintln!("\n--- Single-threaded CPU deflate (4MB) ---");
+    eprintln!("\n--- Single-threaded CPU lzf (4MB) ---");
     let opts_cpu_1t = CompressOptions {
         threads: 1,
         ..Default::default()
@@ -327,7 +327,7 @@ fn run() {
     let t0 = Instant::now();
     for _ in 0..iters {
         let _ = std::hint::black_box(
-            pipeline::compress_with_options(&large_data, Pipeline::Deflate, &opts_cpu_1t).unwrap(),
+            pipeline::compress_with_options(&large_data, Pipeline::Lzf, &opts_cpu_1t).unwrap(),
         );
     }
     let cpu_1t = t0.elapsed() / iters;
@@ -460,17 +460,17 @@ fn run() {
     );
     eprintln!("\n-- Full pipelines (4MB) --");
     eprintln!(
-        "GPU deflate:  {:.0} ms ({:.1} MB/s)",
+        "GPU lzf:      {:.0} ms ({:.1} MB/s)",
         gpu_full.as_secs_f64() * 1000.0,
         gpu_full_mbps
     );
     eprintln!(
-        "CPU deflate:  {:.0} ms ({:.1} MB/s)",
+        "CPU lzf:      {:.0} ms ({:.1} MB/s)",
         cpu_full.as_secs_f64() * 1000.0,
         cpu_full_mbps
     );
     eprintln!(
-        "CPU 1T defl:  {:.0} ms ({:.1} MB/s)",
+        "CPU 1T lzf:   {:.0} ms ({:.1} MB/s)",
         cpu_1t.as_secs_f64() * 1000.0,
         cpu_1t_mbps
     );
diff --git a/fuzz/fuzz_targets/fuzz_pipeline_roundtrip.rs b/fuzz/fuzz_targets/fuzz_pipeline_roundtrip.rs
index ae7492f..000af0f 100644
--- a/fuzz/fuzz_targets/fuzz_pipeline_roundtrip.rs
+++ b/fuzz/fuzz_targets/fuzz_pipeline_roundtrip.rs
@@ -11,7 +11,6 @@ fuzz_target!(|data: &[u8]| {
     let input = if data.len() > 64 * 1024 { &data[..64 * 1024] } else { data };
 
     let pipelines = [
-        Pipeline::Deflate,
         Pipeline::Bw,
         Pipeline::Bbw,
         Pipeline::Lzf,
diff --git a/scripts/analyze-ratio.sh b/scripts/analyze-ratio.sh
index 4b8a03d..169d4f5 100755
--- a/scripts/analyze-ratio.sh
+++ b/scripts/analyze-ratio.sh
@@ -46,7 +46,7 @@ Output:
 
 Example:
   ./scripts/analyze-ratio.sh samples/cantrbry/alice29.txt
-  ./scripts/analyze-ratio.sh -p deflate samples/cantrbry/enwik8
+  ./scripts/analyze-ratio.sh -p lzf samples/cantrbry/enwik8
 EOF
 }
 
@@ -159,7 +159,7 @@ gap_pct=$(awk "BEGIN { printf \"%.2f%%\", (($PZ_SIZE - $GZ_SIZE) / $ORIG_SIZE) *
 
 # Pipeline category: LZ-based or BWT-based (affects interpretation of gap)
 case "$PIPELINE" in
-    deflate|lzf|lzfi|lzseqr|lzseqh|lzssr|sortlz)
+    lzf|lzfi|lzseqr|lzseqh|lzssr|sortlz)
         PIPELINE_CLASS="LZ-based" ;;
     bw|bbw)
         PIPELINE_CLASS="BWT-based" ;;
diff --git a/scripts/bench.sh b/scripts/bench.sh
index 12e05da..9b02144 100755
--- a/scripts/bench.sh
+++ b/scripts/bench.sh
@@ -34,7 +34,7 @@ Usage:
 Options:
   -n, --iters N          Number of iterations per operation (default: 3)
   -p, --pipelines LIST   Comma-separated list of pipelines to benchmark
-                         (default: deflate,lzf,lzseqr)
+                         (default: lzf,lzseqr)
   -t, --threads N        Pass thread count to pz (-t N; 0=auto, 1=single-threaded)
   --all                  Benchmark all available pipelines
   --pareto               Single-thread Pareto table: all pipelines + all competitors,
@@ -51,12 +51,12 @@ Use --silesia to also include the larger Silesia corpus (211 MB).
 Examples:
   ./scripts/bench.sh                              # all corpus, all pipelines
   ./scripts/bench.sh myfile.bin                   # specific file
-  ./scripts/bench.sh -p deflate,lzf               # subset of pipelines
+  ./scripts/bench.sh -p lzf,lzseqr                  # subset of pipelines
   ./scripts/bench.sh -t 1 -p lzseqr                # force single-threaded pz
   ./scripts/bench.sh -n 10                        # more iterations
   ./scripts/bench.sh --webgpu -p bw,bbw           # GPU-accelerated via WebGPU
   ./scripts/bench.sh --all                         # benchmark every pipeline
-  ./scripts/bench.sh -n 1 -p deflate,lzf file.txt # combine options
+  ./scripts/bench.sh -n 1 -p lzf,lzseqr file.txt   # combine options
   ./scripts/bench.sh -v                           # verbose output with full tables
 EOF
 }
@@ -101,7 +101,7 @@ while [[ $# -gt 0 ]]; do
             shift 2
             ;;
         --all)
-            PIPELINES=(deflate bw bbw lzf lzfi lzseqr lzseqh sortlz)
+            PIPELINES=(bw bbw lzf lzfi lzseqr lzseqh sortlz)
             shift
             ;;
         --silesia)
@@ -110,7 +110,7 @@ while [[ $# -gt 0 ]]; do
             ;;
         --pareto)
             PARETO=true
-            PIPELINES=(deflate bw bbw lzf lzfi lzseqr lzseqh sortlz)
+            PIPELINES=(bw bbw lzf lzfi lzseqr lzseqh sortlz)
             # Force single-thread for apples-to-apples comparison
             THREADS="1"
             shift
@@ -151,7 +151,7 @@ done
 
 # Default pipelines if none specified
 if [[ ${#PIPELINES[@]} -eq 0 ]]; then
-    PIPELINES=(deflate lzf lzseqr)
+    PIPELINES=(lzf lzseqr)
 fi
 
 # Collect input files from corpus if none given on command line
@@ -294,12 +294,12 @@ if [[ -n "$GPU_FLAG" ]]; then
     gpu_probe="$BENCH_TMPDIR/_gpu_probe"
     printf 'x%.0s' {1..256} > "$gpu_probe"
     # Warmup (shader compilation caching)
-    "$PZ" -k -f -p deflate $GPU_FLAG "$gpu_probe" >/dev/null 2>&1
+    "$PZ" -k -f -p lzf $GPU_FLAG "$gpu_probe" >/dev/null 2>&1
     rm -f "$gpu_probe.pz"
     # Average 3 GPU runs on tiny data
     gpu_total=0
     for (( gi=0; gi<3; gi++ )); do
-        gpu_ns=$(time_ns "$PZ" -k -f -p deflate $GPU_FLAG "$gpu_probe")
+        gpu_ns=$(time_ns "$PZ" -k -f -p lzf $GPU_FLAG "$gpu_probe")
         gpu_total=$(( gpu_total + gpu_ns ))
         rm -f "$gpu_probe.pz"
     done
@@ -307,7 +307,7 @@ if [[ -n "$GPU_FLAG" ]]; then
     # CPU-only baseline on same data
     cpu_total=0
     for (( gi=0; gi<3; gi++ )); do
-        cpu_ns=$(time_ns "$PZ" -k -f -p deflate "$gpu_probe")
+        cpu_ns=$(time_ns "$PZ" -k -f -p lzf "$gpu_probe")
         cpu_total=$(( cpu_total + cpu_ns ))
         rm -f "$gpu_probe.pz"
     done
diff --git a/scripts/gpu-experiment-bench.sh b/scripts/gpu-experiment-bench.sh
index ec202bb..01476d4 100755
--- a/scripts/gpu-experiment-bench.sh
+++ b/scripts/gpu-experiment-bench.sh
@@ -62,7 +62,6 @@ FILES=(
 
 # Pipelines to benchmark.
 PIPELINES=(
-    deflate
     bw
     bbw
     lzf
diff --git a/scripts/perf-gate.sh b/scripts/perf-gate.sh
index 4d68483..d9778d2 100755
--- a/scripts/perf-gate.sh
+++ b/scripts/perf-gate.sh
@@ -18,7 +18,7 @@ SIZE=1048576
 ITERATIONS=20
 REPEATS=3
 THREADS=0
-PIPELINES_CSV="deflate,lzf,lzseqr"
+PIPELINES_CSV="lzf,lzseqr"
 CPU_ONLY=false
 UPDATE_BASELINE=false
 THROUGHPUT_REGRESSION_PCT=4.0
@@ -39,7 +39,7 @@ Options:
   --iterations N              profile-example loop iterations per run (default: 20)
   --repeats N                 repeated runs per case; must be odd (default: 3)
   --threads N                 pass thread count to profile (0=auto, default: 0)
-  --pipelines LIST            comma-separated pipelines (default: deflate,lzf,lzseqr)
+  --pipelines LIST            comma-separated pipelines (default: lzf,lzseqr)
   --cpu-only                  skip WebGPU matrix
   --cargo-profile NAME        cargo profile for example binary (default: profiling)
   --baseline FILE             baseline TSV path
diff --git a/scripts/profile.sh b/scripts/profile.sh
index bf3cae0..4bb4f8d 100755
--- a/scripts/profile.sh
+++ b/scripts/profile.sh
@@ -8,7 +8,7 @@
 #
 # Usage:
 #   ./scripts/profile.sh                              # lzf compress, 256KB
-#   ./scripts/profile.sh --pipeline deflate            # profile deflate compress
+#   ./scripts/profile.sh --pipeline lzseqr               # profile lzseqr compress
 #   ./scripts/profile.sh --stage lz77                  # profile lz77 encode only
 #   ./scripts/profile.sh --stage fse --decompress      # profile fse decode
 #   ./scripts/profile.sh --pipeline lzf --size 1048576 # 1MB input
@@ -50,7 +50,7 @@ BUILD OPTIONS:
     --no-default-features   Disable default features (pure CPU build)
 
 PROFILE BINARY OPTIONS (forwarded to the profile example):
-    --pipeline P            Pipeline: deflate, bw, bbw, lzf, lzfi, lzssr, lzseqr, lzseqh, sortlz (default: lzf)
+    --pipeline P            Pipeline: bw, bbw, lzf, lzfi, lzssr, lzseqr, lzseqh, sortlz (default: lzf)
     --stage S               Profile a single stage: lz77, huffman, bwt, mtf, rle, fse, rans
     --decompress            Profile decompression instead of compression
     --iterations N          Number of iterations (default: 200)
@@ -60,8 +60,8 @@ EXAMPLES:
     # Profile lz77 → profiling/a1b2c3d/lz77_encode_256KB.json.gz
     ./scripts/profile.sh --stage lz77
 
-    # Profile pipeline → profiling/a1b2c3d/deflate_decompress_256KB.json.gz
-    ./scripts/profile.sh --pipeline deflate --decompress
+    # Profile pipeline → profiling/a1b2c3d/lzseqr_decompress_256KB.json.gz
+    ./scripts/profile.sh --pipeline lzseqr --decompress
 
     # Open browser to inspect results interactively
     ./scripts/profile.sh --web --pipeline lzf
diff --git a/scripts/test-targets.sh b/scripts/test-targets.sh
index 63d210a..9fdbc02 100755
--- a/scripts/test-targets.sh
+++ b/scripts/test-targets.sh
@@ -14,7 +14,7 @@ Usage:
   ./scripts/test-targets.sh <test_target> [test_target...]
 
 Examples:
-  ./scripts/test-targets.sh lz77::tests::test_lazy_quality_repeated_pattern pipeline::tests::test_optimal_deflate_round_trip
+  ./scripts/test-targets.sh lz77::tests::test_lazy_quality_repeated_pattern pipeline::tests::test_lzf_round_trip
   ./scripts/test-targets.sh --features webgpu -- webgpu::tests::test_smoke
 USAGE
 }
diff --git a/scripts/trace-pipeline.sh b/scripts/trace-pipeline.sh
index 194be9f..9bb88a2 100755
--- a/scripts/trace-pipeline.sh
+++ b/scripts/trace-pipeline.sh
@@ -23,8 +23,8 @@ USAGE:
     ./scripts/trace-pipeline.sh [OPTIONS]
 
 OPTIONS:
-    -p, --pipeline NAME     Pipeline to trace (default: deflate)
-                            Options: deflate, lzf, lzfi, lzssr, bw, bbw, lzseqr, lzseqh, sortlz
+    -p, --pipeline NAME     Pipeline to trace (default: lzf)
+                            Options: lzf, lzfi, lzssr, bw, bbw, lzseqr, lzseqh, sortlz
     --format FORMAT         Output format: text (default) or mermaid
     -h, --help              Show this help
 
@@ -33,7 +33,7 @@ OUTPUT FORMATS:
     mermaid                 Mermaid flowchart syntax (paste into mermaid.live)
 
 EXAMPLES:
-    ./scripts/trace-pipeline.sh                        # deflate pipeline (text)
+    ./scripts/trace-pipeline.sh                        # lzf pipeline (text)
     ./scripts/trace-pipeline.sh -p bw                  # BWT pipeline
     ./scripts/trace-pipeline.sh -p lzfi --format mermaid  # FSE interleaved (mermaid)
 
@@ -44,7 +44,7 @@ UNDERSTANDING THE OUTPUT:
 EOF
 }
 
-PIPELINE="deflate"
+PIPELINE="lzf"
 FORMAT="text"
 
 while [[ $# -gt 0 ]]; do
@@ -79,10 +79,10 @@ done
 
 # Validate pipeline
 case "$PIPELINE" in
-    deflate|lzf|lzfi|lzssr|bw|bbw|lzseqr|lzseqh|sortlz) ;;
+    lzf|lzfi|lzssr|bw|bbw|lzseqr|lzseqh|sortlz) ;;
     *)
         echo "ERROR: unknown pipeline '$PIPELINE'" >&2
-        echo "Valid pipelines: deflate, lzf, lzfi, lzssr, bw, bbw, lzseqr, lzseqh, sortlz" >&2
+        echo "Valid pipelines: lzf, lzfi, lzssr, bw, bbw, lzseqr, lzseqh, sortlz" >&2
         exit 1
         ;;
 esac
@@ -104,11 +104,6 @@ esac
 # - compress_block_bw()/compress_block_bbw() in src/pipeline/blocks.rs:200-263
 
 case "$PIPELINE" in
-    deflate)
-        DEMUXER="Lz77"
-        STREAM_COUNT=3
-        ENTROPY="Huffman"
-        ;;
     lzf)
         DEMUXER="LzSeq"
         STREAM_COUNT=6
@@ -165,7 +160,7 @@ if [[ "$FORMAT" == "mermaid" ]]; then
     emit_mermaid "    Start([\"Input: raw bytes\"]) --> CompressBlock"
 fi
 
-# Trace LZ-based pipelines (Deflate, Lzf, Lzfi, LzssR, LzSeqR, LzSeqH)
+# Trace LZ-based pipelines (Lzf, Lzfi, LzssR, LzSeqR, LzSeqH)
 trace_lz_pipeline() {
     local pipeline=$1
     local demuxer=$2
@@ -391,7 +386,7 @@ trace_bwt_pipeline() {
 
 # Main trace dispatch
 case "$PIPELINE" in
-    deflate|lzf|lzfi|lzssr|lzseqr|lzseqh|sortlz)
+    lzf|lzfi|lzssr|lzseqr|lzseqh|sortlz)
         trace_lz_pipeline "$PIPELINE" "$DEMUXER" "$STREAM_COUNT" "$ENTROPY"
         ;;
     bw|bbw)
diff --git a/src/bin/pz.rs b/src/bin/pz.rs
index 069c5ac..bf7baaa 100644
--- a/src/bin/pz.rs
+++ b/src/bin/pz.rs
@@ -1,6 +1,6 @@
 /// pz – CLI compression tool for libpz.
 ///
-/// Works similar to gzip / zstd:
+/// Works similarly to zstd:
 ///   pz file.txt          → compress to file.txt.pz (removes original)
 ///   pz -d file.txt.pz    → decompress to file.txt (removes original)
 ///   pz -d file.txt.gz    → decompress gzip file to file.txt
@@ -32,7 +32,7 @@ fn usage() {
     eprintln!("  -k, --keep         Keep original file");
     eprintln!("  -f, --force        Overwrite existing output files");
     eprintln!("  -l, --list         List info about compressed file");
-    eprintln!("  -p, --pipeline P   Compression pipeline (default: deflate)");
+    eprintln!("  -p, --pipeline P   Compression pipeline (default: lzf)");
     eprintln!("  --list-pipelines   List all available pipelines and exit");
     eprintln!("  -a, --auto         Auto-select best pipeline based on data analysis");
     eprintln!("  --trial            Auto-select by trial compression (slower, more accurate)");
@@ -70,14 +70,17 @@ fn list_pipelines() {
     println!("  NAME        ID  DESCRIPTION");
     println!("  ----        --  -----------");
     let pipelines: &[(&str, &str, &str)] = &[
-        ("deflate", "0", "LZ77 + Huffman (gzip-like, default)"),
         ("bw", "1", "BWT + MTF + RLE + FSE (bzip2-like, best ratio)"),
         (
             "bbw",
             "2",
             "Bijective BWT + MTF + RLE + FSE (parallelizable BWT)",
         ),
-        ("lzf", "4", "LzSeq + FSE (zstd-style entropy coding)"),
+        (
+            "lzf",
+            "4",
+            "LzSeq + FSE (zstd-style entropy coding, default)",
+        ),
         ("lzfi", "5", "LZSS + interleaved FSE (fast CPU decode)"),
         ("lzssr", "6", "LZSS + rANS (experimental)"),
         ("lzseqr", "8", "LzSeq + rANS (zstd-style code+extra-bits)"),
@@ -134,7 +137,7 @@ fn parse_args() -> Opts {
         rans_interleaved_min_bytes: 64 * 1024,
         rans_interleaved_states: pz::rans::DEFAULT_INTERLEAVE,
         rans_shared_stream: false,
-        pipeline: Pipeline::Deflate,
+        pipeline: Pipeline::Lzf,
         files: Vec::new(),
     };
 
@@ -236,7 +239,6 @@ fn parse_args() -> Opts {
                     process::exit(1);
                 }
                 opts.pipeline = match args[i].as_str() {
-                    "deflate" | "0" => Pipeline::Deflate,
                     "bw" | "1" => Pipeline::Bw,
                     "bbw" | "2" => Pipeline::Bbw,
                     "lzf" | "4" => Pipeline::Lzf,
@@ -430,7 +432,7 @@ fn list_file(path: &str, data: &[u8]) -> Result<(), String> {
             }
             let version = data[2];
             let pipe = match data[3] {
-                0 => "deflate",
+                0 => "deflate (removed)",
                 1 => "bw",
                 2 => "bbw",
                 4 => "lzf",
diff --git a/src/ffi.rs b/src/ffi.rs
index 0bbfe58..b084170 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -35,7 +35,7 @@ pub enum PzLevel {
 /// Compression pipeline types.
 #[repr(C)]
 pub enum PzPipeline {
-    Deflate = 0,
+    // 0 was Deflate — removed
     Bw = 1,
 }
 
@@ -194,7 +194,7 @@ pub unsafe extern "C" fn pz_compress(
     let ctx = &*ctx;
 
     let pipe = match pipeline {
-        0 => pipeline::Pipeline::Deflate,
+        // 0 was Deflate — removed
         1 => pipeline::Pipeline::Bw,
         _ => return PZ_ERROR_UNSUPPORTED,
     };
@@ -251,7 +251,7 @@ pub unsafe extern "C" fn pz_compress_mt(
     let ctx = &*ctx;
 
     let pipe = match pipeline {
-        0 => pipeline::Pipeline::Deflate,
+        // 0 was Deflate — removed
         1 => pipeline::Pipeline::Bw,
         _ => return PZ_ERROR_UNSUPPORTED,
     };
@@ -532,7 +532,7 @@ mod tests {
     fn test_compress_decompress_ffi() {
         unsafe {
             let ctx = pz_init();
-            // Use longer input to overcome Deflate pipeline overhead (~1KB freq table)
+            // Use longer input to overcome pipeline overhead
             let pattern = b"hello, world! this is a test of compression. ";
             let mut input = Vec::new();
             for _ in 0..50 {
@@ -548,7 +548,7 @@ mod tests {
                 compressed.as_mut_ptr(),
                 compressed.len(),
                 PzLevel::Default as i32,
-                PzPipeline::Deflate as i32,
+                PzPipeline::Bw as i32,
             );
             assert!(comp_size > 0, "compression failed: {}", comp_size);
 
@@ -576,7 +576,7 @@ mod tests {
                 input.extend_from_slice(pattern);
             }
 
-            for pipeline_id in 0..2i32 {
+            for pipeline_id in 1..2i32 {
                 let mut compressed = vec![0u8; input.len() * 2 + 2048];
                 let mut decompressed = vec![0u8; input.len() + 1024];
 
diff --git a/src/lz77/mod.rs b/src/lz77/mod.rs
index cb1ce7f..78ece80 100644
--- a/src/lz77/mod.rs
+++ b/src/lz77/mod.rs
@@ -30,8 +30,8 @@ pub(crate) const MAX_CHAIN: usize = 64;
 /// Reduced chain depth used by auto/speed-biased parsing on large inputs.
 const MAX_CHAIN_AUTO: usize = 48;
 
-/// Maximum match length for DEFLATE-compatible pipelines (RFC 1951).
-pub const DEFLATE_MAX_MATCH: u16 = 258;
+/// Standard LZ77 maximum match length (258 bytes, from RFC 1951).
+pub const LZ77_MAX_MATCH: u16 = 258;
 
 /// Default maximum match length for non-DEFLATE pipelines.
 /// Uses full u16 range since Match.length is u16.
@@ -259,7 +259,7 @@ pub(crate) struct HashChainFinder {
 impl HashChainFinder {
     /// Create a match finder with the DEFLATE-standard max match length (258).
     pub(crate) fn new() -> Self {
-        Self::with_max_match_len(DEFLATE_MAX_MATCH)
+        Self::with_max_match_len(LZ77_MAX_MATCH)
     }
 
     /// Create a match finder with a caller-specified max match length.
@@ -588,10 +588,10 @@ impl HashChainFinder {
 /// and is also faster than greedy hash-chain due to skipping matched
 /// positions during hash insertion.
 ///
-/// Uses `DEFLATE_MAX_MATCH` (258) as the maximum match length.
+/// Uses `LZ77_MAX_MATCH` (258) as the maximum match length.
 /// For configurable max match length, use `compress_lazy_to_matches_with_limit`.
 pub fn compress_lazy_to_matches(input: &[u8]) -> PzResult<Vec<Match>> {
-    compress_lazy_to_matches_with_limit(input, DEFLATE_MAX_MATCH)
+    compress_lazy_to_matches_with_limit(input, LZ77_MAX_MATCH)
 }
 
 /// Compress input using greedy matching: always take the longest match at
@@ -701,9 +701,9 @@ pub(crate) fn compress_lazy_to_matches_with_limit_and_chain(
 ///
 /// This is the standard entry point for LZ77 compression. Uses
 /// `compress_lazy_to_matches` internally and serializes the result.
-/// Uses `DEFLATE_MAX_MATCH` (258) as the maximum match length.
+/// Uses `LZ77_MAX_MATCH` (258) as the maximum match length.
 pub fn compress_lazy(input: &[u8]) -> PzResult<Vec<u8>> {
-    compress_lazy_with_limit(input, DEFLATE_MAX_MATCH)
+    compress_lazy_with_limit(input, LZ77_MAX_MATCH)
 }
 
 /// Like `compress_lazy` but with a caller-specified max match length.
diff --git a/src/lz77/tests.rs b/src/lz77/tests.rs
index b530bc3..21e365a 100644
--- a/src/lz77/tests.rs
+++ b/src/lz77/tests.rs
@@ -373,9 +373,9 @@ fn test_round_trip_long_repeating_pattern() {
     );
 }
 
-/// Verify find_match respects the default DEFLATE_MAX_MATCH (258).
+/// Verify find_match respects the default LZ77_MAX_MATCH (258).
 ///
-/// The default HashChainFinder uses DEFLATE_MAX_MATCH, which is passed
+/// The default HashChainFinder uses LZ77_MAX_MATCH, which is passed
 /// as the limit to SIMD compare_bytes. This prevents matches from
 /// exceeding 258 for DEFLATE-compatible output.
 #[test]
@@ -386,7 +386,7 @@ fn test_find_match_length_bounded_deflate() {
     let m = finder.find_match(&input, 1);
     assert_eq!(
         m.length, 258,
-        "default find_match should cap at DEFLATE_MAX_MATCH"
+        "default find_match should cap at LZ77_MAX_MATCH"
     );
     assert_eq!(m.offset, 1, "should match with offset 1");
 }
diff --git a/src/lz_token.rs b/src/lz_token.rs
index 271ab71..5a7447a 100644
--- a/src/lz_token.rs
+++ b/src/lz_token.rs
@@ -6,7 +6,7 @@
 /// ## Encoders
 ///
 /// - `Lz77Encoder`: DEFLATE-compatible (u16 offset, u16 length, u8 next).
-///   3 streams. Used by the Deflate pipeline for backward compat.
+///   3 streams. Legacy format (Deflate pipeline removed).
 /// - `LzSeqEncoder`: log2-coded offsets/lengths with repeat offsets.
 ///   6 streams. Best ratio. Used by LzSeqR, LzSeqH, Lzf, SortLz.
 /// - `LzssEncoder`: flag bits + raw u16 offsets/lengths.
diff --git a/src/lzseq/mod.rs b/src/lzseq/mod.rs
index ba76118..ed9105f 100644
--- a/src/lzseq/mod.rs
+++ b/src/lzseq/mod.rs
@@ -762,9 +762,9 @@ pub fn encode_optimal(input: &[u8], config: &SeqConfig) -> PzResult<SeqEncoded>
     }
 
     // Build match table and run repeat-offset-aware optimal parse.
-    // Use DEFLATE_MAX_MATCH (258) for reasonable performance.
+    // Use LZ77_MAX_MATCH (258) for reasonable performance.
     // Searching for extremely long matches (u16::MAX) is prohibitively slow.
-    let max_match = crate::lz77::DEFLATE_MAX_MATCH;
+    let max_match = crate::lz77::LZ77_MAX_MATCH;
     let table = crate::optimal::build_match_table_cpu_with_config(
         input,
         crate::optimal::K,
diff --git a/src/optimal.rs b/src/optimal.rs
index 81cd7b4..f546d14 100644
--- a/src/optimal.rs
+++ b/src/optimal.rs
@@ -286,9 +286,9 @@ impl RepeatOffsetState {
 /// Build a match table from input using the hash-chain finder.
 ///
 /// For each position, finds up to `k` match candidates using the
-/// existing hash-chain infrastructure. Uses `DEFLATE_MAX_MATCH` (258).
+/// existing hash-chain infrastructure. Uses `LZ77_MAX_MATCH` (258).
 pub fn build_match_table_cpu(input: &[u8], k: usize) -> MatchTable {
-    build_match_table_cpu_with_limit(input, k, crate::lz77::DEFLATE_MAX_MATCH)
+    build_match_table_cpu_with_limit(input, k, crate::lz77::LZ77_MAX_MATCH)
 }
 
 /// Like `build_match_table_cpu` but with a caller-specified max match length.
@@ -537,9 +537,9 @@ pub fn optimal_parse(input: &[u8], table: &MatchTable, cost_model: &CostModel) -
 ///
 /// Produces the same serialized `Match` format as `lz77::compress_lazy`,
 /// but selects matches via backward DP to minimize total encoding cost.
-/// Decompressible with `lz77::decompress()`. Uses `DEFLATE_MAX_MATCH` (258).
+/// Decompressible with `lz77::decompress()`. Uses `LZ77_MAX_MATCH` (258).
 pub fn compress_optimal(input: &[u8]) -> PzResult<Vec<u8>> {
-    compress_optimal_with_limit(input, crate::lz77::DEFLATE_MAX_MATCH)
+    compress_optimal_with_limit(input, crate::lz77::LZ77_MAX_MATCH)
 }
 
 /// Like `compress_optimal` but with a caller-specified max match length.
diff --git a/src/pipeline/blocks.rs b/src/pipeline/blocks.rs
index 9971331..abe8886 100644
--- a/src/pipeline/blocks.rs
+++ b/src/pipeline/blocks.rs
@@ -1,6 +1,6 @@
 //! Per-pipeline single-block compress and decompress implementations.
 //!
-//! LZ-based pipelines (Deflate, Lzf, Lzfi, LzssR) use a unified path:
+//! LZ-based pipelines (Lzf, Lzfi, LzssR) use a unified path:
 //!   compress:   demux → entropy_encode
 //!   decompress: entropy_decode → demux
 //!
@@ -28,7 +28,7 @@ pub(crate) fn compress_block(
     pipeline: Pipeline,
     options: &CompressOptions,
 ) -> PzResult<Vec<u8>> {
-    // Resolve max match length for this pipeline (Deflate=258, others=u16::MAX).
+    // Resolve max match length for this pipeline.
     // Clone options only when we need to override the default.
     let resolved;
     let opts = if options.max_match_len.is_none() && demuxer_for_pipeline(pipeline).is_some() {
@@ -124,8 +124,6 @@ fn decompress_block_lz(
 // ---------------------------------------------------------------------------
 
 /// Dispatch to the correct entropy encoder for a pipeline.
-///
-/// For Huffman (Deflate), GPU variants are used when a GPU backend is active.
 fn entropy_encode(
     block: StageBlock,
     pipeline: Pipeline,
@@ -133,15 +131,6 @@ fn entropy_encode(
     options: &CompressOptions,
 ) -> PzResult<StageBlock> {
     match pipeline {
-        Pipeline::Deflate => {
-            // Note: WebGPU Huffman is intentionally NOT used here.
-            // Profiling shows CPU Huffman (~0.5ms/256KB) is faster than the
-            // WebGPU path (~2ms) due to CPU↔GPU round-trips for bit-length
-            // computation and prefix-sum. The GPU LZ77 path provides the
-            // parallelism win; entropy encoding is faster on the CPU.
-            let _ = (input_len, options);
-            stage_huffman_encode(block)
-        }
         Pipeline::LzssR | Pipeline::LzSeqR => {
             let _ = (input_len, options);
             stage_rans_encode_with_options(block, options)
@@ -179,7 +168,6 @@ fn entropy_decode(
     options: &DecompressOptions,
 ) -> PzResult<StageBlock> {
     match pipeline {
-        Pipeline::Deflate => stage_huffman_decode(block),
         Pipeline::LzssR | Pipeline::LzSeqR => {
             #[cfg(feature = "webgpu")]
             {
diff --git a/src/pipeline/demux.rs b/src/pipeline/demux.rs
index 924d6a6..991d5eb 100644
--- a/src/pipeline/demux.rs
+++ b/src/pipeline/demux.rs
@@ -53,6 +53,7 @@ pub(crate) trait StreamDemuxer {
 /// Concrete LZ demuxer variants (enum dispatch, no dyn/vtable overhead).
 pub(crate) enum LzDemuxer {
     /// LZ77: 3 streams (offsets, lengths, literals).
+    #[allow(dead_code)]
     Lz77,
     /// LZSS: 4 streams (flags, literals, offsets, lengths).
     Lzss,
@@ -64,7 +65,6 @@ pub(crate) enum LzDemuxer {
 /// Returns `None` for BWT-based pipelines (Bw, Bbw).
 pub(crate) fn demuxer_for_pipeline(pipeline: super::Pipeline) -> Option<LzDemuxer> {
     match pipeline {
-        super::Pipeline::Deflate => Some(LzDemuxer::Lz77),
         super::Pipeline::Lzf => Some(LzDemuxer::LzSeq),
         super::Pipeline::Lzfi | super::Pipeline::LzssR => Some(LzDemuxer::Lzss),
         super::Pipeline::LzSeqR | super::Pipeline::LzSeqH => Some(LzDemuxer::LzSeq),
diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs
index 7c9fb59..e66b381 100644
--- a/src/pipeline/mod.rs
+++ b/src/pipeline/mod.rs
@@ -9,7 +9,6 @@
 //!
 //! | Pipeline      | Stages                           | Similar to      |
 //! |---------------|----------------------------------|-----------------|
-//! | `Deflate`     | LZ77 → Huffman                   | gzip            |
 //! | `Bw`          | BWT → MTF → RLE → FSE            | bzip2           |
 //! | `Lzf`         | LzSeq → FSE                      | zstd-like       |
 //! | `Lzfi`        | LZSS → interleaved FSE           | fast CPU decode |
@@ -26,7 +25,7 @@
 //! Each compressed stream starts with a header:
 //! - Magic bytes: `PZ` (2 bytes)
 //! - Version: 2 (1 byte)
-//! - Pipeline ID: 0=Deflate, 1=Bw, 4=Lzf, 5=Lzfi, 6=LzssR, 8=LzSeqR, 9=LzSeqH, 10=SortLz (1 byte)
+//! - Pipeline ID: 1=Bw, 4=Lzf, 5=Lzfi, 6=LzssR, 8=LzSeqR, 9=LzSeqH, 10=SortLz (1 byte)
 //! - Original length: u32 little-endian (4 bytes)
 //! - num_blocks: u32 little-endian (4 bytes)
 //! - Block table: \[compressed_len: u32, original_len: u32\] \* num_blocks
@@ -165,10 +164,9 @@ pub struct CompressOptions {
     pub parse_strategy: ParseStrategy,
     /// Maximum match length for LZ77 compression.
     ///
-    /// `None` = use the pipeline's default: 258 for Deflate (RFC 1951
-    /// constraint), `u16::MAX` for other LZ-based pipelines (Lzf, LzSeqR, etc.).
-    /// Larger limits allow longer matches on repetitive data without
-    /// penalizing short-match performance (SIMD short-circuits).
+    /// `None` = use the pipeline's default (`u16::MAX` for LZ-based
+    /// pipelines). Larger limits allow longer matches on repetitive data
+    /// without penalizing short-match performance (SIMD short-circuits).
     pub max_match_len: Option<u16>,
     /// WebGPU engine handle, required when `backend` is `Backend::WebGpu`.
     #[cfg(feature = "webgpu")]
@@ -215,7 +213,7 @@ pub struct CompressOptions {
     /// Auto routes to GPU when block size >= GPU_ENTROPY_THRESHOLD and GPU available.
     pub stage1_backend: BackendAssignment,
     /// Match-finding algorithm: HashChain (default) or SortLz.
-    /// Applies to all LZ-based pipelines (Deflate, Lzf, Lzfi, LzssR, LzSeqR, LzSeqH).
+    /// Applies to all LZ-based pipelines (Lzf, Lzfi, LzssR, LzSeqR, LzSeqH).
     pub match_finder: MatchFinder,
 }
 
@@ -292,13 +290,10 @@ impl Default for DecompressOptions {
 
 /// Resolve the effective max match length from options and pipeline type.
 ///
-/// Deflate is hard-capped at 258 (RFC 1951). Other LZ77-based pipelines
-/// default to `u16::MAX` for better compression on repetitive data.
-pub(crate) fn resolve_max_match_len(pipeline: Pipeline, options: &CompressOptions) -> u16 {
-    options.max_match_len.unwrap_or(match pipeline {
-        Pipeline::Deflate => lz77::DEFLATE_MAX_MATCH,
-        _ => lz77::DEFAULT_MAX_MATCH,
-    })
+/// All LZ77-based pipelines default to `u16::MAX` for best compression
+/// on repetitive data.
+pub(crate) fn resolve_max_match_len(_pipeline: Pipeline, options: &CompressOptions) -> u16 {
+    options.max_match_len.unwrap_or(lz77::DEFAULT_MAX_MATCH)
 }
 
 /// Magic bytes for the libpz container format.
@@ -324,8 +319,7 @@ pub(crate) const BLOCK_HEADER_SIZE: usize = 8;
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[repr(u8)]
 pub enum Pipeline {
-    /// LZ77 + Huffman (gzip-like)
-    Deflate = 0,
+    // ID 0 was Deflate (LZ77 + Huffman, gzip-like) — removed as redundant with Lzf.
     /// BWT + MTF + RLE + FSE (bzip2-like)
     Bw = 1,
     /// Bijective BWT + MTF + RLE + FSE (parallelizable BWT variant)
@@ -354,7 +348,7 @@ impl TryFrom<u8> for Pipeline {
 
     fn try_from(v: u8) -> Result<Self, Self::Error> {
         match v {
-            0 => Ok(Self::Deflate),
+            // 0 was Deflate — removed
             1 => Ok(Self::Bw),
             2 => Ok(Self::Bbw),
             // 3 was Lzr — removed
@@ -568,14 +562,14 @@ pub fn select_pipeline(input: &[u8]) -> Pipeline {
     use crate::analysis::{self, DistributionShape};
 
     if input.is_empty() {
-        return Pipeline::Deflate;
+        return Pipeline::Lzf;
     }
 
     let profile = analysis::analyze(input);
 
     // Near-random data: use fastest pipeline, won't compress much
     if profile.byte_entropy > 7.5 && profile.match_density < 0.1 {
-        return Pipeline::Deflate;
+        return Pipeline::Lzf;
     }
 
     // High run ratio: BWT+RLE excels
@@ -599,7 +593,7 @@ pub fn select_pipeline(input: &[u8]) -> Pipeline {
             // High entropy: FSE handles better than Huffman
             return Pipeline::Lzfi;
         }
-        return Pipeline::Deflate;
+        return Pipeline::Lzf;
     }
 
     // Moderate match density with high entropy
@@ -607,8 +601,8 @@ pub fn select_pipeline(input: &[u8]) -> Pipeline {
         return Pipeline::Lzfi;
     }
 
-    // Default: Deflate (fast, decent compression)
-    Pipeline::Deflate
+    // Default: Lzf (LzSeq + FSE, fast and decent compression)
+    Pipeline::Lzf
 }
 
 /// Select the best match finder for the given input and options.
@@ -678,7 +672,7 @@ pub fn select_pipeline_trial(
     sample_size: usize,
 ) -> Pipeline {
     if input.is_empty() {
-        return Pipeline::Deflate;
+        return Pipeline::Lzf;
     }
 
     let sample_len = input.len().min(sample_size);
@@ -691,15 +685,15 @@ pub fn select_pipeline_trial(
     };
 
     let candidates = [
-        Pipeline::Deflate,
         Pipeline::Bw,
+        Pipeline::Lzf,
         Pipeline::Lzfi,
         Pipeline::LzssR,
         Pipeline::LzSeqR,
         Pipeline::LzSeqH,
         Pipeline::SortLz,
     ];
-    let mut best_pipeline = Pipeline::Deflate;
+    let mut best_pipeline = Pipeline::Lzf;
     let mut best_size = usize::MAX;
 
     // Also try SortLz match finder with LZ pipelines to find the best combo
@@ -766,12 +760,7 @@ fn adjusted_options(pipeline: Pipeline, options: &CompressOptions) -> CompressOp
 
     let is_lz_pipeline = matches!(
         pipeline,
-        Pipeline::Deflate
-            | Pipeline::Lzf
-            | Pipeline::Lzfi
-            | Pipeline::LzssR
-            | Pipeline::LzSeqR
-            | Pipeline::LzSeqH
+        Pipeline::Lzf | Pipeline::Lzfi | Pipeline::LzssR | Pipeline::LzSeqR | Pipeline::LzSeqH
     );
     let is_gpu = {
         #[allow(unused_mut)]
@@ -789,10 +778,9 @@ fn adjusted_options(pipeline: Pipeline, options: &CompressOptions) -> CompressOp
         adjusted.block_size = DEFAULT_GPU_BLOCK_SIZE;
     }
 
-    // Non-Deflate LZ pipelines default to extended match length (u16::MAX)
+    // LZ pipelines default to extended match length (u16::MAX)
     // when the caller hasn't explicitly set one.
-    if adjusted.max_match_len.is_none() && is_lz_pipeline && !matches!(pipeline, Pipeline::Deflate)
-    {
+    if adjusted.max_match_len.is_none() && is_lz_pipeline {
         adjusted.max_match_len = Some(lz77::DEFAULT_MAX_MATCH);
     }
 
@@ -874,7 +862,7 @@ pub(crate) fn tokenize(
     input: &[u8],
     options: &CompressOptions,
 ) -> PzResult<Vec<crate::lz_token::LzToken>> {
-    let max_match = options.max_match_len.unwrap_or(lz77::DEFLATE_MAX_MATCH);
+    let max_match = options.max_match_len.unwrap_or(lz77::LZ77_MAX_MATCH);
 
     // GPU path: use GPU kernels when available and input is in range.
     #[cfg(feature = "webgpu")]
diff --git a/src/pipeline/parallel.rs b/src/pipeline/parallel.rs
index a517ec7..f7956ff 100644
--- a/src/pipeline/parallel.rs
+++ b/src/pipeline/parallel.rs
@@ -154,12 +154,7 @@ fn should_route_block_to_gpu_stage0(
     // Only LZ-based pipelines have GPU Stage 0 support
     let is_gpu_stage0_pipeline = matches!(
         pipeline,
-        Pipeline::Deflate
-            | Pipeline::Lzf
-            | Pipeline::Lzfi
-            | Pipeline::LzssR
-            | Pipeline::LzSeqR
-            | Pipeline::LzSeqH
+        Pipeline::Lzf | Pipeline::Lzfi | Pipeline::LzssR | Pipeline::LzSeqR | Pipeline::LzSeqH
     );
 
     if !is_gpu_stage0_pipeline {
@@ -404,7 +399,7 @@ fn compress_parallel_unified(
 
             scope.spawn(move || {
                 let engine = opts.webgpu_engine.as_ref().unwrap();
-                let uses_lz77_demux = matches!(pipeline, Pipeline::Deflate | Pipeline::Lzf);
+                let uses_lz77_demux = false; // No current pipeline uses the Lz77 demuxer, so the SortLZ GPU branch below is unreachable. NOTE(review): Lzf matched here before — confirm.
                 let uses_sortlz_match_finder =
                     opts.match_finder == super::MatchFinder::SortLz && uses_lz77_demux;
 
@@ -550,7 +545,7 @@ fn compress_parallel_unified(
                     if !stage0_batch.is_empty() && uses_sortlz_match_finder {
                         // SortLZ GPU match finding: per-block dispatch with LZ77 conversion
                         let sortlz_config = crate::sortlz::SortLzConfig::for_lz77(
-                            opts.max_match_len.unwrap_or(crate::lz77::DEFLATE_MAX_MATCH),
+                            opts.max_match_len.unwrap_or(crate::lz77::LZ77_MAX_MATCH),
                         );
                         for block_idx in stage0_batch {
                             let t0 = Instant::now();
diff --git a/src/pipeline/parallel_tests.rs b/src/pipeline/parallel_tests.rs
index 88c21ac..df0cdfd 100644
--- a/src/pipeline/parallel_tests.rs
+++ b/src/pipeline/parallel_tests.rs
@@ -652,30 +652,9 @@ fn test_heterogeneous_mixed_block_sizes_with_gpu() {
     );
 }
 
-// GPU unified scheduler tests for LZ-based pipelines (Deflate, Lzf).
+// GPU unified scheduler tests for LZ-based pipelines (Lzf).
 // These exercise the Stage 0 GPU routing and batch-collect path.
 
-#[test]
-#[cfg(feature = "webgpu")]
-fn test_gpu_roundtrip_deflate() {
-    use crate::webgpu::WebGpuEngine;
-    let input: Vec<u8> = (0..=255).cycle().take(512 * 1024).collect();
-    let engine = match WebGpuEngine::new() {
-        Ok(e) => e,
-        Err(_) => return,
-    };
-    let opts = CompressOptions {
-        backend: super::super::Backend::WebGpu,
-        threads: 2,
-        block_size: 256 * 1024,
-        webgpu_engine: Some(std::sync::Arc::new(engine)),
-        ..CompressOptions::default()
-    };
-    let compressed = super::super::compress_with_options(&input, Pipeline::Deflate, &opts).unwrap();
-    let decompressed = super::super::decompress(&compressed).unwrap();
-    assert_eq!(decompressed, input, "GPU Deflate round-trip failed");
-}
-
 #[test]
 #[cfg(feature = "webgpu")]
 fn test_gpu_roundtrip_lzf() {
diff --git a/src/pipeline/stages.rs b/src/pipeline/stages.rs
index b140602..98cdee3 100644
--- a/src/pipeline/stages.rs
+++ b/src/pipeline/stages.rs
@@ -883,8 +883,6 @@ pub(crate) fn run_compress_stage(
     options: &CompressOptions,
 ) -> PzResult<StageBlock> {
     match (pipeline, stage_idx) {
-        (Pipeline::Deflate, 0) => stage_demux_compress(block, &LzDemuxer::Lz77, options),
-        (Pipeline::Deflate, 1) => stage_huffman_encode(block),
         (Pipeline::Bw, 0) => stage_bwt_encode(block, options),
         (Pipeline::Bw, 1) => stage_mtf_encode(block),
         (Pipeline::Bw, 2) => stage_rle_encode(block),
diff --git a/src/pipeline/tests.rs b/src/pipeline/tests.rs
index ecf0234..223aae2 100644
--- a/src/pipeline/tests.rs
+++ b/src/pipeline/tests.rs
@@ -34,7 +34,7 @@ fn test_too_short_input() {
 
 #[test]
 fn test_zero_original_length() {
-    let result = decompress(&[b'P', b'Z', VERSION, 0, 0, 0, 0, 0]);
+    let result = decompress(&[b'P', b'Z', VERSION, 1, 0, 0, 0, 0]);
     assert_eq!(result.unwrap(), Vec::<u8>::new());
 }
 
@@ -44,7 +44,6 @@ fn test_zero_original_length() {
 fn test_all_pipelines_banana() {
     let input = b"banana banana banana banana banana";
     for &pipeline in &[
-        Pipeline::Deflate,
         Pipeline::Bw,
         Pipeline::Bbw,
         Pipeline::LzSeqR,
@@ -65,7 +64,6 @@ fn test_all_pipelines_medium_text() {
         input.extend(b"abcdefghij klmnopqrstuvwxyz 0123456789 ");
     }
     for &pipeline in &[
-        Pipeline::Deflate,
         Pipeline::Bw,
         Pipeline::Bbw,
         Pipeline::LzSeqR,
@@ -106,7 +104,6 @@ fn test_multiblock_round_trip_all_pipelines() {
     }
 
     for &pipeline in &[
-        Pipeline::Deflate,
         Pipeline::Bw,
         Pipeline::Lzf,
         Pipeline::Lzfi,
@@ -129,7 +126,7 @@ fn test_multiblock_various_block_sizes() {
     }
 
     for block_size in [256, 512, 1024, 2048] {
-        for &pipeline in &[Pipeline::Deflate, Pipeline::Bw, Pipeline::Lzf] {
+        for &pipeline in &[Pipeline::Bw, Pipeline::Lzf] {
             let compressed = compress_mt(&input, pipeline, 4, block_size).unwrap();
             let decompressed = decompress(&compressed).unwrap();
             assert_eq!(
@@ -143,7 +140,7 @@ fn test_multiblock_various_block_sizes() {
 
 #[test]
 fn test_multiblock_empty_input() {
-    let compressed = compress_mt(&[], Pipeline::Deflate, 4, 512).unwrap();
+    let compressed = compress_mt(&[], Pipeline::Lzf, 4, 512).unwrap();
     assert!(compressed.is_empty());
 }
 
@@ -182,7 +179,7 @@ fn test_multiblock_large_input() {
     for _ in 0..2500 {
         input.extend_from_slice(pattern);
     }
-    for &pipeline in &[Pipeline::Deflate, Pipeline::Bw, Pipeline::Lzf] {
+    for &pipeline in &[Pipeline::Bw, Pipeline::Lzf] {
         let compressed = compress_mt(&input, pipeline, 4, 16384).unwrap();
         assert_eq!(compressed[2], VERSION);
         let decompressed = decompress(&compressed).unwrap();
@@ -192,23 +189,6 @@ fn test_multiblock_large_input() {
 
 // --- Optimal parsing pipeline tests ---
 
-#[test]
-fn test_optimal_deflate_round_trip() {
-    let pattern = b"The quick brown fox jumps over the lazy dog. ";
-    let mut input = Vec::new();
-    for _ in 0..100 {
-        input.extend_from_slice(pattern);
-    }
-    let opts = CompressOptions {
-        parse_strategy: ParseStrategy::Optimal,
-        threads: 1,
-        ..Default::default()
-    };
-    let compressed = compress_with_options(&input, Pipeline::Deflate, &opts).unwrap();
-    let decompressed = decompress(&compressed).unwrap();
-    assert_eq!(decompressed, input);
-}
-
 #[test]
 fn test_optimal_lza_round_trip() {
     let pattern = b"abcdefghij abcdefghij ";
@@ -239,7 +219,7 @@ fn test_optimal_multiblock_round_trip() {
         block_size: 4096,
         ..Default::default()
     };
-    let compressed = compress_with_options(&input, Pipeline::Deflate, &opts).unwrap();
+    let compressed = compress_with_options(&input, Pipeline::Lzf, &opts).unwrap();
     let decompressed = decompress(&compressed).unwrap();
     assert_eq!(decompressed, input);
 }
@@ -248,7 +228,7 @@ fn test_optimal_multiblock_round_trip() {
 
 #[test]
 fn test_select_pipeline_empty() {
-    assert_eq!(select_pipeline(&[]), Pipeline::Deflate);
+    assert_eq!(select_pipeline(&[]), Pipeline::Lzf);
 }
 
 #[test]
@@ -261,7 +241,7 @@ fn test_select_pipeline_constant_data() {
 
 #[test]
 fn test_select_pipeline_text() {
-    // Repetitive text → good match density, moderate entropy → Deflate or Lzf
+    // Repetitive text → good match density, moderate entropy → Lzf
     let pattern = b"The quick brown fox jumps over the lazy dog. ";
     let mut input = Vec::new();
     for _ in 0..200 {
@@ -269,15 +249,15 @@ fn test_select_pipeline_text() {
     }
     let pipeline = select_pipeline(&input);
     assert!(
-        pipeline == Pipeline::Deflate || pipeline == Pipeline::Lzf,
-        "expected Deflate or Lzf, got {:?}",
+        pipeline == Pipeline::Lzf,
+        "expected Lzf, got {:?}",
         pipeline
     );
 }
 
 #[test]
 fn test_select_pipeline_random() {
-    // Pseudo-random → high entropy, low match density → Deflate (fastest)
+    // Pseudo-random → high entropy, low match density → Lzf (fastest)
     let mut input = vec![0u8; 10000];
     let mut state: u32 = 12345;
     for byte in &mut input {
@@ -285,7 +265,7 @@ fn test_select_pipeline_random() {
         *byte = (state >> 16) as u8;
     }
     let pipeline = select_pipeline(&input);
-    assert_eq!(pipeline, Pipeline::Deflate);
+    assert_eq!(pipeline, Pipeline::Lzf);
 }
 
 #[test]
@@ -321,43 +301,6 @@ fn test_select_pipeline_auto_vs_explicit_round_trip() {
 
 // --- Multi-stream tests ---
 
-#[test]
-fn test_multistream_deflate_round_trip_medium() {
-    // 10KB of repetitive text
-    let pattern = b"The quick brown fox jumps over the lazy dog. ";
-    let mut input = Vec::new();
-    for _ in 0..222 {
-        input.extend_from_slice(pattern);
-    }
-    let compressed = compress(&input, Pipeline::Deflate).unwrap();
-    let decompressed = decompress(&compressed).unwrap();
-    assert_eq!(decompressed, input);
-    assert!(
-        compressed.len() < input.len(),
-        "compressed {} >= input {}",
-        compressed.len(),
-        input.len()
-    );
-}
-
-#[test]
-fn test_multistream_deflate_compression_ratio() {
-    // Multi-stream should compress well on structured/repetitive data
-    // because offset/length/literal distributions are each tighter
-    let pattern = b"The quick brown fox jumps over the lazy dog. ";
-    let mut input = Vec::new();
-    for _ in 0..500 {
-        input.extend_from_slice(pattern);
-    }
-    let compressed = compress(&input, Pipeline::Deflate).unwrap();
-    assert!(
-        compressed.len() < input.len() / 2,
-        "expected significant compression: compressed {} vs input {}",
-        compressed.len(),
-        input.len()
-    );
-}
-
 #[test]
 fn test_multistream_lzf_round_trip_medium() {
     let pattern = b"abcdefghij abcdefghij ";
@@ -384,7 +327,7 @@ fn test_multistream_all_pipelines_round_trip() {
     for _ in 0..100 {
         input.extend_from_slice(pattern);
     }
-    for &pipeline in &[Pipeline::Deflate, Pipeline::Bw, Pipeline::LzSeqR] {
+    for &pipeline in &[Pipeline::Bw, Pipeline::LzSeqR] {
         let compressed = compress(&input, pipeline).unwrap();
         let decompressed = decompress(&compressed).unwrap();
         assert_eq!(decompressed, input, "round-trip failed for {:?}", pipeline);
@@ -608,27 +551,35 @@ fn test_lzfi_multistream_deinterleave_reinterleave() {
 /// Verify LzSeqR pipeline benefits from extended match lengths on repetitive data.
 #[test]
 fn test_lzseqr_extended_match_length() {
+    use crate::lz77;
+
     let input = vec![0xAAu8; 100_000];
 
-    // Deflate should use 258-byte max matches
-    let deflate_compressed = compress(&input, Pipeline::Deflate).unwrap();
+    // LzSeqR with constrained (258-byte) max matches as baseline
+    let opts_limited = CompressOptions {
+        max_match_len: Some(lz77::LZ77_MAX_MATCH),
+        threads: 1,
+        ..Default::default()
+    };
+    let limited_compressed =
+        compress_with_options(&input, Pipeline::LzSeqR, &opts_limited).unwrap();
 
     // LzSeqR should use extended matches (u16::MAX) and compress better
-    let lzr_compressed = compress(&input, Pipeline::LzSeqR).unwrap();
+    let extended_compressed = compress(&input, Pipeline::LzSeqR).unwrap();
 
     // Both must decompress correctly
-    let deflate_decompressed = decompress(&deflate_compressed).unwrap();
-    let lzr_decompressed = decompress(&lzr_compressed).unwrap();
-    assert_eq!(deflate_decompressed, input);
-    assert_eq!(lzr_decompressed, input);
+    let limited_decompressed = decompress(&limited_compressed).unwrap();
+    let extended_decompressed = decompress(&extended_compressed).unwrap();
+    assert_eq!(limited_decompressed, input);
+    assert_eq!(extended_decompressed, input);
 
     // LzSeqR with extended matches should produce smaller output on highly
     // repetitive data (fewer matches needed = fewer tokens = better ratio)
     assert!(
-        lzr_compressed.len() < deflate_compressed.len(),
-        "LzSeqR ({} bytes) should compress better than Deflate ({} bytes) on repetitive data",
-        lzr_compressed.len(),
-        deflate_compressed.len()
+        extended_compressed.len() < limited_compressed.len(),
+        "LzSeqR extended ({} bytes) should compress better than LzSeqR limited ({} bytes) on repetitive data",
+        extended_compressed.len(),
+        limited_compressed.len()
     );
 }
 
@@ -713,21 +664,29 @@ fn test_lzseqr_recoil_wide_interleave_round_trip() {
 /// Verify Lzf pipeline benefits from extended match lengths on repetitive data.
 #[test]
 fn test_lzf_extended_match_length() {
+    use crate::lz77;
+
     let input = vec![0xBBu8; 100_000];
 
-    let deflate_compressed = compress(&input, Pipeline::Deflate).unwrap();
+    // Lzf with constrained (258-byte) max matches as baseline
+    let opts_limited = CompressOptions {
+        max_match_len: Some(lz77::LZ77_MAX_MATCH),
+        threads: 1,
+        ..Default::default()
+    };
+    let limited_compressed = compress_with_options(&input, Pipeline::Lzf, &opts_limited).unwrap();
     let lzf_compressed = compress(&input, Pipeline::Lzf).unwrap();
 
     // Both must decompress correctly
-    assert_eq!(decompress(&deflate_compressed).unwrap(), input);
+    assert_eq!(decompress(&limited_compressed).unwrap(), input);
     assert_eq!(decompress(&lzf_compressed).unwrap(), input);
 
     // Lzf with extended matches should produce smaller output
     assert!(
-        lzf_compressed.len() < deflate_compressed.len(),
-        "Lzf ({} bytes) should compress better than Deflate ({} bytes) on repetitive data",
+        lzf_compressed.len() < limited_compressed.len(),
+        "Lzf extended ({} bytes) should compress better than Lzf limited ({} bytes) on repetitive data",
         lzf_compressed.len(),
-        deflate_compressed.len()
+        limited_compressed.len()
     );
 }
 
@@ -738,9 +697,9 @@ fn test_explicit_max_match_len_option() {
 
     let input = vec![0xCCu8; 100_000];
 
-    // Force LzSeqR to use Deflate-style 258 limit
+    // Force LzSeqR to use 258-byte match limit
     let opts_limited = CompressOptions {
-        max_match_len: Some(lz77::DEFLATE_MAX_MATCH),
+        max_match_len: Some(lz77::LZ77_MAX_MATCH),
         threads: 1,
         ..Default::default()
     };
@@ -818,20 +777,6 @@ mod gpu_batched_tests {
         })
     }
 
-    #[test]
-    fn test_gpu_batched_deflate_round_trip() {
-        let opts = match make_webgpu_options() {
-            Some(o) => o,
-            None => return,
-        };
-
-        let pattern = b"the quick brown fox jumps over the lazy dog. ";
-        let input: Vec<u8> = pattern.iter().cycle().take(256 * 1024).copied().collect();
-        let compressed = compress_with_options(&input, Pipeline::Deflate, &opts).unwrap();
-        let decompressed = decompress(&compressed).unwrap();
-        assert_eq!(decompressed, input);
-    }
-
     #[test]
     fn test_gpu_batched_lzseqr_round_trip() {
         let opts = match make_webgpu_options() {
@@ -943,7 +888,7 @@ mod gpu_batched_tests {
             input.extend_from_slice(&block);
         }
 
-        for pipeline in [Pipeline::Deflate, Pipeline::LzSeqR, Pipeline::Lzf] {
+        for pipeline in [Pipeline::LzSeqR, Pipeline::Lzf] {
             let compressed = compress_with_options(&input, pipeline, &opts).unwrap();
             let decompressed = decompress(&compressed).unwrap();
             assert_eq!(
diff --git a/src/sortlz.rs b/src/sortlz.rs
index 2da4468..82ce566 100644
--- a/src/sortlz.rs
+++ b/src/sortlz.rs
@@ -751,7 +751,7 @@ mod tests {
     #[test]
     fn test_lz77_greedy_roundtrip() {
         let input = test_data();
-        let config = SortLzConfig::for_lz77(crate::lz77::DEFLATE_MAX_MATCH);
+        let config = SortLzConfig::for_lz77(crate::lz77::LZ77_MAX_MATCH);
         let matches = find_matches(&input, &config);
         let lz_matches = matches_to_lz77_greedy(&input, &matches);
 
@@ -767,7 +767,7 @@ mod tests {
     #[test]
     fn test_lz77_lazy_roundtrip() {
         let input = test_data();
-        let config = SortLzConfig::for_lz77(crate::lz77::DEFLATE_MAX_MATCH);
+        let config = SortLzConfig::for_lz77(crate::lz77::LZ77_MAX_MATCH);
         let matches = find_matches(&input, &config);
         let lz_matches = matches_to_lz77_lazy(&input, &matches);
 
@@ -782,7 +782,7 @@ mod tests {
     #[test]
     fn test_topk_roundtrip() {
         let input = test_data();
-        let config = SortLzConfig::for_lz77(crate::lz77::DEFLATE_MAX_MATCH);
+        let config = SortLzConfig::for_lz77(crate::lz77::LZ77_MAX_MATCH);
         let table = find_matches_topk(&input, &config, 4);
 
         // Verify table has valid candidates
@@ -841,22 +841,6 @@ mod tests {
         }
     }
 
-    #[test]
-    fn test_pipeline_roundtrip_deflate_sortlz() {
-        use crate::pipeline::{self, CompressOptions, MatchFinder, ParseStrategy, Pipeline};
-        let input = test_data();
-
-        let opts = CompressOptions {
-            match_finder: MatchFinder::SortLz,
-            parse_strategy: ParseStrategy::Lazy,
-            threads: 1,
-            ..Default::default()
-        };
-        let compressed = pipeline::compress_with_options(&input, Pipeline::Deflate, &opts).unwrap();
-        let decoded = pipeline::decompress(&compressed).unwrap();
-        assert_eq!(decoded, input);
-    }
-
     #[test]
     fn test_pipeline_roundtrip_lzseqr_sortlz() {
         use crate::pipeline::{self, CompressOptions, MatchFinder, Pipeline};
diff --git a/src/streaming.rs b/src/streaming.rs
index 2746530..0460881 100644
--- a/src/streaming.rs
+++ b/src/streaming.rs
@@ -646,7 +646,7 @@ mod tests {
     #[test]
     fn test_stream_round_trip_single_block() {
         let data = b"hello, world!".repeat(10);
-        for pipeline in [Pipeline::Deflate, Pipeline::Bw, Pipeline::Lzf] {
+        for pipeline in [Pipeline::Bw, Pipeline::Lzf] {
             let compressed = stream_compress(&data, pipeline, 1);
             let decompressed = stream_decompress(&compressed, 1);
             assert_eq!(decompressed, data, "round-trip failed for {:?}", pipeline);
@@ -657,7 +657,7 @@ mod tests {
     fn test_stream_round_trip_multi_block() {
         // 2KB input with 512-byte blocks = 4 blocks
         let data = b"The quick brown fox jumps over the lazy dog. ".repeat(50);
-        for pipeline in [Pipeline::Deflate, Pipeline::Bw, Pipeline::Lzf] {
+        for pipeline in [Pipeline::Bw, Pipeline::Lzf] {
             let compressed = stream_compress(&data, pipeline, 1);
             let decompressed = stream_decompress(&compressed, 1);
             assert_eq!(decompressed, data, "round-trip failed for {:?}", pipeline);
@@ -679,7 +679,7 @@ mod tests {
     fn test_framed_decompressed_by_in_memory() {
         // compress_stream -> pipeline::decompress (in-memory)
         let data = b"hello, world!".repeat(30);
-        let compressed = stream_compress(&data, Pipeline::Deflate, 1);
+        let compressed = stream_compress(&data, Pipeline::Lzf, 1);
         let decompressed = crate::pipeline::decompress(&compressed).unwrap();
         assert_eq!(decompressed, data);
     }
@@ -688,7 +688,7 @@ mod tests {
     fn test_table_mode_decompressed_by_stream() {
         // pipeline::compress (table-mode V2) -> decompress_stream
         let data = b"hello, world!".repeat(30);
-        let compressed = crate::pipeline::compress(&data, Pipeline::Deflate).unwrap();
+        let compressed = crate::pipeline::compress(&data, Pipeline::Lzf).unwrap();
         let decompressed = stream_decompress(&compressed, 1);
         assert_eq!(decompressed, data);
     }
@@ -723,7 +723,7 @@ mod tests {
     #[test]
     fn test_stream_single_byte() {
         let data = vec![0xAB];
-        for pipeline in [Pipeline::Deflate, Pipeline::Bw, Pipeline::Lzf] {
+        for pipeline in [Pipeline::Bw, Pipeline::Lzf] {
             let compressed = stream_compress(&data, pipeline, 1);
             let decompressed = stream_decompress(&compressed, 1);
             assert_eq!(decompressed, data, "single-byte failed for {:?}", pipeline);
diff --git a/src/validation.rs b/src/validation.rs
index 704600b..a570438 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -276,7 +276,6 @@ mod tests {
         fn all_pipelines_zeros() {
             let input = data_all_zeros(500);
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -291,7 +290,6 @@ mod tests {
         fn all_pipelines_uniform() {
             let input = data_uniform();
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -306,7 +304,6 @@ mod tests {
         fn all_pipelines_skewed() {
             let input = data_skewed(2000);
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -321,7 +318,6 @@ mod tests {
         fn all_pipelines_text() {
             let input = data_repeating_text();
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -336,7 +332,6 @@ mod tests {
         fn all_pipelines_sawtooth() {
             let input = data_sawtooth(2048);
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -351,7 +346,6 @@ mod tests {
         fn all_pipelines_runs() {
             let input = data_runs();
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -366,7 +360,6 @@ mod tests {
         fn all_pipelines_single_byte() {
             let input = vec![42u8];
             for &p in &[
-                Pipeline::Deflate,
                 Pipeline::Bw,
                 Pipeline::Bbw,
                 Pipeline::Lzf,
@@ -588,13 +581,7 @@ mod tests {
                 return;
             }
 
-            for &pipe in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &pipe in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, pipe).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(
@@ -693,13 +680,7 @@ mod tests {
             }
             let full = fs::read(&path).unwrap();
             let input = &full[..full.len().min(65536)];
-            for &pipe in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &pipe in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(input, pipe).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(
@@ -719,13 +700,7 @@ mod tests {
             }
             let full = fs::read(&path).unwrap();
             let input = &full[..full.len().min(65536)];
-            for &pipe in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &pipe in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(input, pipe).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(
@@ -748,13 +723,7 @@ mod tests {
         fn pipeline_two_bytes() {
             // Smallest non-trivial input
             let input = vec![0u8, 1];
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -765,13 +734,7 @@ mod tests {
         fn alternating_bytes() {
             // Worst case for RLE (no runs), but structured for LZ77
             let input: Vec<u8> = (0..1000).map(|i| if i % 2 == 0 { 0 } else { 1 }).collect();
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -782,13 +745,7 @@ mod tests {
         fn all_256_byte_values() {
             // Every byte value appears exactly once
             let input: Vec<u8> = (0..=255).collect();
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -807,13 +764,7 @@ mod tests {
             assert_eq!(decoded, input);
 
             // Full pipeline test
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -823,13 +774,7 @@ mod tests {
         #[test]
         fn descending_bytes() {
             let input: Vec<u8> = (0..=255).rev().collect();
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -840,13 +785,7 @@ mod tests {
         fn repeated_short_pattern() {
             // "ab" repeated 500 times - good for LZ77
             let input: Vec<u8> = b"ab".iter().copied().cycle().take(1000).collect();
-            for &p in &[
-                Pipeline::Deflate,
-                Pipeline::Bw,
-                Pipeline::Bbw,
-                Pipeline::Lzf,
-                Pipeline::LzssR,
-            ] {
+            for &p in &[Pipeline::Bw, Pipeline::Bbw, Pipeline::Lzf, Pipeline::LzssR] {
                 let compressed = pipeline::compress(&input, p).unwrap();
                 let decompressed = pipeline::decompress(&compressed).unwrap();
                 assert_eq!(decompressed, input, "pipeline {:?}", p);
@@ -886,27 +825,6 @@ mod tests {
         input
     }
 
-    #[test]
-    #[cfg(feature = "webgpu")]
-    fn webgpu_compress_cpu_decompress_deflate() {
-        use crate::pipeline::{Backend, CompressOptions};
-        let engine = match crate::webgpu::WebGpuEngine::new() {
-            Ok(e) => std::sync::Arc::new(e),
-            Err(crate::PzError::Unsupported) => return,
-            Err(e) => panic!("unexpected error: {:?}", e),
-        };
-        let input = gpu_test_input();
-        let options = CompressOptions {
-            backend: Backend::WebGpu,
-            webgpu_engine: Some(engine),
-            ..Default::default()
-        };
-        let compressed =
-            pipeline::compress_with_options(&input, Pipeline::Deflate, &options).unwrap();
-        let decompressed = pipeline::decompress(&compressed).unwrap();
-        assert_eq!(decompressed, input, "WebGPU Deflate GPU→CPU round-trip");
-    }
-
     #[test]
     #[cfg(feature = "webgpu")]
     fn webgpu_compress_cpu_decompress_lzseqr() {
@@ -970,31 +888,6 @@ mod tests {
         input
     }
 
-    #[test]
-    #[cfg(feature = "webgpu")]
-    fn webgpu_streaming_round_trip_deflate() {
-        use crate::pipeline::{Backend, CompressOptions};
-        let engine = match crate::webgpu::WebGpuEngine::new() {
-            Ok(e) => std::sync::Arc::new(e),
-            Err(crate::PzError::Unsupported) => return,
-            Err(e) => panic!("unexpected error: {:?}", e),
-        };
-        let input = gpu_streaming_test_input();
-        let options = CompressOptions {
-            backend: Backend::WebGpu,
-            webgpu_engine: Some(engine),
-            threads: 2,
-            ..Default::default()
-        };
-        let compressed =
-            pipeline::compress_with_options(&input, Pipeline::Deflate, &options).unwrap();
-        let decompressed = pipeline::decompress(&compressed).unwrap();
-        assert_eq!(
-            decompressed, input,
-            "WebGPU streaming Deflate round-trip (multi-block)"
-        );
-    }
-
     #[test]
     #[cfg(feature = "webgpu")]
     fn webgpu_streaming_round_trip_lzf() {
@@ -1039,12 +932,11 @@ mod tests {
         };
         // This should go through the streaming path (5 blocks > 1)
         // or batched path, both exercising multi-block GPU code
-        let compressed =
-            pipeline::compress_with_options(&input, Pipeline::Deflate, &options).unwrap();
+        let compressed = pipeline::compress_with_options(&input, Pipeline::Lzf, &options).unwrap();
         let decompressed = pipeline::decompress(&compressed).unwrap();
         assert_eq!(
             decompressed, input,
-            "WebGPU streaming Deflate round-trip (small blocks)"
+            "WebGPU streaming Lzf round-trip (small blocks)"
         );
     }
 }
diff --git a/src/webgpu/tests/pipelines.rs b/src/webgpu/tests/pipelines.rs
index 4f3abca..55dfa12 100644
--- a/src/webgpu/tests/pipelines.rs
+++ b/src/webgpu/tests/pipelines.rs
@@ -1,57 +1,5 @@
 use super::super::*;
 
-#[test]
-fn test_webgpu_deflate_pipeline_round_trip() {
-    let engine = match WebGpuEngine::new() {
-        Ok(e) => std::sync::Arc::new(e),
-        Err(PzError::Unsupported) => return,
-        Err(e) => panic!("unexpected error: {:?}", e),
-    };
-
-    let input = b"the quick brown fox jumps over the lazy dog. the quick brown fox.";
-    let options = crate::pipeline::CompressOptions {
-        backend: crate::pipeline::Backend::WebGpu,
-        webgpu_engine: Some(engine),
-        ..Default::default()
-    };
-
-    let compressed =
-        crate::pipeline::compress_with_options(input, crate::pipeline::Pipeline::Deflate, &options)
-            .unwrap();
-    let decompressed = crate::pipeline::decompress(&compressed).unwrap();
-    assert_eq!(decompressed, input);
-}
-
-#[test]
-fn test_webgpu_deflate_pipeline_larger() {
-    let engine = match WebGpuEngine::new() {
-        Ok(e) => std::sync::Arc::new(e),
-        Err(PzError::Unsupported) => return,
-        Err(e) => panic!("unexpected error: {:?}", e),
-    };
-
-    let pattern = b"The quick brown fox jumps over the lazy dog. ";
-    let mut input = Vec::new();
-    for _ in 0..200 {
-        input.extend_from_slice(pattern);
-    }
-
-    let options = crate::pipeline::CompressOptions {
-        backend: crate::pipeline::Backend::WebGpu,
-        webgpu_engine: Some(engine),
-        ..Default::default()
-    };
-
-    let compressed = crate::pipeline::compress_with_options(
-        &input,
-        crate::pipeline::Pipeline::Deflate,
-        &options,
-    )
-    .unwrap();
-    let decompressed = crate::pipeline::decompress(&compressed).unwrap();
-    assert_eq!(decompressed, input);
-}
-
 // --- Modular GPU pipeline round-trip tests ---
 
 fn gpu_pipeline_round_trip(input: &[u8], pipeline: crate::pipeline::Pipeline) {
@@ -84,20 +32,8 @@ fn gpu_pipeline_round_trip(input: &[u8], pipeline: crate::pipeline::Pipeline) {
 }
 
 #[test]
-fn test_modular_gpu_deflate_round_trip() {
-    // GPU LZ77 → GPU Huffman (modular stage path)
-    let mut input = Vec::new();
-    for i in 0u8..=255 {
-        for _ in 0..40 {
-            input.push(i);
-        }
-    }
-    gpu_pipeline_round_trip(&input, crate::pipeline::Pipeline::Deflate);
-}
-
-#[test]
-fn test_gpu_lzseq_cpu_rans_round_trip() {
-    // GPU LzSeq → CPU rANS
+fn test_gpu_lz77_cpu_rans_round_trip() {
+    // GPU LZ77 → CPU rANS
     let pattern = b"Hello, World! This is a test pattern for GPU+CPU composition. ";
     let mut input = Vec::new();
     for _ in 0..100 {