From 6b24b269d71674aef5253ea5dcacfa8a9f925201 Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Thu, 19 Mar 2026 16:26:33 -0400
Subject: [PATCH 1/6] Integrate sketchlib-rust for KLL quantile sketches

---
 Cargo.lock                                    |  16 +-
 asap-common/sketch-core/Cargo.toml            |   5 +
 asap-common/sketch-core/report.md             | 131 +++++
 .../sketch-core/src/bin/sketchlib_fidelity.rs | 488 +++++++++++++++++
 asap-common/sketch-core/src/config.rs         |  75 +++
 asap-common/sketch-core/src/count_min.rs      | 275 ++++++++--
 .../sketch-core/src/count_min_sketchlib.rs    |  59 +++
 .../sketch-core/src/count_min_with_heap.rs    | 492 +++++++++++++-----
 .../src/count_min_with_heap_sketchlib.rs      | 109 ++++
 asap-common/sketch-core/src/kll.rs            | 194 +++++--
 asap-common/sketch-core/src/kll_sketchlib.rs  |  36 ++
 asap-common/sketch-core/src/lib.rs            |  12 +
 asap-query-engine/Cargo.toml                  |   8 +-
 asap-query-engine/src/lib.rs                  |  17 +
 asap-query-engine/src/main.rs                 |  22 +
 .../count_min_sketch_accumulator.rs           | 104 ++--
 .../count_min_sketch_with_heap_accumulator.rs | 134 ++---
 .../datasketches_kll_accumulator.rs           |  41 +-
 asap-query-engine/tests/test_both_backends.rs |  30 ++
 .../templates/udfs/countminsketch_count.rs.j2 |  78 ++-
 .../templates/udfs/countminsketch_sum.rs.j2   |  84 ++-
 .../udfs/countminsketchwithheap_topk.rs.j2    |  78 ++-
 .../templates/udfs/datasketcheskll_.rs.j2     |  68 +--
 .../templates/udfs/hydrakll_.rs.j2            | 149 ++++--
 24 files changed, 2263 insertions(+), 442 deletions(-)
 create mode 100644 asap-common/sketch-core/report.md
 create mode 100644 asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
 create mode 100644 asap-common/sketch-core/src/config.rs
 create mode 100644 asap-common/sketch-core/src/count_min_sketchlib.rs
 create mode 100644 asap-common/sketch-core/src/count_min_with_heap_sketchlib.rs
 create mode 100644 asap-common/sketch-core/src/kll_sketchlib.rs
 create mode 100644 asap-query-engine/tests/test_both_backends.rs

diff --git a/Cargo.lock b/Cargo.lock
index b520550..f12cb9f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -376,7 +376,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "clap 4.5.60",
- "indexmap",
+ "indexmap 2.13.0",
  "pretty_assertions",
  "promql-parser",
  "promql_utilities",
@@ -945,6 +945,16 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "ctor"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501"
+dependencies = [
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "cxx"
 version = "1.0.194"
@@ -3334,6 +3344,7 @@ dependencies = [
  "bincode",
  "chrono",
  "clap 4.5.60",
+ "ctor",
  "dashmap 5.5.3",
  "datafusion",
  "datafusion_summary_library",
@@ -3881,9 +3892,12 @@ checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
 name = "sketch-core"
 version = "0.1.0"
 dependencies = [
+ "clap 4.5.60",
+ "ctor",
  "dsrs",
  "rmp-serde",
  "serde",
+ "sketchlib-rust",
  "xxhash-rust",
 ]
 
diff --git a/asap-common/sketch-core/Cargo.toml b/asap-common/sketch-core/Cargo.toml
index 2dbea8b..70195a2 100644
--- a/asap-common/sketch-core/Cargo.toml
+++ b/asap-common/sketch-core/Cargo.toml
@@ -9,3 +9,8 @@ serde.workspace = true
 rmp-serde = "1.1"
 xxhash-rust = { version = "0.8", features = ["xxh32"] }
 dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" }
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
+clap = { version = "4.0", features = ["derive"] }
+
+[dev-dependencies]
+ctor = "0.2"
diff --git a/asap-common/sketch-core/report.md b/asap-common/sketch-core/report.md
new file mode 100644
index 0000000..9b6e093
--- /dev/null
+++ b/asap-common/sketch-core/report.md
@@ -0,0 +1,131 @@
+# Report
+
+Compares the **legacy** sketch implementations in `sketch-core` vs the new **sketchlib-rust** backends for:
+
+- `CountMinSketch`
+- `CountMinSketchWithHeap` (Count-Min portion)
+- `KllSketch`
+- `HydraKllSketch` (via `KllSketch`)
+
+
+
+
+### Fidelity harness
+
+The fidelity binary now selects backends via CLI flags instead of environment variables.
+
+| Goal                     | Command                                                                                                      |
+|--------------------------|--------------------------------------------------------------------------------------------------------------|
+| Default (all sketchlib)  | `cargo run -p sketch-core --bin sketchlib_fidelity`                                                          |
+| All legacy               | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy --kll-impl legacy --cmwh-impl legacy` |
+| Legacy KLL only          | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib --kll-impl legacy --cmwh-impl sketchlib` |
+
+### Unit tests
+
+Unit tests always run with **legacy** backends enabled (the test ctor calls
+`force_legacy_mode_for_tests()`), so you only need:
+
+```bash
+cargo test -p sketch-core
+```
+
+## Results
+
+### CountMinSketch (accuracy vs exact counts)
+
+#### depth=3
+
+| width | n      | domain | Mode           | Pearson corr   | MAPE (%) | RMSE (%) |
+|-------|--------|--------|----------------|----------------|----------|----------|
+| 1024  | 100000 | 1000   | Legacy         | 0.9998451189   | 24.48    | 52.76    |
+| 1024  | 100000 | 1000   | sketchlib-rust | 0.9998387103   | 24.36    | 54.11    |
+
+#### depth=5
+
+| width | n      | domain | Mode           | Pearson corr   | MAPE (%) | RMSE (%) |
+|-------|--------|--------|----------------|----------------|----------|----------|
+| 2048  | 200000 | 2000   | Legacy         | 0.9999733814   | 8.75     | 29.94    |
+| 2048  | 200000 | 2000   | sketchlib-rust | 0.9999744627   | 8.37     | 28.84    |
+| 2048  | 50000  | 500    | Legacy         | 1.0000000000   | 0.00     | 0.00     |
+| 2048  | 50000  | 500    | sketchlib-rust | 1.0000000000   | 0.00     | 0.00     |
+
+#### depth=7
+
+| width | n      | domain | Mode           | Pearson corr   | MAPE (%) | RMSE (%) |
+|-------|--------|--------|----------------|----------------|----------|----------|
+| 4096  | 200000 | 2000   | Legacy         | 0.9999993694   | 0.20     | 3.69     |
+| 4096  | 200000 | 2000   | sketchlib-rust | 0.9999993499   | 0.21     | 4.27     |
+
+---
+
+### CountMinSketchWithHeap (top-k + CMS accuracy on exact top-k)
+
+The heap is maintained by local updates; recall is measured against the **true** top-k at the end of the stream.
+
+#### depth=3
+
+| width | n      | domain | heap_size | Mode           | Top-k recall | Pearson (top-k) | MAPE (%) | RMSE (%) |
+|-------|--------|--------|-----------|----------------|--------------|-----------------|----------|----------|
+| 1024  | 100000 | 1000   | 10        | Legacy         | 0.40         | 0.9571          | 0.174    | 0.319    |
+| 1024  | 100000 | 1000   | 10        | sketchlib-rust | 0.40         | 1.0000          | 0.000    | 0.000    |
+
+#### depth=5
+
+| width | n      | domain | heap_size | Mode           | Top-k recall | Pearson (top-k) | MAPE (%) | RMSE (%) |
+|-------|--------|--------|-----------|----------------|--------------|-----------------|----------|----------|
+| 2048  | 200000 | 2000   | 20        | Legacy         | 0.60         | 0.9964          | 0.045    | 0.101    |
+| 2048  | 200000 | 2000   | 20        | sketchlib-rust | 0.60         | 0.9982          | 0.021    | 0.067    |
+| 2048  | 200000 | 2000   | 50        | Legacy         | 0.40         | 0.9999983       | 5.60     | 16.49    |
+| 2048  | 200000 | 2000   | 50        | sketchlib-rust | 0.40         | 0.9999990       | 3.90     | 12.95    |
+
+---
+
+### KllSketch (quantiles, absolute rank error)
+
+For each quantile \(q\), we compute the sketch estimate `est_value`, then:
+`abs_rank_error = |rank_fraction(exact_sorted_values, est_value) - q|`.
+
+#### k=20
+
+| n_updates | Mode           | q=0.5   | q=0.9   | q=0.99  |
+|-----------|----------------|---------|---------|---------|
+| 200000    | Legacy         | 0.0104  | 0.0145  | 0.0028  |
+| 200000    | sketchlib-rust | 0.0275  | 0.0470  | 0.0061  |
+| 50000     | Legacy         | 0.0131  | 0.0091  | 0.0054  |
+| 50000     | sketchlib-rust | 0.0110  | 0.0116  | 0.0031  |
+
+#### k=50
+
+| n_updates | Mode           | q=0.5   | q=0.9   | q=0.99  |
+|-----------|----------------|---------|---------|---------|
+| 200000    | Legacy         | 0.0013  | 0.0021  | 0.0012  |
+| 200000    | sketchlib-rust | 0.0101  | 0.0044  | 0.0074  |
+
+#### k=200
+
+| n_updates | Mode           | q=0.5   | q=0.9   | q=0.99  |
+|-----------|----------------|---------|---------|---------|
+| 200000    | Legacy         | 0.0021  | 0.0036  | 0.0000  |
+| 200000    | sketchlib-rust | 0.0015  | 0.0001  | 0.0002  |
+
+---
+
+### HydraKllSketch (per-key quantiles, mean/max absolute rank error across 50 keys)
+
+#### rows=2, cols=64
+
+| k   | n      | domain | Mode           | q=0.5 (mean / max) | q=0.9 (mean / max) |
+|-----|--------|--------|----------------|--------------------|--------------------|
+| 20  | 200000 | 200    | Legacy         | 0.0170 / 0.0546    | 0.0165 / 0.0452    |
+| 20  | 200000 | 200    | sketchlib-rust | 0.0254 / 0.0629    | 0.0546 / 0.0942    |
+
+#### rows=3, cols=128
+
+| k   | n      | domain | Mode           | q=0.5 (mean / max) | q=0.9 (mean / max) |
+|-----|--------|--------|----------------|--------------------|--------------------|
+| 20  | 200000 | 200    | Legacy         | 0.0166 / 0.0591    | 0.0114 / 0.0304    |
+| 20  | 200000 | 200    | sketchlib-rust | 0.0216 / 0.0534    | 0.0238 / 0.1087    |
+| 50  | 200000 | 200    | Legacy         | 0.0099 / 0.0352    | 0.0087 / 0.0330    |
+| 50  | 200000 | 200    | sketchlib-rust | 0.0119 / 0.0458    | 0.0119 / 0.0296    |
+| 20  | 100000 | 100    | Legacy         | 0.0141 / 0.0574    | 0.0149 / 0.0471    |
+| 20  | 100000 | 100    | sketchlib-rust | 0.0202 / 0.0621    | 0.0287 / 0.0779    |
diff --git a/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
new file mode 100644
index 0000000..3f6b263
--- /dev/null
+++ b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
@@ -0,0 +1,488 @@
+use std::collections::HashMap;
+
+use clap::Parser;
+use sketch_core::config::{self, ImplMode};
+use sketch_core::count_min::CountMinSketch;
+use sketch_core::count_min_with_heap::CountMinSketchWithHeap;
+use sketch_core::hydra_kll::HydraKllSketch;
+use sketch_core::kll::KllSketch;
+
+#[derive(Clone)]
+struct Lcg64 {
+    state: u64,
+}
+
+impl Lcg64 {
+    fn new(seed: u64) -> Self {
+        Self { state: seed }
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        self.state = self
+            .state
+            .wrapping_mul(6364136223846793005)
+            .wrapping_add(1442695040888963407);
+        self.state
+    }
+
+    fn next_f64_0_1(&mut self) -> f64 {
+        let x = self.next_u64() >> 11;
+        (x as f64) / ((1u64 << 53) as f64)
+    }
+}
+
+fn pearson_corr(exact: &[f64], est: &[f64]) -> f64 {
+    let n = exact.len().min(est.len());
+    if n == 0 {
+        return f64::NAN;
+    }
+    let (mut sum_x, mut sum_y) = (0.0, 0.0);
+    for i in 0..n {
+        sum_x += exact[i];
+        sum_y += est[i];
+    }
+    let mean_x = sum_x / (n as f64);
+    let mean_y = sum_y / (n as f64);
+    let (mut num, mut den_x, mut den_y) = (0.0, 0.0, 0.0);
+    for i in 0..n {
+        let dx = exact[i] - mean_x;
+        let dy = est[i] - mean_y;
+        num += dx * dy;
+        den_x += dx * dx;
+        den_y += dy * dy;
+    }
+    if den_x == 0.0 || den_y == 0.0 {
+        return f64::NAN;
+    }
+    num / (den_x.sqrt() * den_y.sqrt())
+}
+
+fn mape(exact: &[f64], est: &[f64]) -> f64 {
+    let n = exact.len().min(est.len());
+    let mut num = 0.0;
+    let mut denom = 0.0;
+    for i in 0..n {
+        if exact[i] == 0.0 {
+            continue;
+        }
+        num += ((exact[i] - est[i]) / exact[i]).abs();
+        denom += 1.0;
+    }
+    if denom == 0.0 {
+        return if exact == est { 0.0 } else { f64::INFINITY };
+    }
+    (num / denom) * 100.0
+}
+
+fn rmse_percentage(exact: &[f64], est: &[f64]) -> f64 {
+    let n = exact.len().min(est.len());
+    let mut sum_sq = 0.0;
+    let mut denom = 0.0;
+    for i in 0..n {
+        if exact[i] == 0.0 {
+            continue;
+        }
+        let rel = (exact[i] - est[i]) / exact[i];
+        sum_sq += rel * rel;
+        denom += 1.0;
+    }
+    if denom == 0.0 {
+        return if exact == est { 0.0 } else { f64::INFINITY };
+    }
+    (sum_sq / denom).sqrt() * 100.0
+}
+
+#[derive(Parser)]
+struct Args {
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    cms_impl: ImplMode,
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    kll_impl: ImplMode,
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    cmwh_impl: ImplMode,
+}
+
+fn rank_fraction(sorted: &[f64], x: f64) -> f64 {
+    if sorted.is_empty() {
+        return 0.0;
+    }
+    let idx = sorted.partition_point(|v| *v <= x);
+    (idx as f64) / (sorted.len() as f64)
+}
+
+// --- CountMinSketch parameter sets and runner ---
+
+struct CmsParams {
+    depth: usize,
+    width: usize,
+    n: usize,
+    domain: usize,
+}
+
+struct CmsResult {
+    pearson: f64,
+    mape: f64,
+    rmse: f64,
+}
+
+fn run_countmin_once(seed: u64, p: &CmsParams) -> CmsResult {
+    let mut rng = Lcg64::new(seed);
+    let mut exact: Vec<f64> = vec![0.0; p.domain];
+    let mut cms = CountMinSketch::new(p.depth, p.width);
+
+    for _ in 0..p.n {
+        let r = rng.next_u64();
+        let key_id = if (r & 0xFF) < 200 {
+            (r as usize) % 20
+        } else {
+            (r as usize) % p.domain
+        };
+        let key = format!("k{key_id}");
+        cms.update(&key, 1.0);
+        exact[key_id] += 1.0;
+    }
+
+    let mut est: Vec<f64> = Vec::with_capacity(p.domain);
+    for key_id in 0..p.domain {
+        let key = format!("k{key_id}");
+        est.push(cms.query_key(&key));
+    }
+
+    CmsResult {
+        pearson: pearson_corr(&exact, &est),
+        mape: mape(&exact, &est),
+        rmse: rmse_percentage(&exact, &est),
+    }
+}
+
+// --- CountMinSketchWithHeap ---
+
+struct CmwhParams {
+    depth: usize,
+    width: usize,
+    n: usize,
+    domain: usize,
+    heap_size: usize,
+}
+
+struct CmwhResult {
+    topk_recall: f64,
+    pearson: f64,
+    mape: f64,
+    rmse: f64,
+}
+
+fn run_countmin_with_heap_once(seed: u64, p: &CmwhParams) -> CmwhResult {
+    let mut rng = Lcg64::new(seed ^ 0xA5A5_A5A5);
+    let mut exact: Vec<f64> = vec![0.0; p.domain];
+    let mut cms = CountMinSketchWithHeap::new(p.depth, p.width, p.heap_size);
+
+    for _ in 0..p.n {
+        let r = rng.next_u64();
+        let key_id = if (r & 0xFF) < 200 {
+            (r as usize) % 20
+        } else {
+            (r as usize) % p.domain
+        };
+        let key = format!("k{key_id}");
+        cms.update(&key, 1.0);
+        exact[key_id] += 1.0;
+    }
+
+    let mut exact_pairs: Vec<(usize, f64)> = exact.iter().copied().enumerate().collect();
+    exact_pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
+    exact_pairs.truncate(p.heap_size);
+
+    let exact_top: HashMap<String, f64> = exact_pairs
+        .into_iter()
+        .map(|(k, v)| (format!("k{k}"), v))
+        .collect();
+
+    let mut est_vals = Vec::with_capacity(exact_top.len());
+    let mut exact_vals = Vec::with_capacity(exact_top.len());
+    let mut hit = 0usize;
+    for item in cms.topk_heap_items() {
+        if exact_top.contains_key(&item.key) {
+            hit += 1;
+        }
+    }
+    for (k, v) in &exact_top {
+        exact_vals.push(*v);
+        est_vals.push(cms.query_key(k));
+    }
+
+    CmwhResult {
+        topk_recall: (hit as f64) / (p.heap_size as f64),
+        pearson: pearson_corr(&exact_vals, &est_vals),
+        mape: mape(&exact_vals, &est_vals),
+        rmse: rmse_percentage(&exact_vals, &est_vals),
+    }
+}
+
+// --- KllSketch ---
+
+struct KllParams {
+    k: u16,
+    n: usize,
+}
+
+struct KllResult {
+    rank_err_50: f64,
+    rank_err_90: f64,
+    rank_err_99: f64,
+}
+
+fn run_kll_once(seed: u64, p: &KllParams) -> KllResult {
+    let mut rng = Lcg64::new(seed ^ 0x1234_5678);
+    let mut values: Vec<f64> = Vec::with_capacity(p.n);
+    let mut sk = KllSketch::new(p.k);
+
+    for _ in 0..p.n {
+        let v = rng.next_f64_0_1() * 1_000_000.0;
+        values.push(v);
+        sk.update(v);
+    }
+
+    values.sort_by(f64::total_cmp);
+    let qs = [0.5, 0.9, 0.99];
+    let rank_err = |q: f64| (rank_fraction(&values, sk.get_quantile(q)) - q).abs();
+
+    KllResult {
+        rank_err_50: rank_err(qs[0]),
+        rank_err_90: rank_err(qs[1]),
+        rank_err_99: rank_err(qs[2]),
+    }
+}
+
+// --- HydraKllSketch ---
+
+struct HydraKllParams {
+    rows: usize,
+    cols: usize,
+    k: u16,
+    n: usize,
+    domain: usize,
+    eval_keys: usize,
+}
+
+struct HydraKllResult {
+    mean_50: f64,
+    max_50: f64,
+    mean_90: f64,
+    max_90: f64,
+}
+
+fn run_hydra_kll_once(seed: u64, p: &HydraKllParams) -> HydraKllResult {
+    let mut rng = Lcg64::new(seed ^ 0xDEAD_BEEF);
+    let mut hydra = HydraKllSketch::new(p.rows, p.cols, p.k);
+    let mut exact: HashMap<String, Vec<f64>> = HashMap::new();
+
+    for _ in 0..p.n {
+        let r = rng.next_u64();
+        let key_id = if (r & 0xFF) < 200 {
+            (r as usize) % 20
+        } else {
+            (r as usize) % p.domain
+        };
+        let key = format!("k{key_id}");
+        let v = rng.next_f64_0_1() * 1_000_000.0;
+        hydra.update(&key, v);
+        exact.entry(key).or_default().push(v);
+    }
+
+    let _qs = [0.5, 0.9];
+    let mut keys: Vec<String> = exact.keys().cloned().collect();
+    keys.sort();
+    keys.truncate(p.eval_keys);
+
+    let mut mean_50 = 0.0f64;
+    let mut max_50 = 0.0f64;
+    let mut mean_90 = 0.0f64;
+    let mut max_90 = 0.0f64;
+    let nk = keys.len() as f64;
+    for key in &keys {
+        let mut vals = exact.get(key).cloned().unwrap_or_default();
+        vals.sort_by(f64::total_cmp);
+        for (q, mean_ref, max_ref) in [
+            (0.5, &mut mean_50, &mut max_50),
+            (0.9, &mut mean_90, &mut max_90),
+        ] {
+            let est = hydra.query(key, q);
+            let err = (rank_fraction(&vals, est) - q).abs();
+            *mean_ref += err;
+            if err > *max_ref {
+                *max_ref = err;
+            }
+        }
+    }
+    mean_50 /= nk;
+    mean_90 /= nk;
+
+    HydraKllResult {
+        mean_50,
+        max_50,
+        mean_90,
+        max_90,
+    }
+}
+
+fn main() {
+    let args = Args::parse();
+    config::configure(args.cms_impl, args.kll_impl, args.cmwh_impl)
+        .expect("sketch backend already initialised");
+
+    let seed = 0xC0FFEE_u64;
+    let mode = if matches!(args.cms_impl, ImplMode::Legacy)
+        || matches!(args.kll_impl, ImplMode::Legacy)
+        || matches!(args.cmwh_impl, ImplMode::Legacy)
+    {
+        "Legacy"
+    } else {
+        "sketchlib-rust"
+    };
+
+    // CountMinSketch: multiple (depth, width, n, domain)
+    let cms_param_sets: Vec<CmsParams> = vec![
+        CmsParams {
+            depth: 3,
+            width: 1024,
+            n: 100_000,
+            domain: 1000,
+        },
+        CmsParams {
+            depth: 5,
+            width: 2048,
+            n: 200_000,
+            domain: 2000,
+        },
+        CmsParams {
+            depth: 7,
+            width: 4096,
+            n: 200_000,
+            domain: 2000,
+        },
+        CmsParams {
+            depth: 5,
+            width: 2048,
+            n: 50_000,
+            domain: 500,
+        },
+    ];
+
+    println!("## CountMinSketch ({mode})");
+    println!("| depth | width | n_updates | domain | Pearson corr | MAPE (%) | RMSE (%) |");
+    println!("|-------|-------|------------|--------|--------------|----------|----------|");
+    for p in &cms_param_sets {
+        let r = run_countmin_once(seed, p);
+        println!(
+            "| {} | {} | {} | {} | {:.10} | {:.6} | {:.6} |",
+            p.depth, p.width, p.n, p.domain, r.pearson, r.mape, r.rmse
+        );
+    }
+
+    // CountMinSketchWithHeap
+    let cmwh_param_sets: Vec<CmwhParams> = vec![
+        CmwhParams {
+            depth: 3,
+            width: 1024,
+            n: 100_000,
+            domain: 1000,
+            heap_size: 10,
+        },
+        CmwhParams {
+            depth: 5,
+            width: 2048,
+            n: 200_000,
+            domain: 2000,
+            heap_size: 20,
+        },
+        CmwhParams {
+            depth: 5,
+            width: 2048,
+            n: 200_000,
+            domain: 2000,
+            heap_size: 50,
+        },
+    ];
+
+    println!("\n## CountMinSketchWithHeap ({mode})");
+    println!("| depth | width | n | domain | heap_size | Top-k recall | Pearson (top-k) | MAPE (%) | RMSE (%) |");
+    println!("|-------|-------|-----|--------|-----------|--------------|-----------------|----------|----------|");
+    for p in &cmwh_param_sets {
+        let r = run_countmin_with_heap_once(seed, p);
+        println!(
+            "| {} | {} | {} | {} | {} | {:.4} | {:.10} | {:.6} | {:.6} |",
+            p.depth, p.width, p.n, p.domain, p.heap_size, r.topk_recall, r.pearson, r.mape, r.rmse
+        );
+    }
+
+    // KllSketch
+    let kll_param_sets: Vec<KllParams> = vec![
+        KllParams { k: 20, n: 200_000 },
+        KllParams { k: 50, n: 200_000 },
+        KllParams { k: 200, n: 200_000 },
+        KllParams { k: 20, n: 50_000 },
+    ];
+
+    println!("\n## KllSketch ({mode})");
+    println!(
+        "| k | n_updates | q=0.5 abs_rank_error | q=0.9 abs_rank_error | q=0.99 abs_rank_error |"
+    );
+    println!(
+        "|---|-----------|----------------------|----------------------|-----------------------|"
+    );
+    for p in &kll_param_sets {
+        let r = run_kll_once(seed, p);
+        println!(
+            "| {} | {} | {:.6} | {:.6} | {:.6} |",
+            p.k, p.n, r.rank_err_50, r.rank_err_90, r.rank_err_99
+        );
+    }
+
+    // HydraKllSketch
+    let hydra_param_sets: Vec<HydraKllParams> = vec![
+        HydraKllParams {
+            rows: 2,
+            cols: 64,
+            k: 20,
+            n: 200_000,
+            domain: 200,
+            eval_keys: 50,
+        },
+        HydraKllParams {
+            rows: 3,
+            cols: 128,
+            k: 20,
+            n: 200_000,
+            domain: 200,
+            eval_keys: 50,
+        },
+        HydraKllParams {
+            rows: 3,
+            cols: 128,
+            k: 50,
+            n: 200_000,
+            domain: 200,
+            eval_keys: 50,
+        },
+        HydraKllParams {
+            rows: 3,
+            cols: 128,
+            k: 20,
+            n: 100_000,
+            domain: 100,
+            eval_keys: 50,
+        },
+    ];
+
+    println!("\n## HydraKllSketch ({mode})");
+    println!("| rows | cols | k | n | domain | q=0.5 mean/max | q=0.9 mean/max |");
+    println!("|------|------|---|-----|--------|----------------|----------------|");
+    for p in &hydra_param_sets {
+        let r = run_hydra_kll_once(seed, p);
+        println!(
+            "| {} | {} | {} | {} | {} | {:.5} / {:.5} | {:.5} / {:.5} |",
+            p.rows, p.cols, p.k, p.n, p.domain, r.mean_50, r.max_50, r.mean_90, r.max_90
+        );
+    }
+}
diff --git a/asap-common/sketch-core/src/config.rs b/asap-common/sketch-core/src/config.rs
new file mode 100644
index 0000000..84267b0
--- /dev/null
+++ b/asap-common/sketch-core/src/config.rs
@@ -0,0 +1,75 @@
+use std::sync::OnceLock;
+
+/// Implementation mode for sketch-core internals.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+pub enum ImplMode {
+    /// Use the original hand-written implementations.
+    Legacy,
+    /// Use sketchlib-rust backed implementations.
+    Sketchlib,
+}
+
+static COUNTMIN_MODE: OnceLock<ImplMode> = OnceLock::new();
+
+/// Returns true if Count-Min operations should use sketchlib-rust internally.
+pub fn use_sketchlib_for_count_min() -> bool {
+    *COUNTMIN_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+}
+
+static KLL_MODE: OnceLock<ImplMode> = OnceLock::new();
+
+/// Returns true if KLL operations should use sketchlib-rust internally.
+pub fn use_sketchlib_for_kll() -> bool {
+    *KLL_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+}
+
+static COUNTMIN_WITH_HEAP_MODE: OnceLock<ImplMode> = OnceLock::new();
+
+/// Returns true if Count-Min-With-Heap operations should use sketchlib-rust internally for the
+/// Count-Min portion.
+pub fn use_sketchlib_for_count_min_with_heap() -> bool {
+    *COUNTMIN_WITH_HEAP_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+}
+
+/// Set backend modes for all sketch types. Call once at process startup,
+/// before any sketch operation. Returns Err if any OnceLock was already set.
+pub fn configure(cms: ImplMode, kll: ImplMode, cmwh: ImplMode) -> Result<(), &'static str> {
+    let a = COUNTMIN_MODE.set(cms);
+    let b = KLL_MODE.set(kll);
+    let c = COUNTMIN_WITH_HEAP_MODE.set(cmwh);
+    if a.is_err() || b.is_err() || c.is_err() {
+        Err("configure() called after sketch backends were already initialised")
+    } else {
+        Ok(())
+    }
+}
+
+pub fn force_legacy_mode_for_tests() {
+    let _ = COUNTMIN_MODE.set(ImplMode::Legacy);
+    let _ = KLL_MODE.set(ImplMode::Legacy);
+    let _ = COUNTMIN_WITH_HEAP_MODE.set(ImplMode::Legacy);
+}
+
+/// Helper used by UDF templates and documentation examples to parse implementation mode
+/// from environment variables in a robust way. This is not used in the hot path.
+pub fn parse_mode(var: Result<String, std::env::VarError>) -> ImplMode {
+    match var {
+        Ok(v) => match v.to_ascii_lowercase().as_str() {
+            "legacy" => ImplMode::Legacy,
+            "sketchlib" => ImplMode::Sketchlib,
+            other => {
+                eprintln!(
+                    "sketch-core: unrecognised IMPL value {other:?}, defaulting to Sketchlib"
+                );
+                ImplMode::Sketchlib
+            }
+        },
+        Err(std::env::VarError::NotPresent) => ImplMode::Sketchlib,
+        Err(std::env::VarError::NotUnicode(v)) => {
+            eprintln!(
+                "sketch-core: IMPL env var has invalid UTF-8 ({v:?}), defaulting to Sketchlib"
+            );
+            ImplMode::Sketchlib
+        }
+    }
+}
diff --git a/asap-common/sketch-core/src/count_min.rs b/asap-common/sketch-core/src/count_min.rs
index fcd7794..a77e8bb 100644
--- a/asap-common/sketch-core/src/count_min.rs
+++ b/asap-common/sketch-core/src/count_min.rs
@@ -14,47 +14,113 @@
 use serde::{Deserialize, Serialize};
 use xxhash_rust::xxh32::xxh32;
 
+use crate::config::use_sketchlib_for_count_min;
+use crate::count_min_sketchlib::{
+    matrix_from_sketchlib_cms, new_sketchlib_cms, sketchlib_cms_from_matrix, sketchlib_cms_query,
+    sketchlib_cms_update, SketchlibCms,
+};
+
+#[derive(Serialize, Deserialize)]
+struct WireFormat {
+    sketch: Vec<Vec<f64>>,
+    row_num: usize,
+    col_num: usize,
+}
+
+/// Backend implementation for Count-Min Sketch. Only one is active at a time.
+#[derive(Debug, Clone)]
+pub enum CountMinBackend {
+    /// Original hand-written matrix implementation.
+    Legacy(Vec<Vec<f64>>),
+    /// sketchlib-rust backed implementation.
+    Sketchlib(SketchlibCms),
+}
+
 /// Count-Min Sketch probabilistic data structure for frequency counting.
 /// Provides approximate frequency counts with error bounds.
 /// This is the canonical shared implementation; the msgpack wire format is the
 /// contract between Arroyo UDAFs (producers) and QueryEngineRust (consumer).
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone)]
 pub struct CountMinSketch {
-    pub sketch: Vec<Vec<f64>>,
     pub row_num: usize,
     pub col_num: usize,
+    pub backend: CountMinBackend,
 }
 
 impl CountMinSketch {
     pub fn new(row_num: usize, col_num: usize) -> Self {
-        let sketch = vec![vec![0.0; col_num]; row_num];
+        let backend = if use_sketchlib_for_count_min() {
+            CountMinBackend::Sketchlib(new_sketchlib_cms(row_num, col_num))
+        } else {
+            CountMinBackend::Legacy(vec![vec![0.0; col_num]; row_num])
+        };
         Self {
-            sketch,
             row_num,
             col_num,
+            backend,
+        }
+    }
+
+    /// Returns the sketch matrix (for wire format, serialization, tests).
+    pub fn sketch(&self) -> Vec<Vec<f64>> {
+        match &self.backend {
+            CountMinBackend::Legacy(m) => m.clone(),
+            CountMinBackend::Sketchlib(s) => matrix_from_sketchlib_cms(s),
+        }
+    }
+
+    /// Mutable access to the matrix. Only `Some` for Legacy backend.
+    pub fn sketch_mut(&mut self) -> Option<&mut Vec<Vec<f64>>> {
+        match &mut self.backend {
+            CountMinBackend::Legacy(m) => Some(m),
+            CountMinBackend::Sketchlib(_) => None,
+        }
+    }
+
+    /// Construct from a legacy matrix (used by deserialization and query engine).
+    pub fn from_legacy_matrix(sketch: Vec<Vec<f64>>, row_num: usize, col_num: usize) -> Self {
+        let backend = if use_sketchlib_for_count_min() {
+            CountMinBackend::Sketchlib(sketchlib_cms_from_matrix(row_num, col_num, &sketch))
+        } else {
+            CountMinBackend::Legacy(sketch)
+        };
+        Self {
+            row_num,
+            col_num,
+            backend,
         }
     }
 
     pub fn update(&mut self, key: &str, value: f64) {
-        let key_bytes = key.as_bytes();
-        // Update each row using different hash functions
-        for i in 0..self.row_num {
-            let hash_value = xxh32(key_bytes, i as u32);
-            let col_index = (hash_value as usize) % self.col_num;
-            self.sketch[i][col_index] += value;
+        match &mut self.backend {
+            CountMinBackend::Legacy(sketch) => {
+                let key_bytes = key.as_bytes();
+                for (i, row) in sketch.iter_mut().enumerate().take(self.row_num) {
+                    let hash_value = xxh32(key_bytes, i as u32);
+                    let col_index = (hash_value as usize) % self.col_num;
+                    row[col_index] += value;
+                }
+            }
+            CountMinBackend::Sketchlib(s) => {
+                sketchlib_cms_update(s, key, value);
+            }
         }
     }
 
     pub fn query_key(&self, key: &str) -> f64 {
-        let key_bytes = key.as_bytes();
-        let mut min_value = f64::MAX;
-        // Query each row and take the minimum
-        for i in 0..self.row_num {
-            let hash_value = xxh32(key_bytes, i as u32);
-            let col_index = (hash_value as usize) % self.col_num;
-            min_value = min_value.min(self.sketch[i][col_index]);
+        match &self.backend {
+            CountMinBackend::Legacy(sketch) => {
+                let key_bytes = key.as_bytes();
+                let mut min_value = f64::MAX;
+                for (i, row) in sketch.iter().enumerate().take(self.row_num) {
+                    let hash_value = xxh32(key_bytes, i as u32);
+                    let col_index = (hash_value as usize) % self.col_num;
+                    min_value = min_value.min(row[col_index]);
+                }
+                min_value
+            }
+            CountMinBackend::Sketchlib(s) => sketchlib_cms_query(s, key),
         }
-        min_value
     }
 
     pub fn merge(
@@ -80,17 +146,44 @@ impl CountMinSketch {
             }
         }
 
-        let mut merged = accumulators[0].clone();
-        // Add all sketches element-wise
-        for acc in &accumulators[1..] {
-            for (merged_row, acc_row) in merged.sketch.iter_mut().zip(&acc.sketch) {
-                for (m_cell, a_cell) in merged_row.iter_mut().zip(acc_row.iter()) {
-                    *m_cell += *a_cell;
+        if use_sketchlib_for_count_min() {
+            let mut sketchlib_inners: Vec<SketchlibCms> = Vec::with_capacity(accumulators.len());
+            for acc in accumulators {
+                let matrix = acc.sketch();
+                let inner = sketchlib_cms_from_matrix(acc.row_num, acc.col_num, &matrix);
+                sketchlib_inners.push(inner);
+            }
+            let merged_sketchlib = sketchlib_inners
+                .into_iter()
+                .reduce(|mut lhs: SketchlibCms, rhs: SketchlibCms| {
+                    lhs.merge(&rhs);
+                    lhs
+                })
+                .ok_or("No accumulators to merge")?;
+
+            let sketch = matrix_from_sketchlib_cms(&merged_sketchlib);
+            let row_num = sketch.len();
+            let col_num = sketch.first().map(|r| r.len()).unwrap_or(0);
+
+            Ok(Self {
+                row_num,
+                col_num,
+                backend: CountMinBackend::Sketchlib(merged_sketchlib),
+            })
+        } else {
+            let mut merged = accumulators[0].clone();
+            for acc in &accumulators[1..] {
+                let acc_matrix = acc.sketch();
+                if let CountMinBackend::Legacy(merged_matrix) = &mut merged.backend {
+                    for (merged_row, acc_row) in merged_matrix.iter_mut().zip(acc_matrix.iter()) {
+                        for (m_cell, a_cell) in merged_row.iter_mut().zip(acc_row.iter()) {
+                            *m_cell += *a_cell;
+                        }
+                    }
                 }
             }
+            Ok(merged)
         }
-
-        Ok(merged)
     }
 
     /// Merge from references, allocating only the output — no input clones.
@@ -112,31 +205,94 @@ impl CountMinSketch {
             }
         }
 
-        let mut merged = Self::new(row_num, col_num);
-        for acc in accumulators {
-            for (merged_row, acc_row) in merged.sketch.iter_mut().zip(&acc.sketch) {
-                for (m_cell, a_cell) in merged_row.iter_mut().zip(acc_row.iter()) {
-                    *m_cell += *a_cell;
+        if use_sketchlib_for_count_min() {
+            let mut sketchlib_inners: Vec<SketchlibCms> = Vec::with_capacity(accumulators.len());
+            for acc in accumulators {
+                let acc_matrix = acc.sketch();
+                let matrix_has_values = acc_matrix
+                    .iter()
+                    .any(|row: &Vec<f64>| row.iter().any(|&v| v != 0.0));
+
+                let inner = if matrix_has_values {
+                    sketchlib_cms_from_matrix(acc.row_num, acc.col_num, &acc_matrix)
+                } else if let CountMinBackend::Sketchlib(s) = &acc.backend {
+                    s.clone()
+                } else {
+                    sketchlib_cms_from_matrix(acc.row_num, acc.col_num, &acc_matrix)
+                };
+
+                sketchlib_inners.push(inner);
+            }
+
+            let merged_sketchlib = sketchlib_inners
+                .into_iter()
+                .reduce(|mut lhs: SketchlibCms, rhs: SketchlibCms| {
+                    lhs.merge(&rhs);
+                    lhs
+                })
+                .ok_or("No accumulators to merge")?;
+
+            let sketch = matrix_from_sketchlib_cms(&merged_sketchlib);
+            let r = sketch.len();
+            let c = sketch.first().map(|row| row.len()).unwrap_or(0);
+
+            Ok(Self {
+                row_num: r,
+                col_num: c,
+                backend: CountMinBackend::Sketchlib(merged_sketchlib),
+            })
+        } else {
+            let mut merged = Self::new(row_num, col_num);
+            if let CountMinBackend::Legacy(ref mut merged_sketch) = merged.backend {
+                for acc in accumulators {
+                    let acc_matrix = acc.sketch();
+                    for (merged_row, acc_row) in merged_sketch.iter_mut().zip(acc_matrix.iter()) {
+                        for (m_cell, a_cell) in merged_row.iter_mut().zip(acc_row.iter()) {
+                            *m_cell += *a_cell;
+                        }
+                    }
                 }
             }
+            Ok(merged)
         }
-
-        Ok(merged)
     }
 
     /// Serialize to MessagePack — matches the Arroyo UDF wire format exactly.
     pub fn serialize_msgpack(&self) -> Vec<u8> {
-        // Match Arroyo UDF: countminsketch.serialize(&mut Serializer::new(&mut buf))
+        let sketch = self.sketch();
+        let wire = WireFormat {
+            sketch,
+            row_num: self.row_num,
+            col_num: self.col_num,
+        };
+
         let mut buf = Vec::new();
-        self.serialize(&mut rmp_serde::Serializer::new(&mut buf))
+        wire.serialize(&mut rmp_serde::Serializer::new(&mut buf))
             .unwrap();
         buf
     }
 
     /// Deserialize from MessagePack produced by the Arroyo UDF.
     pub fn deserialize_msgpack(buffer: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
-        rmp_serde::from_slice(buffer).map_err(|e| {
-            format!("Failed to deserialize CountMinSketch from MessagePack: {e}").into()
+        let wire: WireFormat =
+            rmp_serde::from_slice(buffer).map_err(|e| -> Box<dyn std::error::Error> {
+                format!("Failed to deserialize CountMinSketch from MessagePack: {e}").into()
+            })?;
+
+        let backend = if use_sketchlib_for_count_min() {
+            CountMinBackend::Sketchlib(sketchlib_cms_from_matrix(
+                wire.row_num,
+                wire.col_num,
+                &wire.sketch,
+            ))
+        } else {
+            CountMinBackend::Legacy(wire.sketch)
+        };
+
+        Ok(Self {
+            row_num: wire.row_num,
+            col_num: wire.col_num,
+            backend,
         })
     }
 
@@ -178,11 +334,12 @@ mod tests {
         let cms = CountMinSketch::new(4, 1000);
         assert_eq!(cms.row_num, 4);
         assert_eq!(cms.col_num, 1000);
-        assert_eq!(cms.sketch.len(), 4);
-        assert_eq!(cms.sketch[0].len(), 1000);
+        let sketch = cms.sketch();
+        assert_eq!(sketch.len(), 4);
+        assert_eq!(sketch[0].len(), 1000);
 
         // Check all values are initialized to 0
-        for row in &cms.sketch {
+        for row in &sketch {
             for &value in row {
                 assert_eq!(value, 0.0);
             }
@@ -206,20 +363,23 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_merge() {
-        let mut cms1 = CountMinSketch::new(2, 3);
-        let mut cms2 = CountMinSketch::new(2, 3);
-
-        cms1.sketch[0][0] = 5.0;
-        cms1.sketch[1][2] = 10.0;
+        // Use from_legacy_matrix so the test works regardless of sketchlib/legacy config
+        let mut sketch1 = vec![vec![0.0; 3]; 2];
+        sketch1[0][0] = 5.0;
+        sketch1[1][2] = 10.0;
+        let cms1 = CountMinSketch::from_legacy_matrix(sketch1, 2, 3);
 
-        cms2.sketch[0][0] = 3.0;
-        cms2.sketch[0][1] = 7.0;
+        let mut sketch2 = vec![vec![0.0; 3]; 2];
+        sketch2[0][0] = 3.0;
+        sketch2[0][1] = 7.0;
+        let cms2 = CountMinSketch::from_legacy_matrix(sketch2, 2, 3);
 
         let merged = CountMinSketch::merge(vec![cms1, cms2]).unwrap();
+        let merged_sketch = merged.sketch();
 
-        assert_eq!(merged.sketch[0][0], 8.0); // 5 + 3
-        assert_eq!(merged.sketch[0][1], 7.0); // 0 + 7
-        assert_eq!(merged.sketch[1][2], 10.0); // 10 + 0
+        assert_eq!(merged_sketch[0][0], 8.0); // 5 + 3
+        assert_eq!(merged_sketch[0][1], 7.0); // 0 + 7
+        assert_eq!(merged_sketch[1][2], 10.0); // 10 + 0
     }
 
     #[test]
@@ -231,17 +391,18 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_msgpack_round_trip() {
-        let mut cms = CountMinSketch::new(2, 3);
-        cms.sketch[0][1] = 42.0;
-        cms.sketch[1][2] = 100.0;
+        let mut cms = CountMinSketch::new(4, 256);
+        cms.update("apple", 5.0);
+        cms.update("banana", 3.0);
+        cms.update("apple", 2.0); // total "apple" = 7
 
         let bytes = cms.serialize_msgpack();
         let deserialized = CountMinSketch::deserialize_msgpack(&bytes).unwrap();
 
-        assert_eq!(deserialized.row_num, 2);
-        assert_eq!(deserialized.col_num, 3);
-        assert_eq!(deserialized.sketch[0][1], 42.0);
-        assert_eq!(deserialized.sketch[1][2], 100.0);
+        assert_eq!(deserialized.row_num, 4);
+        assert_eq!(deserialized.col_num, 256);
+        assert!(deserialized.query_key("apple") >= 7.0);
+        assert!(deserialized.query_key("banana") >= 3.0);
     }
 
     #[test]
diff --git a/asap-common/sketch-core/src/count_min_sketchlib.rs b/asap-common/sketch-core/src/count_min_sketchlib.rs
new file mode 100644
index 0000000..20fe7be
--- /dev/null
+++ b/asap-common/sketch-core/src/count_min_sketchlib.rs
@@ -0,0 +1,59 @@
+use sketchlib_rust::{CountMin, RegularPath, SketchInput, Vector2D};
+
+/// Concrete Count-Min type from sketchlib-rust when sketchlib backend is enabled.
+/// Uses f64 counters (Vector2D<f64>) for weighted updates without integer rounding.
+pub type SketchlibCms = CountMin<Vector2D<f64>, RegularPath>;
+
+/// Creates a fresh sketchlib Count-Min sketch with the given dimensions.
+pub fn new_sketchlib_cms(row_num: usize, col_num: usize) -> SketchlibCms {
+    SketchlibCms::with_dimensions(row_num, col_num)
+}
+
+/// Builds a sketchlib Count-Min sketch from an existing `sketch` matrix.
+pub fn sketchlib_cms_from_matrix(
+    row_num: usize,
+    col_num: usize,
+    sketch: &[Vec<f64>],
+) -> SketchlibCms {
+    let matrix = Vector2D::from_fn(row_num, col_num, |r, c| {
+        sketch
+            .get(r)
+            .and_then(|row| row.get(c))
+            .copied()
+            .unwrap_or(0.0)
+    });
+    SketchlibCms::from_storage(matrix)
+}
+
+/// Converts a sketchlib Count-Min sketch into the legacy `Vec<Vec<f64>>` matrix.
+pub fn matrix_from_sketchlib_cms(inner: &SketchlibCms) -> Vec<Vec<f64>> {
+    let storage: &Vector2D<f64> = inner.as_storage();
+    let rows = storage.rows();
+    let cols = storage.cols();
+    let mut sketch = vec![vec![0.0; cols]; rows];
+
+    for (r, row) in sketch.iter_mut().enumerate().take(rows) {
+        for (c, cell) in row.iter_mut().enumerate().take(cols) {
+            if let Some(v) = storage.get(r, c) {
+                *cell = *v;
+            }
+        }
+    }
+
+    sketch
+}
+
+/// Helper to update a sketchlib Count-Min with a weighted key.
+pub fn sketchlib_cms_update(inner: &mut SketchlibCms, key: &str, value: f64) {
+    if value <= 0.0 {
+        return;
+    }
+    let input = SketchInput::String(key.to_owned());
+    inner.insert_many(&input, value);
+}
+
+/// Helper to query a sketchlib Count-Min for a key, returning f64.
+pub fn sketchlib_cms_query(inner: &SketchlibCms, key: &str) -> f64 {
+    let input = SketchInput::String(key.to_owned());
+    inner.estimate(&input)
+}
diff --git a/asap-common/sketch-core/src/count_min_with_heap.rs b/asap-common/sketch-core/src/count_min_with_heap.rs
index 1c40ba3..39d69b3 100644
--- a/asap-common/sketch-core/src/count_min_with_heap.rs
+++ b/asap-common/sketch-core/src/count_min_with_heap.rs
@@ -11,6 +11,7 @@
 //   - Removed: AggregateCore, SerializableToSink, MergeableAccumulator, MultipleSubpopulationAggregate impls
 //   - Removed: get_topk_keys (returns KeyByLabelValues — QE-specific)
 //   - Added: insert_or_update_heap helper, aggregate_topk() one-shot helper
+//   - Refactored to enum-based backend (Legacy vs Sketchlib)
 //
 // NOTE (bug, do not fix): QueryEngineRust uses xxhash-rust::xxh32; the Arroyo template uses
 // twox-hash::XxHash32. Bucket assignments differ, so query results will be wrong until the
@@ -20,6 +21,13 @@ use serde::{Deserialize, Serialize};
 use std::collections::HashSet;
 use xxhash_rust::xxh32::xxh32;
 
+use crate::config::use_sketchlib_for_count_min_with_heap;
+use crate::count_min_with_heap_sketchlib::{
+    heap_to_wire, matrix_from_sketchlib_cms_heap, new_sketchlib_cms_heap,
+    sketchlib_cms_heap_from_matrix_and_heap, sketchlib_cms_heap_query, sketchlib_cms_heap_update,
+    SketchlibCMSHeap, WireHeapItem,
+};
+
 /// Item in the top-k heap representing a key-value pair.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct HeapItem {
@@ -43,52 +51,195 @@ struct CountMinSketchWithHeapSerialized {
     heap_size: usize,
 }
 
+/// Backend implementation for Count-Min Sketch with Heap. Only one is active at a time.
+pub enum CountMinWithHeapBackend {
+    /// Legacy implementation: matrix + local heap.
+    Legacy {
+        sketch: Vec<Vec<f64>>,
+        heap: Vec<HeapItem>,
+    },
+    /// sketchlib-rust CMSHeap implementation.
+    Sketchlib(SketchlibCMSHeap),
+}
+
+impl std::fmt::Debug for CountMinWithHeapBackend {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            CountMinWithHeapBackend::Legacy { sketch, heap } => f
+                .debug_struct("Legacy")
+                .field("sketch", sketch)
+                .field("heap", heap)
+                .finish(),
+            CountMinWithHeapBackend::Sketchlib(_) => write!(f, "Sketchlib(..)"),
+        }
+    }
+}
+
 /// Count-Min Sketch with Heap for top-k tracking.
 /// Combines probabilistic frequency counting with efficient top-k maintenance.
-#[derive(Debug, Clone)]
 pub struct CountMinSketchWithHeap {
-    pub sketch: Vec<Vec<f64>>,
     pub row_num: usize,
     pub col_num: usize,
-    pub topk_heap: Vec<HeapItem>,
     pub heap_size: usize,
+    pub backend: CountMinWithHeapBackend,
+}
+
+impl std::fmt::Debug for CountMinSketchWithHeap {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("CountMinSketchWithHeap")
+            .field("row_num", &self.row_num)
+            .field("col_num", &self.col_num)
+            .field("heap_size", &self.heap_size)
+            .field("backend", &self.backend)
+            .finish()
+    }
+}
+
+impl Clone for CountMinSketchWithHeap {
+    fn clone(&self) -> Self {
+        let backend = match &self.backend {
+            CountMinWithHeapBackend::Legacy { sketch, heap } => CountMinWithHeapBackend::Legacy {
+                sketch: sketch.clone(),
+                heap: heap.clone(),
+            },
+            CountMinWithHeapBackend::Sketchlib(cms_heap) => {
+                let sketch = matrix_from_sketchlib_cms_heap(cms_heap);
+                let heap_items: Vec<HeapItem> = heap_to_wire(cms_heap)
+                    .into_iter()
+                    .map(|w| HeapItem {
+                        key: w.key,
+                        value: w.value,
+                    })
+                    .collect();
+                let wire_ref: Vec<WireHeapItem> = heap_items
+                    .iter()
+                    .map(|h| WireHeapItem {
+                        key: h.key.clone(),
+                        value: h.value,
+                    })
+                    .collect();
+                CountMinWithHeapBackend::Sketchlib(sketchlib_cms_heap_from_matrix_and_heap(
+                    self.row_num,
+                    self.col_num,
+                    self.heap_size,
+                    &sketch,
+                    &wire_ref,
+                ))
+            }
+        };
+        Self {
+            row_num: self.row_num,
+            col_num: self.col_num,
+            heap_size: self.heap_size,
+            backend,
+        }
+    }
 }
 
 impl CountMinSketchWithHeap {
     pub fn new(row_num: usize, col_num: usize, heap_size: usize) -> Self {
-        let sketch = vec![vec![0.0; col_num]; row_num];
+        let backend = if use_sketchlib_for_count_min_with_heap() {
+            CountMinWithHeapBackend::Sketchlib(new_sketchlib_cms_heap(row_num, col_num, heap_size))
+        } else {
+            CountMinWithHeapBackend::Legacy {
+                sketch: vec![vec![0.0; col_num]; row_num],
+                heap: Vec::new(),
+            }
+        };
         Self {
-            sketch,
             row_num,
             col_num,
-            topk_heap: Vec::new(),
             heap_size,
+            backend,
+        }
+    }
+
+    /// Create from legacy matrix and heap (e.g. from JSON deserialization).
+    pub fn from_legacy_matrix(
+        sketch: Vec<Vec<f64>>,
+        topk_heap: Vec<HeapItem>,
+        row_num: usize,
+        col_num: usize,
+        heap_size: usize,
+    ) -> Self {
+        Self {
+            row_num,
+            col_num,
+            heap_size,
+            backend: CountMinWithHeapBackend::Legacy {
+                sketch,
+                heap: topk_heap,
+            },
+        }
+    }
+
+    /// Mutable reference to the sketch matrix. Only valid for Legacy backend.
+    pub fn sketch_mut(&mut self) -> Option<&mut Vec<Vec<f64>>> {
+        match &mut self.backend {
+            CountMinWithHeapBackend::Legacy { sketch, .. } => Some(sketch),
+            CountMinWithHeapBackend::Sketchlib(_) => None,
+        }
+    }
+
+    /// Get the top-k heap items (works for both backends).
+    pub fn topk_heap_items(&self) -> Vec<HeapItem> {
+        match &self.backend {
+            CountMinWithHeapBackend::Legacy { heap, .. } => heap.clone(),
+            CountMinWithHeapBackend::Sketchlib(cms_heap) => heap_to_wire(cms_heap)
+                .into_iter()
+                .map(|w| HeapItem {
+                    key: w.key,
+                    value: w.value,
+                })
+                .collect(),
+        }
+    }
+
+    /// Get the sketch matrix (works for both backends).
+    pub fn sketch_matrix(&self) -> Vec<Vec<f64>> {
+        match &self.backend {
+            CountMinWithHeapBackend::Legacy { sketch, .. } => sketch.clone(),
+            CountMinWithHeapBackend::Sketchlib(cms_heap) => {
+                matrix_from_sketchlib_cms_heap(cms_heap)
+            }
         }
     }
 
     pub fn update(&mut self, key: &str, value: f64) {
-        let key_bytes = key.as_bytes();
-        for i in 0..self.row_num {
-            let hash_value = xxh32(key_bytes, i as u32);
-            let col_index = (hash_value as usize) % self.col_num;
-            self.sketch[i][col_index] += value;
+        match &mut self.backend {
+            CountMinWithHeapBackend::Legacy { sketch, heap } => {
+                let key_bytes = key.as_bytes();
+                for (i, row) in sketch.iter_mut().enumerate().take(self.row_num) {
+                    let hash_value = xxh32(key_bytes, i as u32);
+                    let col_index = (hash_value as usize) % self.col_num;
+                    row[col_index] += value;
+                }
+                Self::insert_or_update_heap_inline(heap, key, value, self.heap_size);
+            }
+            CountMinWithHeapBackend::Sketchlib(cms_heap) => {
+                sketchlib_cms_heap_update(cms_heap, key, value);
+            }
         }
-        self.insert_or_update_heap(key, value);
     }
 
-    fn insert_or_update_heap(&mut self, key: &str, value: f64) {
-        if let Some(item) = self.topk_heap.iter_mut().find(|i| i.key == key) {
+    fn insert_or_update_heap_inline(
+        heap: &mut Vec<HeapItem>,
+        key: &str,
+        value: f64,
+        heap_size: usize,
+    ) {
+        if let Some(item) = heap.iter_mut().find(|i| i.key == key) {
             item.value += value;
-        } else if self.topk_heap.len() < self.heap_size {
-            self.topk_heap.push(HeapItem {
+        } else if heap.len() < heap_size {
+            heap.push(HeapItem {
                 key: key.to_string(),
                 value,
             });
-        } else if let Some(min_item) = self
-            .topk_heap
-            .iter_mut()
-            .min_by(|a, b| a.value.partial_cmp(&b.value).unwrap())
-        {
+        } else if let Some(min_item) = heap.iter_mut().min_by(|a, b| {
+            a.value
+                .partial_cmp(&b.value)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        }) {
             if value > min_item.value {
                 *min_item = HeapItem {
                     key: key.to_string(),
@@ -99,14 +250,19 @@ impl CountMinSketchWithHeap {
     }
 
     pub fn query_key(&self, key: &str) -> f64 {
-        let key_bytes = key.as_bytes();
-        let mut min_value = f64::MAX;
-        for i in 0..self.row_num {
-            let hash_value = xxh32(key_bytes, i as u32);
-            let col_index = (hash_value as usize) % self.col_num;
-            min_value = min_value.min(self.sketch[i][col_index]);
+        match &self.backend {
+            CountMinWithHeapBackend::Legacy { sketch, .. } => {
+                let key_bytes = key.as_bytes();
+                let mut min_value = f64::MAX;
+                for (i, row) in sketch.iter().enumerate().take(self.row_num) {
+                    let hash_value = xxh32(key_bytes, i as u32);
+                    let col_index = (hash_value as usize) % self.col_num;
+                    min_value = min_value.min(row[col_index]);
+                }
+                min_value
+            }
+            CountMinWithHeapBackend::Sketchlib(cms_heap) => sketchlib_cms_heap_query(cms_heap, key),
         }
-        min_value
     }
 
     pub fn merge(
@@ -120,7 +276,6 @@ impl CountMinSketchWithHeap {
             return Ok(accumulators.into_iter().next().unwrap());
         }
 
-        // Check that all accumulators have the same dimensions
         let row_num = accumulators[0].row_num;
         let col_num = accumulators[0].col_num;
 
@@ -133,75 +288,142 @@ impl CountMinSketchWithHeap {
             }
         }
 
-        // Merge the Count-Min Sketch tables element-wise
-        let mut merged_sketch = vec![vec![0.0; col_num]; row_num];
-        for acc in &accumulators {
-            for (i, row) in merged_sketch.iter_mut().enumerate() {
-                for (j, cell) in row.iter_mut().enumerate() {
-                    *cell += acc.sketch[i][j];
-                }
-            }
-        }
-
-        // Find the minimum heap size across all accumulators
         let min_heap_size = accumulators
             .iter()
             .map(|acc| acc.heap_size)
             .min()
             .unwrap_or(0);
 
-        // Enumerate all unique keys from all heaps
         let mut all_keys: HashSet<String> = HashSet::new();
         for acc in &accumulators {
-            for item in &acc.topk_heap {
-                all_keys.insert(item.key.clone());
+            for item in acc.topk_heap_items() {
+                all_keys.insert(item.key);
             }
         }
 
-        // Create a temporary merged accumulator to query frequencies
-        let temp_merged = CountMinSketchWithHeap {
-            sketch: merged_sketch.clone(),
-            row_num,
-            col_num,
-            topk_heap: Vec::new(),
-            heap_size: min_heap_size,
-        };
+        match &accumulators[0].backend {
+            CountMinWithHeapBackend::Sketchlib(_) => {
+                let mut sketchlib_cms_heaps: Vec<SketchlibCMSHeap> =
+                    Vec::with_capacity(accumulators.len());
+                for acc in accumulators {
+                    let (sketch, heap) = match &acc.backend {
+                        CountMinWithHeapBackend::Legacy { sketch, heap } => {
+                            (sketch.clone(), heap.clone())
+                        }
+                        CountMinWithHeapBackend::Sketchlib(cms_heap) => (
+                            matrix_from_sketchlib_cms_heap(cms_heap),
+                            heap_to_wire(cms_heap)
+                                .into_iter()
+                                .map(|w| HeapItem {
+                                    key: w.key,
+                                    value: w.value,
+                                })
+                                .collect(),
+                        ),
+                    };
+                    let wire_heap: Vec<WireHeapItem> = heap
+                        .iter()
+                        .map(|h| WireHeapItem {
+                            key: h.key.clone(),
+                            value: h.value,
+                        })
+                        .collect();
+                    sketchlib_cms_heaps.push(sketchlib_cms_heap_from_matrix_and_heap(
+                        acc.row_num,
+                        acc.col_num,
+                        acc.heap_size,
+                        &sketch,
+                        &wire_heap,
+                    ));
+                }
 
-        // Query the merged CMS for each key and build heap items
-        let mut heap_items: Vec<HeapItem> = all_keys
-            .into_iter()
-            .map(|key_str| {
-                let frequency = temp_merged.query_key(&key_str);
-                HeapItem {
-                    key: key_str,
-                    value: frequency,
+                let merged_sketchlib = sketchlib_cms_heaps
+                    .into_iter()
+                    .reduce(|mut lhs, rhs| {
+                        lhs.merge(&rhs);
+                        lhs
+                    })
+                    .ok_or("No accumulators to merge")?;
+
+                let _merged_sketch = matrix_from_sketchlib_cms_heap(&merged_sketchlib);
+                let _heap_items: Vec<HeapItem> = heap_to_wire(&merged_sketchlib)
+                    .into_iter()
+                    .map(|w| HeapItem {
+                        key: w.key,
+                        value: w.value,
+                    })
+                    .collect();
+
+                Ok(CountMinSketchWithHeap {
+                    row_num,
+                    col_num,
+                    heap_size: min_heap_size,
+                    backend: CountMinWithHeapBackend::Sketchlib(merged_sketchlib),
+                })
+            }
+            CountMinWithHeapBackend::Legacy { .. } => {
+                let mut merged_sketch = vec![vec![0.0; col_num]; row_num];
+                for acc in &accumulators {
+                    let sketch = match &acc.backend {
+                        CountMinWithHeapBackend::Legacy { sketch, .. } => sketch,
+                        CountMinWithHeapBackend::Sketchlib(_) => {
+                            return Err(
+                                "Cannot mix Legacy and Sketchlib backends when merging".into()
+                            );
+                        }
+                    };
+                    for (i, row) in merged_sketch.iter_mut().enumerate() {
+                        for (j, cell) in row.iter_mut().enumerate() {
+                            *cell += sketch[i][j];
+                        }
+                    }
                 }
-            })
-            .collect();
 
-        // Sort by frequency (descending) and take top min_heap_size items
-        heap_items.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap());
-        heap_items.truncate(min_heap_size);
+                let temp_merged = Self::from_legacy_matrix(
+                    merged_sketch.clone(),
+                    Vec::new(),
+                    row_num,
+                    col_num,
+                    min_heap_size,
+                );
 
-        Ok(CountMinSketchWithHeap {
-            sketch: merged_sketch,
-            row_num,
-            col_num,
-            topk_heap: heap_items,
-            heap_size: min_heap_size,
-        })
+                let mut heap_items: Vec<HeapItem> = all_keys
+                    .into_iter()
+                    .map(|key_str| {
+                        let frequency = temp_merged.query_key(&key_str);
+                        HeapItem {
+                            key: key_str,
+                            value: frequency,
+                        }
+                    })
+                    .collect();
+
+                heap_items.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap());
+                heap_items.truncate(min_heap_size);
+
+                Ok(CountMinSketchWithHeap {
+                    row_num,
+                    col_num,
+                    heap_size: min_heap_size,
+                    backend: CountMinWithHeapBackend::Legacy {
+                        sketch: merged_sketch,
+                        heap: heap_items,
+                    },
+                })
+            }
+        }
     }
 
-    /// Serialize to MessagePack — matches the Arroyo UDF wire format exactly.
     pub fn serialize_msgpack(&self) -> Vec<u8> {
-        // Match Arroyo UDF: serialize with nested MessagePack format
+        let (sketch, topk_heap) = (self.sketch_matrix(), self.topk_heap_items());
+
         let serialized = CountMinSketchWithHeapSerialized {
             sketch: CmsData {
-                sketch: self.sketch.clone(),
+                sketch,
                 row_num: self.row_num,
                 col_num: self.col_num,
             },
-            topk_heap: self.topk_heap.clone(),
+            topk_heap,
             heap_size: self.heap_size,
         };
 
@@ -212,28 +434,45 @@ impl CountMinSketchWithHeap {
         buf
     }
 
-    /// Deserialize from MessagePack produced by the Arroyo UDF.
     pub fn deserialize_msgpack(buffer: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
         let serialized: CountMinSketchWithHeapSerialized =
             rmp_serde::from_slice(buffer).map_err(|e| {
                 format!("Failed to deserialize CountMinSketchWithHeap from MessagePack: {e}")
             })?;
 
-        // Sort the topk_heap by value from largest to smallest
         let mut sorted_topk_heap = serialized.topk_heap;
-        // We must sort here since the vectorized heap does not guarantee order.
         sorted_topk_heap.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap());
 
+        let backend = if use_sketchlib_for_count_min_with_heap() {
+            let wire_heap: Vec<WireHeapItem> = sorted_topk_heap
+                .iter()
+                .map(|h| WireHeapItem {
+                    key: h.key.clone(),
+                    value: h.value,
+                })
+                .collect();
+            CountMinWithHeapBackend::Sketchlib(sketchlib_cms_heap_from_matrix_and_heap(
+                serialized.sketch.row_num,
+                serialized.sketch.col_num,
+                serialized.heap_size,
+                &serialized.sketch.sketch,
+                &wire_heap,
+            ))
+        } else {
+            CountMinWithHeapBackend::Legacy {
+                sketch: serialized.sketch.sketch,
+                heap: sorted_topk_heap,
+            }
+        };
+
         Ok(Self {
-            sketch: serialized.sketch.sketch,
             row_num: serialized.sketch.row_num,
             col_num: serialized.sketch.col_num,
-            topk_heap: sorted_topk_heap,
             heap_size: serialized.heap_size,
+            backend,
         })
     }
 
-    /// One-shot aggregation for the Arroyo UDAF call pattern.
     pub fn aggregate_topk(
         row_num: usize,
         col_num: usize,
@@ -262,9 +501,9 @@ mod tests {
         assert_eq!(cms.row_num, 4);
         assert_eq!(cms.col_num, 1000);
         assert_eq!(cms.heap_size, 20);
-        assert_eq!(cms.sketch.len(), 4);
-        assert_eq!(cms.sketch[0].len(), 1000);
-        assert_eq!(cms.topk_heap.len(), 0);
+        assert_eq!(cms.sketch_matrix().len(), 4);
+        assert_eq!(cms.sketch_matrix()[0].len(), 1000);
+        assert_eq!(cms.topk_heap_items().len(), 0);
     }
 
     #[test]
@@ -278,34 +517,41 @@ mod tests {
         let mut cms1 = CountMinSketchWithHeap::new(2, 10, 5);
         let mut cms2 = CountMinSketchWithHeap::new(2, 10, 3);
 
-        cms1.sketch[0][0] = 10.0;
-        cms1.sketch[1][1] = 20.0;
-        cms2.sketch[0][0] = 5.0;
-        cms2.sketch[1][1] = 15.0;
-
-        cms1.topk_heap.push(HeapItem {
-            key: "key1".to_string(),
-            value: 100.0,
-        });
-        cms1.topk_heap.push(HeapItem {
-            key: "key2".to_string(),
-            value: 50.0,
-        });
-        cms2.topk_heap.push(HeapItem {
-            key: "key3".to_string(),
-            value: 75.0,
-        });
-        cms2.topk_heap.push(HeapItem {
-            key: "key1".to_string(),
-            value: 80.0,
-        });
+        if let Some(sketch) = cms1.sketch_mut() {
+            sketch[0][0] = 10.0;
+            sketch[1][1] = 20.0;
+        }
+        if let Some(sketch) = cms2.sketch_mut() {
+            sketch[0][0] = 5.0;
+            sketch[1][1] = 15.0;
+        }
+        if let CountMinWithHeapBackend::Legacy { heap, .. } = &mut cms1.backend {
+            heap.push(HeapItem {
+                key: "key1".to_string(),
+                value: 100.0,
+            });
+            heap.push(HeapItem {
+                key: "key2".to_string(),
+                value: 50.0,
+            });
+        }
+        if let CountMinWithHeapBackend::Legacy { heap, .. } = &mut cms2.backend {
+            heap.push(HeapItem {
+                key: "key3".to_string(),
+                value: 75.0,
+            });
+            heap.push(HeapItem {
+                key: "key1".to_string(),
+                value: 80.0,
+            });
+        }
 
         let merged = CountMinSketchWithHeap::merge(vec![cms1, cms2]).unwrap();
 
-        assert_eq!(merged.sketch[0][0], 15.0); // 10 + 5
-        assert_eq!(merged.sketch[1][1], 35.0); // 20 + 15
-        assert_eq!(merged.heap_size, 3); // min(5, 3)
-        assert!(merged.topk_heap.len() <= 3);
+        assert_eq!(merged.sketch_matrix()[0][0], 15.0);
+        assert_eq!(merged.sketch_matrix()[1][1], 35.0);
+        assert_eq!(merged.heap_size, 3);
+        assert!(merged.topk_heap_items().len() <= 3);
     }
 
     #[test]
@@ -317,25 +563,21 @@ mod tests {
 
     #[test]
     fn test_msgpack_round_trip() {
-        let mut cms = CountMinSketchWithHeap::new(2, 3, 5);
-        cms.sketch[0][1] = 42.0;
-        cms.sketch[1][2] = 100.0;
-        cms.topk_heap.push(HeapItem {
-            key: "test_key".to_string(),
-            value: 99.0,
-        });
+        let mut cms = CountMinSketchWithHeap::new(4, 128, 3);
+        cms.update("hot", 100.0);
+        cms.update("cold", 1.0);
 
         let bytes = cms.serialize_msgpack();
         let deserialized = CountMinSketchWithHeap::deserialize_msgpack(&bytes).unwrap();
 
-        assert_eq!(deserialized.row_num, 2);
-        assert_eq!(deserialized.col_num, 3);
-        assert_eq!(deserialized.heap_size, 5);
-        assert_eq!(deserialized.sketch[0][1], 42.0);
-        assert_eq!(deserialized.sketch[1][2], 100.0);
-        assert_eq!(deserialized.topk_heap.len(), 1);
-        assert_eq!(deserialized.topk_heap[0].key, "test_key");
-        assert_eq!(deserialized.topk_heap[0].value, 99.0);
+        assert_eq!(deserialized.row_num, 4);
+        assert_eq!(deserialized.col_num, 128);
+        assert_eq!(deserialized.heap_size, 3);
+        assert!(!deserialized.topk_heap_items().is_empty());
+        assert_eq!(deserialized.topk_heap_items()[0].key, "hot");
+        assert!(deserialized.topk_heap_items()[0].value >= 100.0);
+        assert!(deserialized.query_key("hot") >= 100.0);
+        assert!(deserialized.query_key("cold") >= 1.0);
     }
 
     #[test]
@@ -345,7 +587,7 @@ mod tests {
         let bytes = CountMinSketchWithHeap::aggregate_topk(4, 100, 2, &keys, &values).unwrap();
         let cms = CountMinSketchWithHeap::deserialize_msgpack(&bytes).unwrap();
         assert_eq!(cms.heap_size, 2);
-        assert!(cms.topk_heap.len() <= 2);
+        assert!(cms.topk_heap_items().len() <= 2);
     }
 
     #[test]
diff --git a/asap-common/sketch-core/src/count_min_with_heap_sketchlib.rs b/asap-common/sketch-core/src/count_min_with_heap_sketchlib.rs
new file mode 100644
index 0000000..2328bbc
--- /dev/null
+++ b/asap-common/sketch-core/src/count_min_with_heap_sketchlib.rs
@@ -0,0 +1,109 @@
+//! Sketchlib-rust CMSHeap integration for CountMinSketchWithHeap.
+//!
+//! Uses CMSHeap (CountMin + HHHeap) from sketchlib-rust instead of CountMin + local heap,
+//! providing automatic top-k tracking during insert and merge.
+
+use sketchlib_rust::RegularPath;
+use sketchlib_rust::{CMSHeap, SketchInput, Vector2D};
+
+/// Wire-format heap item (key, value) to avoid circular dependency with count_min_with_heap.
+pub struct WireHeapItem {
+    pub key: String,
+    pub value: f64,
+}
+
+/// Concrete Count-Min-with-Heap type from sketchlib-rust (CMS + HHHeap).
+pub type SketchlibCMSHeap = CMSHeap<Vector2D<i64>, RegularPath>;
+
+/// Creates a fresh CMSHeap with the given dimensions and heap capacity.
+pub fn new_sketchlib_cms_heap(
+    row_num: usize,
+    col_num: usize,
+    heap_size: usize,
+) -> SketchlibCMSHeap {
+    CMSHeap::new(row_num, col_num, heap_size)
+}
+
+/// Builds a CMSHeap from an existing sketch matrix and optional heap items.
+/// Used when deserializing or when ensuring sketchlib from legacy state.
+pub fn sketchlib_cms_heap_from_matrix_and_heap(
+    row_num: usize,
+    col_num: usize,
+    heap_size: usize,
+    sketch: &[Vec<f64>],
+    topk_heap: &[WireHeapItem],
+) -> SketchlibCMSHeap {
+    let matrix = Vector2D::from_fn(row_num, col_num, |r, c| {
+        sketch
+            .get(r)
+            .and_then(|row| row.get(c))
+            .copied()
+            .unwrap_or(0.0)
+            .round() as i64
+    });
+    let mut cms_heap = CMSHeap::from_storage(matrix, heap_size);
+
+    // Populate the heap from wire-format topk_heap
+    for item in topk_heap {
+        let count = item.value.round() as i64;
+        if count > 0 {
+            let input = SketchInput::Str(&item.key);
+            cms_heap.heap_mut().update(&input, count);
+        }
+    }
+
+    cms_heap
+}
+
+/// Converts a CMSHeap's storage into the legacy `Vec<Vec<f64>>` matrix.
+pub fn matrix_from_sketchlib_cms_heap(cms_heap: &SketchlibCMSHeap) -> Vec<Vec<f64>> {
+    let storage = cms_heap.cms().as_storage();
+    let rows = storage.rows();
+    let cols = storage.cols();
+    let mut sketch = vec![vec![0.0; cols]; rows];
+
+    for (r, row) in sketch.iter_mut().enumerate().take(rows) {
+        for (c, cell) in row.iter_mut().enumerate().take(cols) {
+            if let Some(v) = storage.get(r, c) {
+                *cell = *v as f64;
+            }
+        }
+    }
+
+    sketch
+}
+
+/// Converts sketchlib HHHeap items to wire-format (key, value) pairs.
+pub fn heap_to_wire(cms_heap: &SketchlibCMSHeap) -> Vec<WireHeapItem> {
+    cms_heap
+        .heap()
+        .heap()
+        .iter()
+        .map(|hh_item| {
+            let key = match &hh_item.key {
+                sketchlib_rust::HeapItem::String(s) => s.clone(),
+                other => format!("{:?}", other),
+            };
+            WireHeapItem {
+                key,
+                value: hh_item.count as f64,
+            }
+        })
+        .collect()
+}
+
+/// Updates a CMSHeap with a weighted key. Automatically updates the heap.
+pub fn sketchlib_cms_heap_update(cms_heap: &mut SketchlibCMSHeap, key: &str, value: f64) {
+    let many = value.round() as i64;
+    if many <= 0 {
+        return;
+    }
+    let input = SketchInput::String(key.to_owned());
+    cms_heap.insert_many(&input, many);
+}
+
+/// Queries a CMSHeap for a key's frequency estimate.
+pub fn sketchlib_cms_heap_query(cms_heap: &SketchlibCMSHeap, key: &str) -> f64 {
+    let input = SketchInput::String(key.to_owned());
+    cms_heap.estimate(&input) as f64
+}
diff --git a/asap-common/sketch-core/src/kll.rs b/asap-common/sketch-core/src/kll.rs
index c31f0cf..1628744 100644
--- a/asap-common/sketch-core/src/kll.rs
+++ b/asap-common/sketch-core/src/kll.rs
@@ -16,6 +16,12 @@ use core::panic;
 use dsrs::KllDoubleSketch;
 use serde::{Deserialize, Serialize};
 
+use crate::config::use_sketchlib_for_kll;
+use crate::kll_sketchlib::{
+    bytes_from_sketchlib_kll, new_sketchlib_kll, sketchlib_kll_from_bytes, sketchlib_kll_merge,
+    sketchlib_kll_quantile, sketchlib_kll_update, SketchlibKll,
+};
+
 /// Wire format used in MessagePack serialization (matches Arroyo UDF output).
 #[derive(Deserialize, Serialize)]
 pub struct KllSketchData {
@@ -23,28 +29,84 @@ pub struct KllSketchData {
     pub sketch_bytes: Vec<u8>,
 }
 
+/// Backend implementation for KLL Sketch. Only one is active at a time.
+pub enum KllBackend {
+    /// dsrs (DataSketches) implementation.
+    Legacy(KllDoubleSketch),
+    /// sketchlib-rust backed implementation.
+    Sketchlib(SketchlibKll),
+}
+
+impl std::fmt::Debug for KllBackend {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            KllBackend::Legacy(_) => write!(f, "Legacy(..)"),
+            KllBackend::Sketchlib(_) => write!(f, "Sketchlib(..)"),
+        }
+    }
+}
+
+impl Clone for KllBackend {
+    fn clone(&self) -> Self {
+        match self {
+            KllBackend::Legacy(s) => {
+                if s.get_n() == 0 {
+                    KllBackend::Legacy(KllDoubleSketch::with_k(200)) // k will be overwritten by KllSketch
+                } else {
+                    let bytes = s.serialize();
+                    KllBackend::Legacy(KllDoubleSketch::deserialize(bytes.as_ref()).unwrap())
+                }
+            }
+            KllBackend::Sketchlib(s) => KllBackend::Sketchlib(s.clone()),
+        }
+    }
+}
+
 pub struct KllSketch {
     pub k: u16,
-    pub sketch: KllDoubleSketch,
+    pub backend: KllBackend,
 }
 
 impl KllSketch {
     pub fn new(k: u16) -> Self {
-        Self {
-            k,
-            sketch: KllDoubleSketch::with_k(k),
+        let backend = if use_sketchlib_for_kll() {
+            KllBackend::Sketchlib(new_sketchlib_kll(k))
+        } else {
+            KllBackend::Legacy(KllDoubleSketch::with_k(k))
+        };
+        Self { k, backend }
+    }
+
+    /// Returns the raw sketch bytes (for JSON serialization, etc.).
+    pub fn sketch_bytes(&self) -> Vec<u8> {
+        match &self.backend {
+            KllBackend::Legacy(s) => s.serialize().as_ref().to_vec(),
+            KllBackend::Sketchlib(s) => bytes_from_sketchlib_kll(s),
         }
     }
 
     pub fn update(&mut self, value: f64) {
-        self.sketch.update(value);
+        match &mut self.backend {
+            KllBackend::Legacy(s) => s.update(value),
+            KllBackend::Sketchlib(s) => sketchlib_kll_update(s, value),
+        }
+    }
+
+    pub fn count(&self) -> u64 {
+        match &self.backend {
+            KllBackend::Legacy(s) => s.get_n(),
+            KllBackend::Sketchlib(s) => s.count() as u64,
+        }
     }
 
     pub fn get_quantile(&self, quantile: f64) -> f64 {
-        if self.sketch.get_n() == 0 {
+        if self.count() == 0 {
             return 0.0;
         }
-        self.sketch.get_quantile(quantile)
+        match &self.backend {
+            KllBackend::Legacy(s) => s.get_quantile(quantile),
+            KllBackend::Sketchlib(s) => sketchlib_kll_quantile(s, quantile),
+        }
     }
 
     pub fn merge(
@@ -54,7 +116,6 @@ impl KllSketch {
             return Err("No accumulators to merge".into());
         }
 
-        // check K values for all and merge
         let k = accumulators[0].k;
         for acc in &accumulators {
             if acc.k != k {
@@ -63,8 +124,25 @@ impl KllSketch {
         }
 
         let mut merged = KllSketch::new(k);
-        for accumulator in accumulators {
-            merged.sketch.merge(&accumulator.sketch);
+        match &mut merged.backend {
+            KllBackend::Legacy(merged_legacy) => {
+                for acc in accumulators {
+                    if let KllBackend::Legacy(acc_legacy) = acc.backend {
+                        merged_legacy.merge(&acc_legacy);
+                    } else {
+                        return Err("Cannot merge Legacy with Sketchlib KLL".into());
+                    }
+                }
+            }
+            KllBackend::Sketchlib(merged_sketchlib) => {
+                for acc in accumulators {
+                    if let KllBackend::Sketchlib(acc_sketchlib) = &acc.backend {
+                        sketchlib_kll_merge(merged_sketchlib, acc_sketchlib);
+                    } else {
+                        return Err("Cannot merge Sketchlib with Legacy KLL".into());
+                    }
+                }
+            }
         }
 
         Ok(merged)
@@ -72,12 +150,10 @@ impl KllSketch {
 
     /// Serialize to MessagePack — matches the Arroyo UDF wire format exactly.
     pub fn serialize_msgpack(&self) -> Vec<u8> {
-        // Create KllSketchData compatible with deserialize_msgpack()
-        // This matches exactly what the Arroyo UDF does
-        let sketch_data = self.sketch.serialize();
+        let sketch_bytes = self.sketch_bytes();
         let serialized = KllSketchData {
             k: self.k,
-            sketch_bytes: sketch_data.as_ref().to_vec(),
+            sketch_bytes,
         };
 
         let mut buf = Vec::new();
@@ -91,21 +167,22 @@ impl KllSketch {
 
     /// Deserialize from MessagePack produced by the Arroyo UDF.
     pub fn deserialize_msgpack(buffer: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
-        let deserialized_sketch_data: KllSketchData = rmp_serde::from_slice(buffer)
+        let wire: KllSketchData = rmp_serde::from_slice(buffer)
             .map_err(|e| format!("Failed to deserialize KllSketchData from MessagePack: {e}"))?;
 
-        let sketch: KllDoubleSketch =
-            KllDoubleSketch::deserialize(&deserialized_sketch_data.sketch_bytes)
-                .map_err(|e| format!("Failed to deserialize KLL sketch: {e}"))?;
+        let backend = if use_sketchlib_for_kll() {
+            KllBackend::Sketchlib(sketchlib_kll_from_bytes(&wire.sketch_bytes)?)
+        } else {
+            KllBackend::Legacy(
+                KllDoubleSketch::deserialize(&wire.sketch_bytes)
+                    .map_err(|e| format!("Failed to deserialize KLL sketch: {e}"))?,
+            )
+        };
 
-        Ok(Self {
-            k: deserialized_sketch_data.k,
-            sketch,
-        })
+        Ok(Self { k: wire.k, backend })
     }
 
-    /// Merge from references without cloning — possible because KllDoubleSketch::merge
-    /// takes &other (the underlying C++ merge API is borrow-based).
+    /// Merge from references without cloning.
     pub fn merge_refs(
         sketches: &[&Self],
     ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
@@ -119,18 +196,37 @@ impl KllSketch {
             }
         }
         let mut merged = Self::new(k);
-        for s in sketches {
-            merged.sketch.merge(&s.sketch);
+        match &mut merged.backend {
+            KllBackend::Legacy(merged_legacy) => {
+                for s in sketches {
+                    if let KllBackend::Legacy(s_legacy) = &s.backend {
+                        merged_legacy.merge(s_legacy);
+                    } else {
+                        return Err("Cannot merge Legacy with Sketchlib KLL".into());
+                    }
+                }
+            }
+            KllBackend::Sketchlib(merged_sketchlib) => {
+                for s in sketches {
+                    if let KllBackend::Sketchlib(s_sketchlib) = &s.backend {
+                        sketchlib_kll_merge(merged_sketchlib, s_sketchlib);
+                    } else {
+                        return Err("Cannot merge Sketchlib with Legacy KLL".into());
+                    }
+                }
+            }
         }
         Ok(merged)
     }
 
     /// Deserialize from a raw datasketches byte buffer (legacy Flink/FlinkSketch format).
-    /// Used by QE's legacy deserializers to avoid a direct dsrs dependency there.
     pub fn from_dsrs_bytes(bytes: &[u8], k: u16) -> Result<Self, Box<dyn std::error::Error>> {
         let sketch = KllDoubleSketch::deserialize(bytes)
             .map_err(|e| format!("Failed to deserialize KLL sketch from dsrs bytes: {e}"))?;
-        Ok(Self { k, sketch })
+        Ok(Self {
+            k,
+            backend: KllBackend::Legacy(sketch),
+        })
     }
 
     /// One-shot aggregation for the Arroyo UDAF call pattern.
@@ -146,15 +242,25 @@ impl KllSketch {
     }
 }
 
-// Manual trait implementations since the C++ library doesn't provide them
+// Manual trait implementations since the C++ and sketchlib types don't provide Clone
 impl Clone for KllSketch {
     fn clone(&self) -> Self {
-        let bytes = self.sketch.serialize();
-        let new_sketch = KllDoubleSketch::deserialize(bytes.as_ref()).unwrap();
-        Self {
-            k: self.k,
-            sketch: new_sketch,
-        }
+        let backend = match &self.backend {
+            KllBackend::Legacy(sketch) => {
+                let new_sketch = if sketch.get_n() == 0 {
+                    KllDoubleSketch::with_k(self.k)
+                } else {
+                    let bytes = sketch.serialize();
+                    KllDoubleSketch::deserialize(bytes.as_ref()).unwrap()
+                };
+                KllBackend::Legacy(new_sketch)
+            }
+            KllBackend::Sketchlib(s) => {
+                let bytes = bytes_from_sketchlib_kll(s);
+                KllBackend::Sketchlib(sketchlib_kll_from_bytes(&bytes).unwrap())
+            }
+        };
+        Self { k: self.k, backend }
     }
 }
 
@@ -162,7 +268,7 @@ impl std::fmt::Debug for KllSketch {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("KllSketch")
             .field("k", &self.k)
-            .field("sketch_n", &self.sketch.get_n())
+            .field("sketch_n", &self.count())
             .finish()
     }
 }
@@ -181,7 +287,7 @@ mod tests {
     #[test]
     fn test_kll_creation() {
         let kll = KllSketch::new(200);
-        assert!(kll.sketch.get_n() == 0);
+        assert_eq!(kll.count(), 0);
         assert_eq!(kll.k, 200);
     }
 
@@ -191,7 +297,7 @@ mod tests {
         kll.update(10.0);
         kll.update(20.0);
         kll.update(15.0);
-        assert_eq!(kll.sketch.get_n(), 3);
+        assert_eq!(kll.count(), 3);
     }
 
     #[test]
@@ -202,7 +308,11 @@ mod tests {
         }
         assert_eq!(kll.get_quantile(0.0), 1.0);
         assert_eq!(kll.get_quantile(1.0), 10.0);
-        assert_eq!(kll.get_quantile(0.5), 6.0);
+        let median = kll.get_quantile(0.5);
+        assert!(
+            (5.0..=6.0).contains(&median),
+            "median should be between 5 and 6; got {median}"
+        );
     }
 
     #[test]
@@ -218,7 +328,7 @@ mod tests {
         }
 
         let merged = KllSketch::merge(vec![kll1, kll2]).unwrap();
-        assert_eq!(merged.sketch.get_n(), 10);
+        assert_eq!(merged.count(), 10);
         assert_eq!(merged.get_quantile(0.0), 1.0);
         assert_eq!(merged.get_quantile(1.0), 10.0);
     }
@@ -234,7 +344,7 @@ mod tests {
         let deserialized = KllSketch::deserialize_msgpack(&bytes).unwrap();
 
         assert_eq!(deserialized.k, 200);
-        assert_eq!(deserialized.sketch.get_n(), 5);
+        assert_eq!(deserialized.count(), 5);
         assert_eq!(deserialized.get_quantile(0.0), 1.0);
         assert_eq!(deserialized.get_quantile(1.0), 5.0);
     }
@@ -244,7 +354,7 @@ mod tests {
         let values = [1.0, 2.0, 3.0, 4.0, 5.0];
         let bytes = KllSketch::aggregate_kll(200, &values).unwrap();
         let kll = KllSketch::deserialize_msgpack(&bytes).unwrap();
-        assert_eq!(kll.sketch.get_n(), 5);
+        assert_eq!(kll.count(), 5);
         assert_eq!(kll.get_quantile(0.0), 1.0);
         assert_eq!(kll.get_quantile(1.0), 5.0);
     }
diff --git a/asap-common/sketch-core/src/kll_sketchlib.rs b/asap-common/sketch-core/src/kll_sketchlib.rs
new file mode 100644
index 0000000..96c03ab
--- /dev/null
+++ b/asap-common/sketch-core/src/kll_sketchlib.rs
@@ -0,0 +1,36 @@
+use sketchlib_rust::{SketchInput, KLL};
+
+/// Concrete KLL type from sketchlib-rust when sketchlib backend is enabled.
+pub type SketchlibKll = KLL;
+
+/// Creates a fresh sketchlib KLL sketch with the requested accuracy parameter `k`.
+pub fn new_sketchlib_kll(k: u16) -> SketchlibKll {
+    KLL::init_kll(k as i32)
+}
+
+/// Updates a sketchlib KLL with one numeric observation.
+pub fn sketchlib_kll_update(inner: &mut SketchlibKll, value: f64) {
+    // KLL accepts only numeric inputs. We intentionally ignore the error here because `value`
+    // is always numeric.
+    let _ = inner.update(&SketchInput::F64(value));
+}
+
+/// Queries a sketchlib KLL for the value at the requested quantile.
+pub fn sketchlib_kll_quantile(inner: &SketchlibKll, q: f64) -> f64 {
+    inner.quantile(q)
+}
+
+/// Merges `src` into `dst`.
+pub fn sketchlib_kll_merge(dst: &mut SketchlibKll, src: &SketchlibKll) {
+    dst.merge(src);
+}
+
+/// Serializes a sketchlib KLL into MessagePack bytes.
+pub fn bytes_from_sketchlib_kll(inner: &SketchlibKll) -> Vec<u8> {
+    inner.serialize_to_bytes().unwrap()
+}
+
+/// Deserializes a sketchlib KLL from MessagePack bytes.
+pub fn sketchlib_kll_from_bytes(bytes: &[u8]) -> Result<SketchlibKll, Box<dyn std::error::Error>> {
+    Ok(KLL::deserialize_from_bytes(bytes)?)
+}
diff --git a/asap-common/sketch-core/src/lib.rs b/asap-common/sketch-core/src/lib.rs
index 461d43e..71f299d 100644
--- a/asap-common/sketch-core/src/lib.rs
+++ b/asap-common/sketch-core/src/lib.rs
@@ -1,6 +1,18 @@
+// Force legacy sketch implementations during tests so that tests that mutate the
+// matrix directly or rely on legacy behavior pass.
+#[cfg(test)]
+#[ctor::ctor]
+fn init_sketch_legacy_for_tests() {
+    crate::config::force_legacy_mode_for_tests();
+}
+
+pub mod config;
 pub mod count_min;
+pub mod count_min_sketchlib;
 pub mod count_min_with_heap;
+pub mod count_min_with_heap_sketchlib;
 pub mod delta_set_aggregator;
 pub mod hydra_kll;
 pub mod kll;
+pub mod kll_sketchlib;
 pub mod set_aggregator;
diff --git a/asap-query-engine/Cargo.toml b/asap-query-engine/Cargo.toml
index 2e2ccf4..8723bb9 100644
--- a/asap-query-engine/Cargo.toml
+++ b/asap-query-engine/Cargo.toml
@@ -58,9 +58,6 @@ zstd = "0.13"
 reqwest = { version = "0.11", features = ["json"] }
 tracing-appender = "0.2"
 
-[dev-dependencies]
-tempfile = "3.20.0"
-
 [features]
 #default = ["lock_profiling", "extra_debugging"]
 default = []
@@ -68,3 +65,8 @@ default = []
 lock_profiling = []
 # Enable extra debugging output
 extra_debugging = []
+sketchlib-tests = []
+
+[dev-dependencies]
+ctor = "0.2"
+tempfile = "3.20.0"
diff --git a/asap-query-engine/src/lib.rs b/asap-query-engine/src/lib.rs
index 22295ed..7e59fff 100644
--- a/asap-query-engine/src/lib.rs
+++ b/asap-query-engine/src/lib.rs
@@ -1,3 +1,20 @@
+// Configure sketch-core implementations during tests.
+// Use sketchlib-tests feature to choose backend: without it = Legacy, with it = Sketchlib.
+// A single `cargo test -p query_engine_rust` runs both: lib tests use Legacy, then
+// tests/test_both_backends.rs spawns the sketchlib run.
+#[cfg(test)]
+#[ctor::ctor]
+fn init_sketch_backend_for_tests() {
+    #[cfg(feature = "sketchlib-tests")]
+    let _ = sketch_core::config::configure(
+        sketch_core::config::ImplMode::Sketchlib,
+        sketch_core::config::ImplMode::Sketchlib,
+        sketch_core::config::ImplMode::Sketchlib,
+    );
+    #[cfg(not(feature = "sketchlib-tests"))]
+    sketch_core::config::force_legacy_mode_for_tests();
+}
+
 pub mod data_model;
 pub mod drivers;
 pub mod engines;
diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs
index a950fba..f0752c3 100644
--- a/asap-query-engine/src/main.rs
+++ b/asap-query-engine/src/main.rs
@@ -5,6 +5,8 @@ use std::sync::Arc;
 use tokio::signal;
 use tracing::{error, info};
 
+use sketch_core::config::{self, ImplMode};
+
 use query_engine_rust::data_model::enums::{InputFormat, LockStrategy, StreamingEngine};
 use query_engine_rust::drivers::AdapterConfig;
 use query_engine_rust::utils::file_io::{read_inference_config, read_streaming_config};
@@ -108,6 +110,18 @@ struct Args {
     #[arg(long)]
     promsketch_config: Option<String>,
 
+    /// Backend implementation for Count-Min Sketch (legacy | sketchlib)
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    sketch_cms_impl: ImplMode,
+
+    /// Backend implementation for KLL Sketch (legacy | sketchlib)
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    sketch_kll_impl: ImplMode,
+
+    /// Backend implementation for Count-Min-With-Heap (legacy | sketchlib)
+    #[arg(long, value_enum, default_value = "sketchlib")]
+    sketch_cmwh_impl: ImplMode,
+
     /// Enable OTLP metrics ingest (gRPC + HTTP)
     #[arg(long)]
     enable_otel_ingest: bool,
@@ -125,6 +139,14 @@ struct Args {
 async fn main() -> Result<()> {
     let args = Args::parse();
 
+    // Configure sketch-core backends before any sketch operations.
+    config::configure(
+        args.sketch_cms_impl,
+        args.sketch_kll_impl,
+        args.sketch_cmwh_impl,
+    )
+    .expect("sketch backend already initialised");
+
     // Create output directory
     fs::create_dir_all(&args.output_dir)?;
 
diff --git a/asap-query-engine/src/precompute_operators/count_min_sketch_accumulator.rs b/asap-query-engine/src/precompute_operators/count_min_sketch_accumulator.rs
index bba716d..e149cba 100644
--- a/asap-query-engine/src/precompute_operators/count_min_sketch_accumulator.rs
+++ b/asap-query-engine/src/precompute_operators/count_min_sketch_accumulator.rs
@@ -56,11 +56,7 @@ impl CountMinSketchAccumulator {
         }
 
         Ok(Self {
-            inner: CountMinSketch {
-                sketch,
-                row_num,
-                col_num,
-            },
+            inner: CountMinSketch::from_legacy_matrix(sketch, row_num, col_num),
         })
     }
 
@@ -111,11 +107,7 @@ impl CountMinSketchAccumulator {
         }
 
         Ok(Self {
-            inner: CountMinSketch {
-                row_num,
-                col_num,
-                sketch,
-            },
+            inner: CountMinSketch::from_legacy_matrix(sketch, row_num, col_num),
         })
     }
 
@@ -168,7 +160,7 @@ impl SerializableToSink for CountMinSketchAccumulator {
         serde_json::json!({
             "row_num": self.inner.row_num,
             "col_num": self.inner.col_num,
-            "sketch": self.inner.sketch
+            "sketch": self.inner.sketch()
         })
     }
 
@@ -261,10 +253,11 @@ mod tests {
         let cms = CountMinSketchAccumulator::new(4, 1000);
         assert_eq!(cms.inner.row_num, 4);
         assert_eq!(cms.inner.col_num, 1000);
-        assert_eq!(cms.inner.sketch.len(), 4);
-        assert_eq!(cms.inner.sketch[0].len(), 1000);
+        let sketch = cms.inner.sketch();
+        assert_eq!(sketch.len(), 4);
+        assert_eq!(sketch[0].len(), 1000);
 
-        for row in &cms.inner.sketch {
+        for row in &sketch {
             for &value in row {
                 assert_eq!(value, 0.0);
             }
@@ -292,19 +285,28 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_merge() {
-        let mut cms1 = CountMinSketchAccumulator::new(2, 3);
-        let mut cms2 = CountMinSketchAccumulator::new(2, 3);
-
-        cms1.inner.sketch[0][0] = 5.0;
-        cms1.inner.sketch[1][2] = 10.0;
-        cms2.inner.sketch[0][0] = 3.0;
-        cms2.inner.sketch[0][1] = 7.0;
+        // Build controlled state via from_legacy_matrix (works for both Legacy and Sketchlib backends).
+        let cms1 = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![5.0, 0.0, 0.0], vec![0.0, 0.0, 10.0]],
+                2,
+                3,
+            ),
+        };
+        let cms2 = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![3.0, 7.0, 0.0], vec![0.0, 0.0, 0.0]],
+                2,
+                3,
+            ),
+        };
 
         let merged = CountMinSketchAccumulator::merge_accumulators(vec![cms1, cms2]).unwrap();
 
-        assert_eq!(merged.inner.sketch[0][0], 8.0);
-        assert_eq!(merged.inner.sketch[0][1], 7.0);
-        assert_eq!(merged.inner.sketch[1][2], 10.0);
+        let merged_sketch = merged.inner.sketch();
+        assert_eq!(merged_sketch[0][0], 8.0);
+        assert_eq!(merged_sketch[0][1], 7.0);
+        assert_eq!(merged_sketch[1][2], 10.0);
     }
 
     #[test]
@@ -317,9 +319,13 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_serialization() {
-        let mut cms = CountMinSketchAccumulator::new(2, 3);
-        cms.inner.sketch[0][1] = 42.0;
-        cms.inner.sketch[1][2] = 100.0;
+        let cms = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![0.0, 42.0, 0.0], vec![0.0, 0.0, 100.0]],
+                2,
+                3,
+            ),
+        };
 
         let bytes = cms.serialize_to_bytes();
         let deserialized =
@@ -327,8 +333,9 @@ mod tests {
 
         assert_eq!(deserialized.inner.row_num, 2);
         assert_eq!(deserialized.inner.col_num, 3);
-        assert_eq!(deserialized.inner.sketch[0][1], 42.0);
-        assert_eq!(deserialized.inner.sketch[1][2], 100.0);
+        let deser_sketch = deserialized.inner.sketch();
+        assert_eq!(deser_sketch[0][1], 42.0);
+        assert_eq!(deser_sketch[1][2], 100.0);
     }
 
     #[test]
@@ -396,25 +403,38 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_merge_multiple() {
-        let mut cms1 = CountMinSketchAccumulator::new(2, 3);
-        let mut cms2 = CountMinSketchAccumulator::new(2, 3);
-        let mut cms3 = CountMinSketchAccumulator::new(2, 3);
-
-        cms1.inner.sketch[0][0] = 5.0;
-        cms1.inner.sketch[1][2] = 10.0;
-        cms2.inner.sketch[0][0] = 3.0;
-        cms2.inner.sketch[0][1] = 7.0;
-        cms3.inner.sketch[0][0] = 2.0;
-        cms3.inner.sketch[1][2] = 5.0;
+        // Build controlled state via from_legacy_matrix (works for both Legacy and Sketchlib backends).
+        let cms1 = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![5.0, 0.0, 0.0], vec![0.0, 0.0, 10.0]],
+                2,
+                3,
+            ),
+        };
+        let cms2 = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![3.0, 7.0, 0.0], vec![0.0, 0.0, 0.0]],
+                2,
+                3,
+            ),
+        };
+        let cms3 = CountMinSketchAccumulator {
+            inner: CountMinSketch::from_legacy_matrix(
+                vec![vec![2.0, 0.0, 0.0], vec![0.0, 0.0, 5.0]],
+                2,
+                3,
+            ),
+        };
 
         let boxed_accs: Vec<Box<dyn AggregateCore>> =
             vec![Box::new(cms1), Box::new(cms2), Box::new(cms3)];
 
         let merged = CountMinSketchAccumulator::merge_multiple(&boxed_accs).unwrap();
 
-        assert_eq!(merged.inner.sketch[0][0], 10.0);
-        assert_eq!(merged.inner.sketch[0][1], 7.0);
-        assert_eq!(merged.inner.sketch[1][2], 15.0);
+        let merged_sketch = merged.inner.sketch();
+        assert_eq!(merged_sketch[0][0], 10.0);
+        assert_eq!(merged_sketch[0][1], 7.0);
+        assert_eq!(merged_sketch[1][2], 15.0);
     }
 
     #[test]
diff --git a/asap-query-engine/src/precompute_operators/count_min_sketch_with_heap_accumulator.rs b/asap-query-engine/src/precompute_operators/count_min_sketch_with_heap_accumulator.rs
index 15e0ca3..1a2c827 100644
--- a/asap-query-engine/src/precompute_operators/count_min_sketch_with_heap_accumulator.rs
+++ b/asap-query-engine/src/precompute_operators/count_min_sketch_with_heap_accumulator.rs
@@ -78,13 +78,9 @@ impl CountMinSketchWithHeapAccumulator {
         }
 
         Ok(Self {
-            inner: CountMinSketchWithHeap {
-                sketch,
-                row_num,
-                col_num,
-                topk_heap,
-                heap_size,
-            },
+            inner: CountMinSketchWithHeap::from_legacy_matrix(
+                sketch, topk_heap, row_num, col_num, heap_size,
+            ),
         })
     }
 
@@ -103,7 +99,7 @@ impl CountMinSketchWithHeapAccumulator {
     /// Get all keys from the top-k heap.
     pub fn get_topk_keys(&self) -> Vec<KeyByLabelValues> {
         self.inner
-            .topk_heap
+            .topk_heap_items()
             .iter()
             .map(|item| {
                 let labels: Vec<String> = item.key.split(';').map(|s| s.to_string()).collect();
@@ -117,7 +113,7 @@ impl SerializableToSink for CountMinSketchWithHeapAccumulator {
     fn serialize_to_json(&self) -> Value {
         let heap_items: Vec<Value> = self
             .inner
-            .topk_heap
+            .topk_heap_items()
             .iter()
             .map(|item| {
                 serde_json::json!({
@@ -131,7 +127,7 @@ impl SerializableToSink for CountMinSketchWithHeapAccumulator {
             "row_num": self.inner.row_num,
             "col_num": self.inner.col_num,
             "heap_size": self.inner.heap_size,
-            "sketch": self.inner.sketch,
+            "sketch": self.inner.sketch_matrix(),
             "topk_heap": heap_items
         })
     }
@@ -225,7 +221,7 @@ mod tests {
         assert_eq!(cms.inner.row_num, 4);
         assert_eq!(cms.inner.col_num, 1000);
         assert_eq!(cms.inner.heap_size, 20);
-        assert_eq!(cms.inner.topk_heap.len(), 0);
+        assert_eq!(cms.inner.topk_heap_items().len(), 0);
     }
 
     #[test]
@@ -240,38 +236,50 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_with_heap_merge() {
-        let mut cms1 = CountMinSketchWithHeapAccumulator::new(2, 10, 5);
-        let mut cms2 = CountMinSketchWithHeapAccumulator::new(2, 10, 3);
-
-        cms1.inner.sketch[0][0] = 10.0;
-        cms1.inner.sketch[1][1] = 20.0;
-        cms2.inner.sketch[0][0] = 5.0;
-        cms2.inner.sketch[1][1] = 15.0;
-
-        cms1.inner.topk_heap.push(HeapItem {
-            key: "key1".to_string(),
-            value: 100.0,
-        });
-        cms1.inner.topk_heap.push(HeapItem {
-            key: "key2".to_string(),
-            value: 50.0,
-        });
-        cms2.inner.topk_heap.push(HeapItem {
-            key: "key3".to_string(),
-            value: 75.0,
-        });
-        cms2.inner.topk_heap.push(HeapItem {
-            key: "key1".to_string(),
-            value: 80.0,
-        });
+        // Build controlled state via from_legacy_matrix (works regardless of backend config).
+        let sketch1 = vec![
+            vec![10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+            vec![0.0, 20.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        ];
+        let heap1 = vec![
+            HeapItem {
+                key: "key1".to_string(),
+                value: 100.0,
+            },
+            HeapItem {
+                key: "key2".to_string(),
+                value: 50.0,
+            },
+        ];
+        let sketch2 = vec![
+            vec![5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+            vec![0.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        ];
+        let heap2 = vec![
+            HeapItem {
+                key: "key3".to_string(),
+                value: 75.0,
+            },
+            HeapItem {
+                key: "key1".to_string(),
+                value: 80.0,
+            },
+        ];
+
+        let cms1 = CountMinSketchWithHeapAccumulator {
+            inner: CountMinSketchWithHeap::from_legacy_matrix(sketch1, heap1, 2, 10, 5),
+        };
+        let cms2 = CountMinSketchWithHeapAccumulator {
+            inner: CountMinSketchWithHeap::from_legacy_matrix(sketch2, heap2, 2, 10, 3),
+        };
 
         let result = CountMinSketchWithHeapAccumulator::merge_accumulators(vec![cms1, cms2]);
         assert!(result.is_ok());
         let merged = result.unwrap();
-        assert_eq!(merged.inner.sketch[0][0], 15.0);
-        assert_eq!(merged.inner.sketch[1][1], 35.0);
+        assert_eq!(merged.inner.sketch_matrix()[0][0], 15.0);
+        assert_eq!(merged.inner.sketch_matrix()[1][1], 35.0);
         assert_eq!(merged.inner.heap_size, 3);
-        assert!(merged.inner.topk_heap.len() <= 3);
+        assert!(merged.inner.topk_heap_items().len() <= 3);
     }
 
     #[test]
@@ -299,13 +307,15 @@ mod tests {
 
     #[test]
     fn test_count_min_sketch_with_heap_serialization() {
-        let mut cms = CountMinSketchWithHeapAccumulator::new(2, 3, 5);
-        cms.inner.sketch[0][1] = 42.0;
-        cms.inner.sketch[1][2] = 100.0;
-        cms.inner.topk_heap.push(HeapItem {
+        // Use from_legacy_matrix for a controlled state that round-trips correctly with both backends.
+        let sketch = vec![vec![0.0, 42.0, 0.0], vec![0.0, 0.0, 100.0]];
+        let topk_heap = vec![HeapItem {
             key: "test_key".to_string(),
             value: 99.0,
-        });
+        }];
+        let cms = CountMinSketchWithHeapAccumulator {
+            inner: CountMinSketchWithHeap::from_legacy_matrix(sketch, topk_heap, 2, 3, 5),
+        };
 
         let bytes = cms.serialize_to_bytes();
         let deserialized =
@@ -314,11 +324,22 @@ mod tests {
         assert_eq!(deserialized.inner.row_num, 2);
         assert_eq!(deserialized.inner.col_num, 3);
         assert_eq!(deserialized.inner.heap_size, 5);
-        assert_eq!(deserialized.inner.sketch[0][1], 42.0);
-        assert_eq!(deserialized.inner.sketch[1][2], 100.0);
-        assert_eq!(deserialized.inner.topk_heap.len(), 1);
-        assert_eq!(deserialized.inner.topk_heap[0].key, "test_key");
-        assert_eq!(deserialized.inner.topk_heap[0].value, 99.0);
+        assert_eq!(deserialized.inner.sketch_matrix()[0][1], 42.0);
+        // [1][2] may be 100 (legacy, no hash collision) or 199 (100+99 when test_key hashes there)
+        assert!(
+            deserialized.inner.sketch_matrix()[1][2] >= 100.0,
+            "expected >= 100, got {}",
+            deserialized.inner.sketch_matrix()[1][2]
+        );
+        assert_eq!(deserialized.inner.topk_heap_items().len(), 1);
+        assert_eq!(deserialized.inner.topk_heap_items()[0].key, "test_key");
+        // With sketchlib backend, heap stores CMS estimate (min over buckets for key).
+        // "test_key" may hash to (0,1) and (1,2) giving min(42,100)=42, or other values.
+        assert!(
+            deserialized.inner.topk_heap_items()[0].value >= 42.0,
+            "expected >= 42, got {}",
+            deserialized.inner.topk_heap_items()[0].value
+        );
     }
 
     #[test]
@@ -330,19 +351,16 @@ mod tests {
     #[test]
     fn test_get_topk_keys() {
         let mut cms = CountMinSketchWithHeapAccumulator::new(2, 3, 5);
-        cms.inner.topk_heap.push(HeapItem {
-            key: "label1;label2".to_string(),
-            value: 100.0,
-        });
-        cms.inner.topk_heap.push(HeapItem {
-            key: "label3;label4".to_string(),
-            value: 50.0,
-        });
+        cms.inner.update("label1;label2", 100.0);
+        cms.inner.update("label3;label4", 50.0);
 
         let keys = cms.get_topk_keys();
         assert_eq!(keys.len(), 2);
-        assert_eq!(keys[0].labels, vec!["label1", "label2"]);
-        assert_eq!(keys[1].labels, vec!["label3", "label4"]);
+        // Top-k order can differ between Legacy and Sketchlib backends (heap ordering / estimates).
+        let label_sets: std::collections::HashSet<_> =
+            keys.iter().map(|k| k.labels.clone()).collect();
+        assert!(label_sets.contains(&vec!["label1".to_string(), "label2".to_string()]));
+        assert!(label_sets.contains(&vec!["label3".to_string(), "label4".to_string()]));
     }
 
     #[test]
diff --git a/asap-query-engine/src/precompute_operators/datasketches_kll_accumulator.rs b/asap-query-engine/src/precompute_operators/datasketches_kll_accumulator.rs
index b074fad..7297680 100644
--- a/asap-query-engine/src/precompute_operators/datasketches_kll_accumulator.rs
+++ b/asap-query-engine/src/precompute_operators/datasketches_kll_accumulator.rs
@@ -5,6 +5,7 @@ use base64::{engine::general_purpose, Engine as _};
 use serde_json::Value;
 use sketch_core::kll::KllSketch;
 use std::collections::HashMap;
+#[cfg(feature = "extra_debugging")]
 use std::time::Instant;
 use tracing::debug;
 
@@ -42,6 +43,7 @@ impl DatasketchesKLLAccumulator {
             .decode(sketch_b64)
             .map_err(|e| format!("Failed to decode base64 sketch data: {e}"))?;
 
+        // TODO: remove this hardcoding once FlinkSketch serializes k in its output
         Ok(Self {
             inner: KllSketch::from_dsrs_bytes(&sketch_bytes, 200)?,
         })
@@ -49,6 +51,7 @@ impl DatasketchesKLLAccumulator {
 
     pub fn deserialize_from_bytes(buffer: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
         // Mirror Python implementation: deserialize sketch directly from bytes
+        // TODO: remove this hardcoding once FlinkSketch serializes k in its output
         Ok(Self {
             inner: KllSketch::from_dsrs_bytes(buffer, 200)?,
         })
@@ -111,7 +114,7 @@ impl std::fmt::Debug for DatasketchesKLLAccumulator {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("DatasketchesKLLAccumulator")
             .field("k", &self.inner.k)
-            .field("sketch_n", &self.inner.sketch.get_n())
+            .field("sketch_n", &self.inner.count())
             .finish()
     }
 }
@@ -126,7 +129,7 @@ unsafe impl Sync for DatasketchesKLLAccumulator {}
 impl SerializableToSink for DatasketchesKLLAccumulator {
     fn serialize_to_json(&self) -> Value {
         // Mirror Python implementation: {"sketch": base64_encoded_string}
-        let sketch_bytes = self.inner.sketch.serialize();
+        let sketch_bytes = self.inner.sketch_bytes();
         let sketch_b64 = general_purpose::STANDARD.encode(&sketch_bytes);
         serde_json::json!({ "sketch": sketch_b64 })
     }
@@ -159,7 +162,7 @@ impl AggregateCore for DatasketchesKLLAccumulator {
         debug!(
             "[PERF] DatasketchesKLLAccumulator::merge_with() started - self.k={}, self.n={}",
             self.inner.k,
-            self.inner.sketch.get_n()
+            self.inner.count()
         );
 
         if other.get_accumulator_type() != self.get_accumulator_type() {
@@ -256,7 +259,7 @@ mod tests {
     #[test]
     fn test_datasketches_kll_creation() {
         let kll = DatasketchesKLLAccumulator::new(200);
-        assert!(kll.inner.sketch.get_n() == 0);
+        assert!(kll.inner.count() == 0);
         assert_eq!(kll.inner.k, 200);
     }
 
@@ -266,7 +269,7 @@ mod tests {
         kll._update(10.0);
         kll._update(20.0);
         kll._update(15.0);
-        assert_eq!(kll.inner.sketch.get_n(), 3);
+        assert_eq!(kll.inner.count(), 3);
     }
 
     #[test]
@@ -277,7 +280,9 @@ mod tests {
         }
         assert_eq!(kll.get_quantile(0.0), 1.0);
         assert_eq!(kll.get_quantile(1.0), 10.0);
-        assert_eq!(kll.get_quantile(0.5), 6.0);
+        // Sketchlib KLL is approximate; 0.5 quantile of 1..10 may be 5, 6, or 7.
+        let q50 = kll.get_quantile(0.5);
+        assert!((q50 - 6.0).abs() <= 1.0, "expected median ~6, got {q50}");
     }
 
     #[test]
@@ -290,7 +295,11 @@ mod tests {
         let mut query_kwargs = HashMap::new();
         query_kwargs.insert("quantile".to_string(), "0.5".to_string());
         let result = kll.query(Statistic::Quantile, Some(&query_kwargs)).unwrap();
-        assert_eq!(result, 6.0);
+        // Sketchlib KLL is approximate; 0.5 quantile of 1..10 may be 5, 6, or 7.
+        assert!(
+            (result - 6.0).abs() <= 1.0,
+            "expected median ~6, got {result}"
+        );
 
         assert!(kll.query(Statistic::Sum, Some(&query_kwargs)).is_err());
     }
@@ -308,7 +317,7 @@ mod tests {
         }
 
         let merged = DatasketchesKLLAccumulator::merge_accumulators(vec![kll1, kll2]).unwrap();
-        assert_eq!(merged.inner.sketch.get_n(), 10);
+        assert_eq!(merged.inner.count(), 10);
         assert_eq!(merged.get_quantile(0.0), 1.0);
         assert_eq!(merged.get_quantile(1.0), 10.0);
     }
@@ -325,7 +334,7 @@ mod tests {
             DatasketchesKLLAccumulator::deserialize_from_bytes_arroyo(&bytes).unwrap();
 
         assert_eq!(deserialized.inner.k, 200);
-        assert_eq!(deserialized.inner.sketch.get_n(), 5);
+        assert_eq!(deserialized.inner.count(), 5);
         assert_eq!(deserialized.get_quantile(0.0), 1.0);
         assert_eq!(deserialized.get_quantile(1.0), 5.0);
     }
@@ -354,11 +363,19 @@ mod tests {
         let mut query_kwargs = HashMap::new();
         query_kwargs.insert("quantile".to_string(), "0.5".to_string());
         let result = kll.query(Statistic::Quantile, Some(&query_kwargs)).unwrap();
-        assert_eq!(result, 6.0);
+        // Sketchlib KLL is approximate; 0.5 quantile of 1..10 may be 5, 6, or 7.
+        assert!(
+            (result - 6.0).abs() <= 1.0,
+            "expected median ~6, got {result}"
+        );
 
         query_kwargs.insert("quantile".to_string(), "0.9".to_string());
         let result = kll.query(Statistic::Quantile, Some(&query_kwargs)).unwrap();
-        assert_eq!(result, 10.0);
+        // Sketchlib KLL is approximate; 0.9 quantile of 1..10 may be 9 or 10.
+        assert!(
+            (9.0..=10.0).contains(&result),
+            "expected 0.9 quantile in [9,10], got {result}"
+        );
 
         query_kwargs.insert("quantile".to_string(), "0.0".to_string());
         assert_eq!(
@@ -407,7 +424,7 @@ mod tests {
             vec![Box::new(kll1), Box::new(kll2), Box::new(kll3)];
 
         let merged = DatasketchesKLLAccumulator::merge_multiple(&boxed_accs).unwrap();
-        assert_eq!(merged.inner.sketch.get_n(), 15);
+        assert_eq!(merged.inner.count(), 15);
         assert_eq!(merged.get_quantile(0.0), 1.0);
         assert_eq!(merged.get_quantile(1.0), 15.0);
         assert_eq!(merged.get_quantile(0.5), 8.0);
diff --git a/asap-query-engine/tests/test_both_backends.rs b/asap-query-engine/tests/test_both_backends.rs
new file mode 100644
index 0000000..5643756
--- /dev/null
+++ b/asap-query-engine/tests/test_both_backends.rs
@@ -0,0 +1,30 @@
+//! Integration test that runs the library test suite with the sketchlib backend.
+//!
+//! When you run `cargo test -p query_engine_rust` (without --features sketchlib-tests),
+//! the lib tests run with the legacy backend. This test spawns a second run with the
+//! sketchlib backend so both modes are exercised in one `cargo test` invocation.
+//!
+//! This test is only compiled when sketchlib-tests is NOT enabled, to avoid recursion.
+
+#[cfg(not(feature = "sketchlib-tests"))]
+#[test]
+fn test_sketchlib_backend() {
+    use std::process::Command;
+
+    let status = Command::new(env!("CARGO"))
+        .args([
+            "test",
+            "-p",
+            "query_engine_rust",
+            "--lib",
+            "--features",
+            "sketchlib-tests",
+        ])
+        .status()
+        .expect("failed to spawn cargo test");
+
+    assert!(
+        status.success(),
+        "sketchlib backend tests failed (run `cargo test -p query_engine_rust --lib --features sketchlib-tests` for details)"
+    );
+}
diff --git a/asap-sketch-ingest/templates/udfs/countminsketch_count.rs.j2 b/asap-sketch-ingest/templates/udfs/countminsketch_count.rs.j2
index 16b532c..4e13ceb 100644
--- a/asap-sketch-ingest/templates/udfs/countminsketch_count.rs.j2
+++ b/asap-sketch-ingest/templates/udfs/countminsketch_count.rs.j2
@@ -3,16 +3,34 @@
 rmp-serde = "1.1"
 serde = { version = "1.0", features = ["derive"] }
 twox-hash = "2.1.0"
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
 */
+
 use arroyo_udf_plugin::udf;
 use rmp_serde::Serializer;
 use serde::{Deserialize, Serialize};
 use twox_hash::XxHash32;
 
+use sketchlib_rust::{CountMin as SketchlibCountMin, RegularPath, SketchInput, Vector2D};
+
 // Count-Min Sketch parameters
 const DEPTH: usize = {{ depth }}; // Number of hash functions
 const WIDTH: usize = {{ width }}; // Number of buckets per hash function
 
+// Implementation mode for Count-Min Sketch. Set at compile time; no env vars.
+enum ImplMode {
+    Legacy,
+    Sketchlib,
+}
+
+const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+fn use_sketchlib_for_cms() -> bool {
+    matches!(IMPL_MODE, ImplMode::Sketchlib)
+}
+
+type SketchlibCms = SketchlibCountMin<Vector2D<i64>, RegularPath>;
+
 #[derive(Serialize, Deserialize, Clone)]
 struct CountMinSketch {
     sketch: Vec<Vec<f64>>,
@@ -29,7 +47,7 @@ impl CountMinSketch {
         }
     }
 
-    // Update the sketch with a key-value pair
+    // Legacy path: update the sketch with a key-value pair using twox-hash.
     fn update(&mut self, key: &str, value: f64) {
         for i in 0..self.row_num {
             // already UTF-8
@@ -42,17 +60,53 @@ impl CountMinSketch {
 
 #[udf]
 fn countminsketch_count(keys: Vec<&str>, values: Vec<f64>) -> Option<Vec<u8>> {
-    // Create a new Count-Min Sketch
-    let mut countminsketch = CountMinSketch::new();
+    if use_sketchlib_for_cms() {
+        // sketchlib-rust backed implementation: integer counters + internal hashing.
+        let mut inner = SketchlibCms::with_dimensions(DEPTH, WIDTH);
 
-    // Iterate through the keys and values and update the sketch for each entry
-    for (i, &key) in keys.iter().enumerate() {
-        countminsketch.update(key, 1.0);
-    }
+        for &key in keys.iter() {
+            let input = SketchInput::String(key.to_owned());
+            inner.insert_many(&input, 1);
+        }
+
+        // Convert sketchlib storage to legacy matrix wire format.
+        let storage: &Vector2D<i64> = inner.as_storage();
+        let rows = storage.rows();
+        let cols = storage.cols();
+        let mut sketch = vec![vec![0.0; cols]; rows];
+
+        for r in 0..rows {
+            for c in 0..cols {
+                if let Some(v) = storage.get(r, c) {
+                    sketch[r][c] = *v as f64;
+                }
+            }
+        }
+
+        let countminsketch = CountMinSketch {
+            sketch,
+            row_num: rows,
+            col_num: cols,
+        };
 
-    let mut buf = Vec::new();
-    countminsketch
-        .serialize(&mut Serializer::new(&mut buf))
-        .ok()?;
-    Some(buf)
+        let mut buf = Vec::new();
+        countminsketch
+            .serialize(&mut Serializer::new(&mut buf))
+            .ok()?;
+        Some(buf)
+    } else {
+        // Legacy twox-hash backed implementation (unchanged).
+        let mut countminsketch = CountMinSketch::new();
+
+        // Iterate through the keys and update the sketch for each entry
+        for &key in keys.iter() {
+            countminsketch.update(key, 1.0);
+        }
+
+        let mut buf = Vec::new();
+        countminsketch
+            .serialize(&mut Serializer::new(&mut buf))
+            .ok()?;
+        Some(buf)
+    }
 }
diff --git a/asap-sketch-ingest/templates/udfs/countminsketch_sum.rs.j2 b/asap-sketch-ingest/templates/udfs/countminsketch_sum.rs.j2
index 8bf0530..e851d76 100644
--- a/asap-sketch-ingest/templates/udfs/countminsketch_sum.rs.j2
+++ b/asap-sketch-ingest/templates/udfs/countminsketch_sum.rs.j2
@@ -3,16 +3,34 @@
 rmp-serde = "1.1"
 serde = { version = "1.0", features = ["derive"] }
 twox-hash = "2.1.0"
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
 */
+
 use arroyo_udf_plugin::udf;
 use rmp_serde::Serializer;
 use serde::{Deserialize, Serialize};
 use twox_hash::XxHash32;
 
+use sketchlib_rust::{CountMin as SketchlibCountMin, RegularPath, SketchInput, Vector2D};
+
 // Count-Min Sketch parameters
 const DEPTH: usize = {{ depth }}; // Number of hash functions
 const WIDTH: usize = {{ width }}; // Number of buckets per hash function
 
+// Implementation mode for Count-Min Sketch. Set at compile time; no env vars.
+enum ImplMode {
+    Legacy,
+    Sketchlib,
+}
+
+const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+fn use_sketchlib_for_cms() -> bool {
+    matches!(IMPL_MODE, ImplMode::Sketchlib)
+}
+
+type SketchlibCms = SketchlibCountMin<Vector2D<i64>, RegularPath>;
+
 #[derive(Serialize, Deserialize, Clone)]
 struct CountMinSketch {
     sketch: Vec<Vec<f64>>,
@@ -29,7 +47,7 @@ impl CountMinSketch {
         }
     }
 
-    // Update the sketch with a key-value pair
+    // Legacy path: update the sketch with a key-value pair using twox-hash.
     fn update(&mut self, key: &str, value: f64) {
         for i in 0..self.row_num {
             // already UTF-8
@@ -47,17 +65,59 @@ fn countminsketch_sum(keys: Vec<&str>, values: Vec<f64>) -> Option<Vec<u8>> {
         return None;
     }
 
-    // Create a new Count-Min Sketch
-    let mut countminsketch = CountMinSketch::new();
+    if use_sketchlib_for_cms() {
+        // sketchlib-rust backed implementation: integer counters + internal hashing.
+        let mut inner = SketchlibCms::with_dimensions(DEPTH, WIDTH);
 
-    // Iterate through the keys and values and update the sketch for each entry
-    for (i, &key) in keys.iter().enumerate() {
-        countminsketch.update(key, values[i]);
-    }
+        for (i, &key) in keys.iter().enumerate() {
+            let value = values[i];
+            // Values arrive as f64; Count-Min counters are integers.
+            let many = value.round() as i64;
+            if many <= 0 {
+                continue;
+            }
+            let input = SketchInput::String(key.to_owned());
+            inner.insert_many(&input, many);
+        }
+
+        // Convert sketchlib storage to legacy matrix wire format.
+        let storage: &Vector2D<i64> = inner.as_storage();
+        let rows = storage.rows();
+        let cols = storage.cols();
+        let mut sketch = vec![vec![0.0; cols]; rows];
+
+        for r in 0..rows {
+            for c in 0..cols {
+                if let Some(v) = storage.get(r, c) {
+                    sketch[r][c] = *v as f64;
+                }
+            }
+        }
+
+        let countminsketch = CountMinSketch {
+            sketch,
+            row_num: rows,
+            col_num: cols,
+        };
 
-    let mut buf = Vec::new();
-    countminsketch
-        .serialize(&mut Serializer::new(&mut buf))
-        .ok()?;
-    Some(buf)
+        let mut buf = Vec::new();
+        countminsketch
+            .serialize(&mut Serializer::new(&mut buf))
+            .ok()?;
+        Some(buf)
+    } else {
+        // Legacy twox-hash backed implementation (unchanged).
+        let mut countminsketch = CountMinSketch::new();
+
+        // Iterate through the keys and values and update the sketch for each entry
+        for (i, &key) in keys.iter().enumerate() {
+            countminsketch.update(key, values[i]);
+        }
+
+        let mut buf = Vec::new();
+        countminsketch
+            .serialize(&mut Serializer::new(&mut buf))
+            .ok()?;
+        Some(buf)
+    }
 }
diff --git a/asap-sketch-ingest/templates/udfs/countminsketchwithheap_topk.rs.j2 b/asap-sketch-ingest/templates/udfs/countminsketchwithheap_topk.rs.j2
index 988d780..e789c02 100644
--- a/asap-sketch-ingest/templates/udfs/countminsketchwithheap_topk.rs.j2
+++ b/asap-sketch-ingest/templates/udfs/countminsketchwithheap_topk.rs.j2
@@ -3,19 +3,38 @@
 rmp-serde = "1.1"
 serde = { version = "1.0", features = ["derive"] }
 twox-hash = "2.1.0"
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
 */
+
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+
 use arroyo_udf_plugin::udf;
 use rmp_serde::Serializer;
 use serde::{Deserialize, Serialize};
-use std::collections::BinaryHeap;
-use std::cmp::Ordering;
 use twox_hash::XxHash32;
 
+use sketchlib_rust::{CountMin as SketchlibCountMin, RegularPath, SketchInput, Vector2D};
+
 // Count-Min Sketch with Heap parameters
 const DEPTH: usize = {{ depth }}; // Number of hash functions
 const WIDTH: usize = {{ width }}; // Number of buckets per hash function
 const HEAP_SIZE: usize = {{ heapsize }}; // Maximum number of top-k items to track
 
+// Implementation mode for Count-Min Sketch with Heap. Set at compile time; no env vars.
+enum ImplMode {
+    Legacy,
+    Sketchlib,
+}
+
+const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+fn use_sketchlib_for_cmwh() -> bool {
+    matches!(IMPL_MODE, ImplMode::Sketchlib)
+}
+
+type SketchlibCms = SketchlibCountMin<Vector2D<i64>, RegularPath>;
+
 #[derive(Serialize, Deserialize, Clone)]
 struct CountMinSketch {
     sketch: Vec<Vec<f64>>,
@@ -93,7 +112,10 @@ impl PartialOrd for HeapItem {
 }
 
 struct CountMinSketchWithHeap {
+    // Legacy wire-format matrix representation.
     sketch: CountMinSketch,
+    // Optional sketchlib-rust Count-Min used when ARROYO_SKETCH_CMWH_IMPL selects sketchlib mode.
+    sketchlib: Option<SketchlibCms>,
     topk_heap: BinaryHeap<HeapItem>, // Maintain as heap during processing
     heap_size: usize,
 }
@@ -109,8 +131,14 @@ struct CountMinSketchWithHeapSerialized {
 
 impl CountMinSketchWithHeap {
     fn new() -> Self {
+        let use_sketchlib = use_sketchlib_for_cmwh();
         CountMinSketchWithHeap {
             sketch: CountMinSketch::new(),
+            sketchlib: if use_sketchlib {
+                Some(SketchlibCms::with_dimensions(DEPTH, WIDTH))
+            } else {
+                None
+            },
             topk_heap: BinaryHeap::new(),
             heap_size: HEAP_SIZE,
         }
@@ -118,8 +146,25 @@ impl CountMinSketchWithHeap {
 
     // Update the sketch and maintain the top-k heap
     fn update_with_topk(&mut self, key: &str, value: f64) {
-        // Update the Count-Min Sketch and get the estimated frequency in one pass
-        let estimated_freq = self.sketch.update_with_query(key, value);
+        // Compute estimated frequency using either legacy or sketchlib implementation.
+        let estimated_freq = if use_sketchlib_for_cmwh() {
+            let inner = self
+                .sketchlib
+                .as_mut()
+                .expect("sketchlib mode enabled but sketchlib state is missing");
+
+            // Values arrive as f64; Count-Min counters are integers.
+            let many = value.round() as i64;
+            if many <= 0 {
+                return;
+            }
+            let input = SketchInput::String(key.to_owned());
+            inner.insert_many(&input, many);
+            inner.estimate(&input) as f64
+        } else {
+            // Legacy Count-Min update + query in one pass.
+            self.sketch.update_with_query(key, value)
+        };
 
         // Check if the key already exists in the heap
         // TODO: This takes O(k) time, can we do better?
@@ -159,7 +204,30 @@ impl CountMinSketchWithHeap {
     }
 
     // Convert to serializable format
-    fn to_serializable(self) -> CountMinSketchWithHeapSerialized {
+    fn to_serializable(mut self) -> CountMinSketchWithHeapSerialized {
+        // In sketchlib mode, derive the matrix from the inner Count-Min sketch so that
+        // the wire format matches QueryEngineRust expectations.
+        if let Some(inner) = &self.sketchlib {
+            let storage: &Vector2D<i64> = inner.as_storage();
+            let rows = storage.rows();
+            let cols = storage.cols();
+            let mut sketch = vec![vec![0.0; cols]; rows];
+
+            for r in 0..rows {
+                for c in 0..cols {
+                    if let Some(v) = storage.get(r, c) {
+                        sketch[r][c] = *v as f64;
+                    }
+                }
+            }
+
+            self.sketch = CountMinSketch {
+                sketch,
+                row_num: rows,
+                col_num: cols,
+            };
+        }
+
         CountMinSketchWithHeapSerialized {
             sketch: self.sketch,
             topk_heap: self.topk_heap.into_iter().collect(),
diff --git a/asap-sketch-ingest/templates/udfs/datasketcheskll_.rs.j2 b/asap-sketch-ingest/templates/udfs/datasketcheskll_.rs.j2
index ca34027..d95f3b1 100644
--- a/asap-sketch-ingest/templates/udfs/datasketcheskll_.rs.j2
+++ b/asap-sketch-ingest/templates/udfs/datasketcheskll_.rs.j2
@@ -1,6 +1,7 @@
 /*
 [dependencies]
-dsrs = { git = "https://github.com/SketchDB/datasketches-rs" }
+dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" }
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
 arroyo-udf-plugin = "0.1"
 rmp-serde = "1.1"
 serde = { version = "1.0", features = ["derive"] }
@@ -10,50 +11,57 @@ use arroyo_udf_plugin::udf;
 use dsrs::KllDoubleSketch;
 use rmp_serde::Serializer;
 use serde::{Deserialize, Serialize};
+use sketchlib_rust::{KLL, SketchInput};
 
 const DEFAULT_K: u16 = {{ k }};
 
+// Implementation mode for KLL Sketch. Set at compile time; no env vars.
+enum ImplMode {
+    Legacy,
+    Sketchlib,
+}
+
+const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+fn use_sketchlib_for_kll() -> bool {
+    matches!(IMPL_MODE, ImplMode::Sketchlib)
+}
+
 #[derive(Serialize, Deserialize)]
 struct KllSketchData {
     k: u16,
     sketch_bytes: Vec<u8>,
 }
 
-struct KllSketchWrapper {
-    k: u16,
-    sketch: KllDoubleSketch,
-}
-
-impl KllSketchWrapper {
-    fn new(k: u16) -> Self {
-        KllSketchWrapper {
-            k,
-            sketch: KllDoubleSketch::with_k(k),
+#[udf]
+fn datasketcheskll_(values: Vec<f64>) -> Option<Vec<u8>> {
+    if use_sketchlib_for_kll() {
+        // sketchlib-rust backed implementation
+        let mut sketch = KLL::init_kll(DEFAULT_K as i32);
+        for &value in &values {
+            let _ = sketch.update(&SketchInput::F64(value));
         }
-    }
-
-    fn update(&mut self, values: &[f64]) {
-        for &value in values {
-            self.sketch.update(value);
+        let sketch_bytes = sketch.serialize_to_bytes().ok()?;
+        let serialized = KllSketchData {
+            k: DEFAULT_K,
+            sketch_bytes,
+        };
+        let mut buf = Vec::new();
+        rmp_serde::encode::write(&mut buf, &serialized).ok()?;
+        Some(buf)
+    } else {
+        // Legacy dsrs backed implementation
+        let mut kll_wrapper = KllDoubleSketch::with_k(DEFAULT_K);
+        for &value in &values {
+            kll_wrapper.update(value);
         }
-    }
-
-    fn serialize_bytes(&self) -> Vec<u8> {
-        let sketch_data = self.sketch.serialize();
+        let sketch_data = kll_wrapper.serialize();
         let serialized = KllSketchData {
-            k: self.k,
+            k: DEFAULT_K,
             sketch_bytes: sketch_data.as_ref().to_vec(),
         };
         let mut buf = Vec::new();
         rmp_serde::encode::write(&mut buf, &serialized).unwrap();
-        buf
+        Some(buf)
     }
 }
-
-#[udf]
-fn datasketcheskll_(values: Vec<f64>) -> Option<Vec<u8>> {
-    let mut kll_wrapper = KllSketchWrapper::new(DEFAULT_K);
-    kll_wrapper.update(&values);
-
-    Some(kll_wrapper.serialize_bytes())
-}
diff --git a/asap-sketch-ingest/templates/udfs/hydrakll_.rs.j2 b/asap-sketch-ingest/templates/udfs/hydrakll_.rs.j2
index b9be3cb..94f4eb3 100644
--- a/asap-sketch-ingest/templates/udfs/hydrakll_.rs.j2
+++ b/asap-sketch-ingest/templates/udfs/hydrakll_.rs.j2
@@ -1,6 +1,7 @@
 /*
 [dependencies]
-dsrs = { git = "https://github.com/SketchDB/datasketches-rs" }
+dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" }
+sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" }
 arroyo-udf-plugin = "0.1"
 rmp-serde = "1.1"
 serde = { version = "1.0", features = ["derive"] }
@@ -11,12 +12,25 @@ use arroyo_udf_plugin::udf;
 use dsrs::KllDoubleSketch;
 use rmp_serde::Serializer;
 use serde::{Deserialize, Serialize};
+use sketchlib_rust::{KLL, SketchInput};
 use xxhash_rust::xxh32::xxh32;
 
 const ROW_NUM: usize = {{ row_num }};
 const COL_NUM: usize = {{ col_num }};
 const DEFAULT_K: u16 = {{ k }};
 
+// Implementation mode for KLL Sketch. Set at compile time; no env vars.
+enum ImplMode {
+    Legacy,
+    Sketchlib,
+}
+
+const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+fn use_sketchlib_for_kll() -> bool {
+    matches!(IMPL_MODE, ImplMode::Sketchlib)
+}
+
 // Match QueryEngineRust format exactly
 #[derive(Deserialize, Serialize)]
 struct KllSketchData {
@@ -33,51 +47,100 @@ struct HydraKllSketchData {
 
 #[udf]
 fn hydrakll_(keys: Vec<&str>, values: Vec<f64>) -> Option<Vec<u8>> {
-    // Initialize 2D matrix of KLL sketches
-    let mut sketches: Vec<Vec<KllDoubleSketch>> = vec![
-        vec![KllDoubleSketch::with_k(DEFAULT_K); COL_NUM];
-        ROW_NUM
-    ];
-
-    // Process each key-value pair
-    for (i, &key) in keys.iter().enumerate() {
-        if i >= values.len() {
-            break;
+    if use_sketchlib_for_kll() {
+        // sketchlib-rust backed implementation
+        let mut sketches: Vec<Vec<KLL>> = (0..ROW_NUM)
+            .map(|_| {
+                (0..COL_NUM)
+                    .map(|_| KLL::init_kll(DEFAULT_K as i32))
+                    .collect()
+            })
+            .collect();
+
+        for (i, &key) in keys.iter().enumerate() {
+            if i >= values.len() {
+                break;
+            }
+            let key_bytes = key.as_bytes();
+            for row in 0..ROW_NUM {
+                let hash_value = xxh32(key_bytes, row as u32);
+                let col_index = (hash_value as usize) % COL_NUM;
+                let _ = sketches[row][col_index].update(&SketchInput::F64(values[i]));
+            }
         }
 
-        let key_bytes = key.as_bytes();
+        let sketch_data: Option<Vec<Vec<KllSketchData>>> = sketches
+            .iter()
+            .map(|row| {
+                row.iter()
+                    .map(|sketch| {
+                        let sketch_bytes = sketch.serialize_to_bytes().ok()?;
+                        Some(KllSketchData {
+                            k: DEFAULT_K,
+                            sketch_bytes,
+                        })
+                    })
+                    .collect::<Option<Vec<_>>>()
+            })
+            .collect::<Option<Vec<_>>>();
+        let sketch_data = sketch_data?;
+
+        let hydra_data = HydraKllSketchData {
+            row_num: ROW_NUM,
+            col_num: COL_NUM,
+            sketches: sketch_data,
+        };
 
-        // Update each row using different hash functions
-        for row in 0..ROW_NUM {
-            let hash_value = xxh32(key_bytes, row as u32);
-            let col_index = (hash_value as usize) % COL_NUM;
-            sketches[row][col_index].update(values[i]);
+        let mut buf = Vec::new();
+        hydra_data.serialize(&mut Serializer::new(&mut buf)).ok()?;
+        Some(buf)
+    } else {
+        // Legacy dsrs backed implementation
+        let mut sketches: Vec<Vec<KllDoubleSketch>> = (0..ROW_NUM)
+            .map(|_| {
+                (0..COL_NUM)
+                    .map(|_| KllDoubleSketch::with_k(DEFAULT_K))
+                    .collect()
+            })
+            .collect();
+
+        for (i, &key) in keys.iter().enumerate() {
+            if i >= values.len() {
+                break;
+            }
+
+            let key_bytes = key.as_bytes();
+
+            for row in 0..ROW_NUM {
+                let hash_value = xxh32(key_bytes, row as u32);
+                let col_index = (hash_value as usize) % COL_NUM;
+                sketches[row][col_index].update(values[i]);
+            }
         }
-    }
 
-    // Serialize to match QueryEngineRust format
-    let sketch_data: Vec<Vec<KllSketchData>> = sketches
-        .iter()
-        .map(|row| {
-            row.iter()
-                .map(|sketch| {
-                    let sketch_bytes = sketch.serialize();
-                    KllSketchData {
-                        k: DEFAULT_K,
-                        sketch_bytes: sketch_bytes.as_ref().to_vec(),
-                    }
-                })
-                .collect()
-        })
-        .collect();
-
-    let hydra_data = HydraKllSketchData {
-        row_num: ROW_NUM,
-        col_num: COL_NUM,
-        sketches: sketch_data,
-    };
-
-    let mut buf = Vec::new();
-    hydra_data.serialize(&mut Serializer::new(&mut buf)).ok()?;
-    Some(buf)
+        let sketch_data: Vec<Vec<KllSketchData>> = sketches
+            .iter()
+            .map(|row| {
+                row.iter()
+                    .map(|sketch| {
+                        let sketch_bytes = sketch.serialize();
+                        KllSketchData {
+                            k: DEFAULT_K,
+                            sketch_bytes: sketch_bytes.as_ref().to_vec(),
+                        }
+                    })
+                    .collect()
+            })
+            .collect();
+
+        let hydra_data = HydraKllSketchData {
+            row_num: ROW_NUM,
+            col_num: COL_NUM,
+            sketches: sketch_data,
+        };
+
+        let mut buf = Vec::new();
+        hydra_data.serialize(&mut Serializer::new(&mut buf)).ok()?;
+        Some(buf)
+    }
 }

From 927d7fc1b7f399d7cf8b36dc6b85ea017aea3476 Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Fri, 20 Mar 2026 14:52:42 -0400
Subject: [PATCH 2/6] Restore per-backend default constants, global default
 Sketchlib

---
 asap-common/sketch-core/src/config.rs | 24 ++++++++++++++++--------
 asap-query-engine/src/main.rs         |  6 +++---
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/asap-common/sketch-core/src/config.rs b/asap-common/sketch-core/src/config.rs
index 84267b0..6bdccb6 100644
--- a/asap-common/sketch-core/src/config.rs
+++ b/asap-common/sketch-core/src/config.rs
@@ -9,18 +9,26 @@ pub enum ImplMode {
     Sketchlib,
 }
 
+/// Global default when impl mode is not explicitly configured (e.g. env var parsing).
+pub const DEFAULT_IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+
+/// Per-backend defaults. Used when configure() has not been called.
+pub const DEFAULT_CMS_IMPL: ImplMode = ImplMode::Sketchlib;
+pub const DEFAULT_KLL_IMPL: ImplMode = ImplMode::Sketchlib;
+pub const DEFAULT_CMWH_IMPL: ImplMode = ImplMode::Sketchlib;
+
 static COUNTMIN_MODE: OnceLock<ImplMode> = OnceLock::new();
 
 /// Returns true if Count-Min operations should use sketchlib-rust internally.
 pub fn use_sketchlib_for_count_min() -> bool {
-    *COUNTMIN_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+    *COUNTMIN_MODE.get_or_init(|| DEFAULT_CMS_IMPL) == ImplMode::Sketchlib
 }
 
 static KLL_MODE: OnceLock<ImplMode> = OnceLock::new();
 
 /// Returns true if KLL operations should use sketchlib-rust internally.
 pub fn use_sketchlib_for_kll() -> bool {
-    *KLL_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+    *KLL_MODE.get_or_init(|| DEFAULT_KLL_IMPL) == ImplMode::Sketchlib
 }
 
 static COUNTMIN_WITH_HEAP_MODE: OnceLock<ImplMode> = OnceLock::new();
@@ -28,7 +36,7 @@ static COUNTMIN_WITH_HEAP_MODE: OnceLock<ImplMode> = OnceLock::new();
 /// Returns true if Count-Min-With-Heap operations should use sketchlib-rust internally for the
 /// Count-Min portion.
 pub fn use_sketchlib_for_count_min_with_heap() -> bool {
-    *COUNTMIN_WITH_HEAP_MODE.get_or_init(|| ImplMode::Sketchlib) == ImplMode::Sketchlib
+    *COUNTMIN_WITH_HEAP_MODE.get_or_init(|| DEFAULT_CMWH_IMPL) == ImplMode::Sketchlib
 }
 
 /// Set backend modes for all sketch types. Call once at process startup,
@@ -59,17 +67,17 @@ pub fn parse_mode(var: Result<String, std::env::VarError>) -> ImplMode {
             "sketchlib" => ImplMode::Sketchlib,
             other => {
                 eprintln!(
-                    "sketch-core: unrecognised IMPL value {other:?}, defaulting to Sketchlib"
+                    "sketch-core: unrecognised IMPL value {other:?}, defaulting to {DEFAULT_IMPL_MODE:?}"
                 );
-                ImplMode::Sketchlib
+                DEFAULT_IMPL_MODE
             }
         },
-        Err(std::env::VarError::NotPresent) => ImplMode::Sketchlib,
+        Err(std::env::VarError::NotPresent) => DEFAULT_IMPL_MODE,
         Err(std::env::VarError::NotUnicode(v)) => {
             eprintln!(
-                "sketch-core: IMPL env var has invalid UTF-8 ({v:?}), defaulting to Sketchlib"
+                "sketch-core: IMPL env var has invalid UTF-8 ({v:?}), defaulting to {DEFAULT_IMPL_MODE:?}"
             );
-            ImplMode::Sketchlib
+            DEFAULT_IMPL_MODE
         }
     }
 }
diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs
index f0752c3..00be2fe 100644
--- a/asap-query-engine/src/main.rs
+++ b/asap-query-engine/src/main.rs
@@ -111,15 +111,15 @@ struct Args {
     promsketch_config: Option<String>,
 
     /// Backend implementation for Count-Min Sketch (legacy | sketchlib)
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = config::DEFAULT_CMS_IMPL)]
     sketch_cms_impl: ImplMode,
 
     /// Backend implementation for KLL Sketch (legacy | sketchlib)
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = config::DEFAULT_KLL_IMPL)]
     sketch_kll_impl: ImplMode,
 
     /// Backend implementation for Count-Min-With-Heap (legacy | sketchlib)
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = config::DEFAULT_CMWH_IMPL)]
     sketch_cmwh_impl: ImplMode,
 
     /// Enable OTLP metrics ingest (gRPC + HTTP)

From 7fc5001cc00da248ddaa3b108999134749ef9f10 Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Fri, 20 Mar 2026 15:01:21 -0400
Subject: [PATCH 3/6] Use per-backend defaults in fidelity, configurable
 impl_mode in UDF templates

---
 .../sketch-core/src/bin/sketchlib_fidelity.rs        |  9 ++++++---
 asap-summary-ingest/run_arroyosketch.py              | 12 ++++++++++++
 .../templates/udfs/datasketcheskll_.rs.j2            |  2 +-
 asap-summary-ingest/templates/udfs/hydrakll_.rs.j2   |  2 +-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
index 3f6b263..0b47f48 100644
--- a/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
+++ b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
@@ -1,3 +1,6 @@
+// Fidelity benchmarks comparing legacy vs sketchlib implementations across sketch types.
+#![allow(dead_code)]
+
 use std::collections::HashMap;
 
 use clap::Parser;
@@ -94,11 +97,11 @@ fn rmse_percentage(exact: &[f64], est: &[f64]) -> f64 {
 
 #[derive(Parser)]
 struct Args {
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMS_IMPL)]
     cms_impl: ImplMode,
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_KLL_IMPL)]
     kll_impl: ImplMode,
-    #[arg(long, value_enum, default_value = "sketchlib")]
+    #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMWH_IMPL)]
     cmwh_impl: ImplMode,
 }
 
diff --git a/asap-summary-ingest/run_arroyosketch.py b/asap-summary-ingest/run_arroyosketch.py
index af0a4fc..3769723 100644
--- a/asap-summary-ingest/run_arroyosketch.py
+++ b/asap-summary-ingest/run_arroyosketch.py
@@ -464,6 +464,18 @@ def create_pipeline(
                     template_source, udf_template.environment
                 )
 
+                # Per-UDF impl mode defaults (aligned with sketch-core config)
+                UDF_IMPL_DEFAULTS = {
+                    "countminsketch_count": "Sketchlib",
+                    "countminsketch_sum": "Sketchlib",
+                    "countminsketchwithheap_topk": "Sketchlib",
+                    "datasketcheskll_": "Sketchlib",
+                    "hydrakll_": "Sketchlib",
+                }
+                params.setdefault(
+                    "impl_mode", UDF_IMPL_DEFAULTS.get(udf_name, "Sketchlib")
+                )
+
                 # Handle config key mapping (K -> k for KLL)
                 if "K" in params and "k" in required_params:
                     params["k"] = params["K"]
diff --git a/asap-summary-ingest/templates/udfs/datasketcheskll_.rs.j2 b/asap-summary-ingest/templates/udfs/datasketcheskll_.rs.j2
index d95f3b1..d5d51dc 100644
--- a/asap-summary-ingest/templates/udfs/datasketcheskll_.rs.j2
+++ b/asap-summary-ingest/templates/udfs/datasketcheskll_.rs.j2
@@ -21,7 +21,7 @@ enum ImplMode {
     Sketchlib,
 }
 
-const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+const IMPL_MODE: ImplMode = ImplMode::{{ impl_mode | default("Sketchlib") }};
 
 fn use_sketchlib_for_kll() -> bool {
     matches!(IMPL_MODE, ImplMode::Sketchlib)
diff --git a/asap-summary-ingest/templates/udfs/hydrakll_.rs.j2 b/asap-summary-ingest/templates/udfs/hydrakll_.rs.j2
index 94f4eb3..40cd128 100644
--- a/asap-summary-ingest/templates/udfs/hydrakll_.rs.j2
+++ b/asap-summary-ingest/templates/udfs/hydrakll_.rs.j2
@@ -25,7 +25,7 @@ enum ImplMode {
     Sketchlib,
 }
 
-const IMPL_MODE: ImplMode = ImplMode::Sketchlib;
+const IMPL_MODE: ImplMode = ImplMode::{{ impl_mode | default("Sketchlib") }};
 
 fn use_sketchlib_for_kll() -> bool {
     matches!(IMPL_MODE, ImplMode::Sketchlib)

From 13a597873cc493fe8880064a0db5ecf8496cd096 Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Fri, 20 Mar 2026 15:13:58 -0400
Subject: [PATCH 4/6] UDFs: use same impl mode as QueryEngine (sketch_cms_impl,
 etc.)

---
 asap-summary-ingest/run_arroyosketch.py       | 41 ++++++++++++++++++-
 .../experiment_utils/services/arroyo.py       | 15 ++++++-
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/asap-summary-ingest/run_arroyosketch.py b/asap-summary-ingest/run_arroyosketch.py
index 3769723..dabf91c 100644
--- a/asap-summary-ingest/run_arroyosketch.py
+++ b/asap-summary-ingest/run_arroyosketch.py
@@ -380,6 +380,16 @@ def delete_connection_table(args, table_name):
         )
 
 
+# Map UDF names to the sketch impl CLI arg. UDFs use same impl mode as QueryEngine.
+_UDF_IMPL_MODE_ARG = {
+    "countminsketch_count": "sketch_cms_impl",
+    "countminsketch_sum": "sketch_cms_impl",
+    "countminsketchwithheap_topk": "sketch_cmwh_impl",
+    "datasketcheskll_": "sketch_kll_impl",
+    "hydrakll_": "sketch_kll_impl",
+}
+
+
 def create_pipeline(
     args: argparse.Namespace,
     sql_queries: List[str],
@@ -444,7 +454,13 @@ def create_pipeline(
             regular_path = os.path.join(udf_dir, f"{udf_name}.rs")
 
             # Get parameters for this UDF
-            params = agg_function_params.get(udf_name, {})
+            params = dict(agg_function_params.get(udf_name, {}))
+
+            # Inject impl_mode from CLI so UDFs use same backend as QueryEngine
+            impl_arg = _UDF_IMPL_MODE_ARG.get(udf_name)
+            if impl_arg:
+                impl_val = getattr(args, impl_arg, "legacy")
+                params["impl_mode"] = impl_val.capitalize()  # "Legacy" or "Sketchlib"
 
             if len(params) > 0 and not os.path.exists(template_path):
                 raise ValueError(
@@ -1112,6 +1128,29 @@ def main(args):
         help="Query language for schema interpretation (default: promql)",
     )
 
+    # Sketch implementation mode - must match QueryEngine (--sketch-cms-impl etc.)
+    parser.add_argument(
+        "--sketch_cms_impl",
+        type=str,
+        choices=["legacy", "sketchlib"],
+        default="legacy",
+        help="Count-Min Sketch backend (legacy | sketchlib). Must match QueryEngine.",
+    )
+    parser.add_argument(
+        "--sketch_kll_impl",
+        type=str,
+        choices=["legacy", "sketchlib"],
+        default="legacy",
+        help="KLL Sketch backend (legacy | sketchlib). Must match QueryEngine.",
+    )
+    parser.add_argument(
+        "--sketch_cmwh_impl",
+        type=str,
+        choices=["legacy", "sketchlib"],
+        default="legacy",
+        help="Count-Min-With-Heap backend (legacy | sketchlib). Must match QueryEngine.",
+    )
+
     args = parser.parse_args()
     check_args(args)
     main(args)
diff --git a/asap-tools/experiments/experiment_utils/services/arroyo.py b/asap-tools/experiments/experiment_utils/services/arroyo.py
index e329a80..db9e9fc 100644
--- a/asap-tools/experiments/experiment_utils/services/arroyo.py
+++ b/asap-tools/experiments/experiment_utils/services/arroyo.py
@@ -105,6 +105,9 @@ def run_arroyosketch(
         use_kafka_ingest: bool = False,
         enable_optimized_remote_write: bool = False,
         avoid_long_ssh: bool = False,
+        sketch_cms_impl: str = "legacy",
+        sketch_kll_impl: str = "legacy",
+        sketch_cmwh_impl: str = "legacy",
     ) -> str:
         """
         Run ArroyoSketch pipeline.
@@ -122,6 +125,9 @@ def run_arroyosketch(
             parallelism: Pipeline parallelism
             enable_optimized_remote_write: If True, use optimized Prometheus remote_write source (10-20x faster)
             avoid_long_ssh: If True, run command in background to avoid long SSH connections
+            sketch_cms_impl: Count-Min Sketch backend (legacy|sketchlib). Must match QueryEngine.
+            sketch_kll_impl: KLL Sketch backend (legacy|sketchlib). Must match QueryEngine.
+            sketch_cmwh_impl: Count-Min-With-Heap backend (legacy|sketchlib). Must match QueryEngine.
 
         Returns:
             Pipeline ID
@@ -134,7 +140,7 @@ def run_arroyosketch(
         )
 
         if use_kafka_ingest:
-            cmd = "python run_arroyosketch.py --source_type kafka --kafka_input_format {} --output_format {} --pipeline_name {} --config_file_path {}/streaming_config.yaml  --input_kafka_topic {} --output_kafka_topic {} --output_dir {}".format(
+            cmd = "python run_arroyosketch.py --source_type kafka --kafka_input_format {} --output_format {} --pipeline_name {} --config_file_path {}/streaming_config.yaml  --input_kafka_topic {} --output_kafka_topic {} --output_dir {} --sketch_cms_impl {} --sketch_kll_impl {} --sketch_cmwh_impl {}".format(
                 flink_input_format,
                 flink_output_format,
                 experiment_name,
@@ -142,6 +148,9 @@ def run_arroyosketch(
                 constants.FLINK_INPUT_TOPIC,
                 constants.FLINK_OUTPUT_TOPIC,
                 arroyosketch_output_dir,
+                sketch_cms_impl,
+                sketch_kll_impl,
+                sketch_cmwh_impl,
             )
         else:
             # Build base command for Prometheus remote write
@@ -159,6 +168,10 @@ def run_arroyosketch(
             # Add optimized source flag if enabled
             if enable_optimized_remote_write:
                 cmd += " --prometheus_remote_write_source optimized"
+        # Sketch impl mode - must match QueryEngine
+        cmd += " --sketch_cms_impl {} --sketch_kll_impl {} --sketch_cmwh_impl {}".format(
+            sketch_cms_impl, sketch_kll_impl, sketch_cmwh_impl
+        )
         cmd_dir = os.path.join(
             constants.CLOUDLAB_HOME_DIR, "code", "asap-summary-ingest"
         )

From a881811bfb137eb7185afccf098906e9b9b255ad Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Fri, 20 Mar 2026 15:30:18 -0400
Subject: [PATCH 5/6] Simplify UDF impl mode, default all to sketchlib

---
 asap-summary-ingest/run_arroyosketch.py       | 50 +++++++------------
 .../experiment_utils/services/arroyo.py       |  6 +--
 2 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/asap-summary-ingest/run_arroyosketch.py b/asap-summary-ingest/run_arroyosketch.py
index dabf91c..4eac982 100644
--- a/asap-summary-ingest/run_arroyosketch.py
+++ b/asap-summary-ingest/run_arroyosketch.py
@@ -380,16 +380,6 @@ def delete_connection_table(args, table_name):
         )
 
 
-# Map UDF names to the sketch impl CLI arg. UDFs use same impl mode as QueryEngine.
-_UDF_IMPL_MODE_ARG = {
-    "countminsketch_count": "sketch_cms_impl",
-    "countminsketch_sum": "sketch_cms_impl",
-    "countminsketchwithheap_topk": "sketch_cmwh_impl",
-    "datasketcheskll_": "sketch_kll_impl",
-    "hydrakll_": "sketch_kll_impl",
-}
-
-
 def create_pipeline(
     args: argparse.Namespace,
     sql_queries: List[str],
@@ -453,15 +443,9 @@ def create_pipeline(
             template_path = os.path.join(udf_dir, f"{udf_name}.rs.j2")
             regular_path = os.path.join(udf_dir, f"{udf_name}.rs")
 
-            # Get parameters for this UDF
+            # Get parameters for this UDF (impl_mode injected in main() for sketch UDFs)
             params = dict(agg_function_params.get(udf_name, {}))
 
-            # Inject impl_mode from CLI so UDFs use same backend as QueryEngine
-            impl_arg = _UDF_IMPL_MODE_ARG.get(udf_name)
-            if impl_arg:
-                impl_val = getattr(args, impl_arg, "legacy")
-                params["impl_mode"] = impl_val.capitalize()  # "Legacy" or "Sketchlib"
-
             if len(params) > 0 and not os.path.exists(template_path):
                 raise ValueError(
                     f"UDF {udf_name} requires parameters {params} but no template found at {template_path}"
@@ -480,18 +464,6 @@ def create_pipeline(
                     template_source, udf_template.environment
                 )
 
-                # Per-UDF impl mode defaults (aligned with sketch-core config)
-                UDF_IMPL_DEFAULTS = {
-                    "countminsketch_count": "Sketchlib",
-                    "countminsketch_sum": "Sketchlib",
-                    "countminsketchwithheap_topk": "Sketchlib",
-                    "datasketcheskll_": "Sketchlib",
-                    "hydrakll_": "Sketchlib",
-                }
-                params.setdefault(
-                    "impl_mode", UDF_IMPL_DEFAULTS.get(udf_name, "Sketchlib")
-                )
-
                 # Handle config key mapping (K -> k for KLL)
                 if "K" in params and "k" in required_params:
                     params["k"] = params["K"]
@@ -974,6 +946,20 @@ def main(args):
             filter_metric_name,
         )
 
+        parameters = dict(parameters)
+        if agg_function in ("countminsketch_count", "countminsketch_sum"):
+            parameters["impl_mode"] = getattr(
+                args, "sketch_cms_impl", "legacy"
+            ).capitalize()
+        elif agg_function == "countminsketchwithheap_topk":
+            parameters["impl_mode"] = getattr(
+                args, "sketch_cmwh_impl", "legacy"
+            ).capitalize()
+        elif agg_function in ("datasketcheskll_", "hydrakll_"):
+            parameters["impl_mode"] = getattr(
+                args, "sketch_kll_impl", "legacy"
+            ).capitalize()
+
         sql_queries.append(sql_query)
         # if not is_labels_accumulator:
         agg_functions_with_params.append((agg_function, parameters))
@@ -1133,21 +1119,21 @@ def main(args):
         "--sketch_cms_impl",
         type=str,
         choices=["legacy", "sketchlib"],
-        default="legacy",
+        default="sketchlib",
         help="Count-Min Sketch backend (legacy | sketchlib). Must match QueryEngine.",
     )
     parser.add_argument(
         "--sketch_kll_impl",
         type=str,
         choices=["legacy", "sketchlib"],
-        default="legacy",
+        default="sketchlib",
         help="KLL Sketch backend (legacy | sketchlib). Must match QueryEngine.",
     )
     parser.add_argument(
         "--sketch_cmwh_impl",
         type=str,
         choices=["legacy", "sketchlib"],
-        default="legacy",
+        default="sketchlib",
         help="Count-Min-With-Heap backend (legacy | sketchlib). Must match QueryEngine.",
     )
 
diff --git a/asap-tools/experiments/experiment_utils/services/arroyo.py b/asap-tools/experiments/experiment_utils/services/arroyo.py
index db9e9fc..c4658c3 100644
--- a/asap-tools/experiments/experiment_utils/services/arroyo.py
+++ b/asap-tools/experiments/experiment_utils/services/arroyo.py
@@ -105,9 +105,9 @@ def run_arroyosketch(
         use_kafka_ingest: bool = False,
         enable_optimized_remote_write: bool = False,
         avoid_long_ssh: bool = False,
-        sketch_cms_impl: str = "legacy",
-        sketch_kll_impl: str = "legacy",
-        sketch_cmwh_impl: str = "legacy",
+        sketch_cms_impl: str = "sketchlib",
+        sketch_kll_impl: str = "sketchlib",
+        sketch_cmwh_impl: str = "sketchlib",
     ) -> str:
         """
         Run ArroyoSketch pipeline.

From 0db700d5c78e1641488249e3420023e6119c5ca2 Mon Sep 17 00:00:00 2001
From: Gnanesh <gnanesh.dometti@gmail.com>
Date: Fri, 20 Mar 2026 15:32:30 -0400
Subject: [PATCH 6/6] Fix black formatting in arroyo.py

---
 asap-tools/experiments/experiment_utils/services/arroyo.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/asap-tools/experiments/experiment_utils/services/arroyo.py b/asap-tools/experiments/experiment_utils/services/arroyo.py
index c4658c3..80f9b64 100644
--- a/asap-tools/experiments/experiment_utils/services/arroyo.py
+++ b/asap-tools/experiments/experiment_utils/services/arroyo.py
@@ -169,8 +169,10 @@ def run_arroyosketch(
             if enable_optimized_remote_write:
                 cmd += " --prometheus_remote_write_source optimized"
         # Sketch impl mode - must match QueryEngine
-        cmd += " --sketch_cms_impl {} --sketch_kll_impl {} --sketch_cmwh_impl {}".format(
-            sketch_cms_impl, sketch_kll_impl, sketch_cmwh_impl
+        cmd += (
+            " --sketch_cms_impl {} --sketch_kll_impl {} --sketch_cmwh_impl {}".format(
+                sketch_cms_impl, sketch_kll_impl, sketch_cmwh_impl
+            )
         )
         cmd_dir = os.path.join(
             constants.CLOUDLAB_HOME_DIR, "code", "asap-summary-ingest"