Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 62 additions & 7 deletions asap-common/sketch-core/report.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
# Sketchlib Fidelity Report
# Report

Compares the **legacy** Count-Min Sketch implementation in `sketch-core` vs the new **sketchlib-rust** backend.
Compares the **legacy** sketch implementations in `sketch-core` vs the **sketchlib-rust** backends (Count-Min Sketch, Count-Min-With-Heap, KLL, HydraKLL).

## Fidelity harness

The fidelity binary selects backends via CLI flags.
The fidelity binary selects backends via CLI flags (`--cms-impl`, `--kll-impl`, `--cmwh-impl`).

| Goal | Command |
|-------------|---------------------------------------------------------------|
| CMS sketchlib | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib` |
| CMS legacy | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy` |
| Goal | Command |
|--------------------------|--------------------------------------------------------------------------------------------------------------|
| Default (all sketchlib) | `cargo run -p sketch-core --bin sketchlib_fidelity` |
| All legacy | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy --kll-impl legacy --cmwh-impl legacy` |
| Legacy KLL only | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib --kll-impl legacy --cmwh-impl sketchlib` |
| CMS sketchlib only | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib` |
| CMS legacy only | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy` |

## Unit tests

Expand Down Expand Up @@ -68,3 +71,55 @@ The heap is maintained by local updates; recall is measured against the **true**
| 2048 | 200000 | 2000 | 20 | sketchlib-rust | 1.00 | 0.9982 | 0.021 | 0.067 |
| 2048 | 200000 | 2000 | 50 | Legacy | 0.40 | 0.9999983 | 5.60 | 16.49 |
| 2048 | 200000 | 2000 | 50 | sketchlib-rust | 0.48 | 0.9999990 | 3.90 | 12.95 |

---

### KllSketch (quantiles, absolute rank error)

For each quantile $q$, we query the sketch for its estimate `est_value` and then compute:
`abs_rank_error = |rank_fraction(exact_sorted_values, est_value) - q|`.

#### k=20

| n_updates | Mode | q=0.5 | q=0.9 | q=0.99 |
|-----------|----------------|---------|---------|---------|
| 200000 | Legacy | 0.0104 | 0.0145 | 0.0028 |
| 200000 | sketchlib-rust | 0.0275 | 0.0470 | 0.0061 |
| 50000 | Legacy | 0.0131 | 0.0091 | 0.0054 |
| 50000 | sketchlib-rust | 0.0110 | 0.0116 | 0.0031 |

#### k=50

| n_updates | Mode | q=0.5 | q=0.9 | q=0.99 |
|-----------|----------------|---------|---------|---------|
| 200000 | Legacy | 0.0013 | 0.0021 | 0.0012 |
| 200000 | sketchlib-rust | 0.0101 | 0.0044 | 0.0074 |

#### k=200

| n_updates | Mode | q=0.5 | q=0.9 | q=0.99 |
|-----------|----------------|---------|---------|---------|
| 200000 | Legacy | 0.0021 | 0.0036 | 0.0000 |
| 200000 | sketchlib-rust | 0.0015 | 0.0001 | 0.0002 |

---

### HydraKllSketch (per-key quantiles, mean/max absolute rank error across 50 keys)

#### rows=2, cols=64

| k | n | domain | Mode | q=0.5 (mean / max) | q=0.9 (mean / max) |
|-----|--------|--------|----------------|--------------------|--------------------|
| 20 | 200000 | 200 | Legacy | 0.0170 / 0.0546 | 0.0165 / 0.0452 |
| 20 | 200000 | 200 | sketchlib-rust | 0.0254 / 0.0629 | 0.0546 / 0.0942 |

#### rows=3, cols=128

| k | n | domain | Mode | q=0.5 (mean / max) | q=0.9 (mean / max) |
|-----|--------|--------|----------------|--------------------|--------------------|
| 20 | 200000 | 200 | Legacy | 0.0166 / 0.0591 | 0.0114 / 0.0304 |
| 20 | 200000 | 200 | sketchlib-rust | 0.0216 / 0.0534 | 0.0238 / 0.1087 |
| 50 | 200000 | 200 | Legacy | 0.0099 / 0.0352 | 0.0087 / 0.0330 |
| 50 | 200000 | 200 | sketchlib-rust | 0.0119 / 0.0458 | 0.0119 / 0.0296 |
| 20 | 100000 | 100 | Legacy | 0.0141 / 0.0574 | 0.0149 / 0.0471 |
| 20 | 100000 | 100 | sketchlib-rust | 0.0202 / 0.0621 | 0.0287 / 0.0779 |
198 changes: 190 additions & 8 deletions asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use clap::Parser;
use sketch_core::config::{self, ImplMode};
use sketch_core::count_min::CountMinSketch;
use sketch_core::count_min_with_heap::CountMinSketchWithHeap;
use sketch_core::hydra_kll::HydraKllSketch;
use sketch_core::kll::KllSketch;

#[derive(Clone)]
struct Lcg64 {
Expand Down Expand Up @@ -93,6 +95,16 @@ fn rmse_percentage(exact: &[f64], est: &[f64]) -> f64 {
(sum_sq / denom).sqrt() * 100.0
}

// Command-line options for the fidelity binary: one backend selector per
// sketch family (`--cms-impl`, `--kll-impl`, `--cmwh-impl`). Each flag falls
// back to the matching per-backend default constant from `config`.
#[derive(Parser)]
struct Args {
    // Backend used for the Count-Min Sketch runs.
    #[arg(long, value_enum, default_value_t = config::DEFAULT_CMS_IMPL)]
    cms_impl: ImplMode,

    // Backend used for the KLL runs.
    #[arg(long, value_enum, default_value_t = config::DEFAULT_KLL_IMPL)]
    kll_impl: ImplMode,

    // Backend used for the Count-Min-With-Heap runs.
    #[arg(long, value_enum, default_value_t = config::DEFAULT_CMWH_IMPL)]
    cmwh_impl: ImplMode,
}

fn rank_fraction(sorted: &[f64], x: f64) -> f64 {
if sorted.is_empty() {
return 0.0;
Expand Down Expand Up @@ -210,14 +222,110 @@ fn run_countmin_with_heap_once(seed: u64, p: &CmwhParams) -> CmwhResult {
}
}

#[derive(Parser)]
struct Args {
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMS_IMPL)]
cms_impl: ImplMode,
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_KLL_IMPL)]
kll_impl: ImplMode,
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMWH_IMPL)]
cmwh_impl: ImplMode,
// --- KllSketch ---

/// Parameters for a single `KllSketch` fidelity run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct KllParams {
    /// Size parameter passed to `KllSketch::new`.
    k: u16,
    /// Number of values fed into the sketch.
    n: usize,
}

/// Absolute rank errors measured by one `KllSketch` fidelity run.
#[derive(Debug, Clone, Copy, PartialEq)]
struct KllResult {
    /// Absolute rank error of the estimate for q = 0.5.
    rank_err_50: f64,
    /// Absolute rank error of the estimate for q = 0.9.
    rank_err_90: f64,
    /// Absolute rank error of the estimate for q = 0.99.
    rank_err_99: f64,
}

/// Runs one KLL fidelity experiment.
///
/// Feeds `p.n` pseudo-random values (scaled to `[0, 1_000_000)` assuming
/// `next_f64_0_1` yields values in `[0, 1)` — TODO confirm) into a
/// `KllSketch` built with `p.k`, keeps an exact copy of every value, and
/// reports the absolute rank error of the sketch's estimate at q = 0.5, 0.9
/// and 0.99.
fn run_kll_once(seed: u64, p: &KllParams) -> KllResult {
    let mut prng = Lcg64::new(seed ^ 0x1234_5678);
    let mut exact = Vec::<f64>::with_capacity(p.n);
    let mut sketch = KllSketch::new(p.k);

    for _ in 0..p.n {
        let sample = prng.next_f64_0_1() * 1_000_000.0;
        exact.push(sample);
        sketch.update(sample);
    }

    // Ground truth must be sorted before rank lookups.
    exact.sort_by(|a, b| a.total_cmp(b));

    // |rank of the estimated value in the exact data - requested quantile|
    let abs_rank_error = |q: f64| -> f64 {
        let estimate = sketch.get_quantile(q);
        (rank_fraction(&exact, estimate) - q).abs()
    };

    let [rank_err_50, rank_err_90, rank_err_99] = [0.5, 0.9, 0.99].map(abs_rank_error);

    KllResult {
        rank_err_50,
        rank_err_90,
        rank_err_99,
    }
}

// --- HydraKllSketch ---

/// Parameters for a single `HydraKllSketch` fidelity run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct HydraKllParams {
    /// Row count passed to `HydraKllSketch::new`.
    rows: usize,
    /// Column count passed to `HydraKllSketch::new`.
    cols: usize,
    /// Per-cell KLL size parameter passed to `HydraKllSketch::new`.
    k: u16,
    /// Number of `(key, value)` updates to generate.
    n: usize,
    /// Modulus for key ids on the non-skewed branch of key selection.
    domain: usize,
    /// Maximum number of keys (in sorted order) to evaluate.
    eval_keys: usize,
}

/// Per-key absolute rank errors aggregated over the evaluated keys of one
/// `HydraKllSketch` fidelity run.
#[derive(Debug, Clone, Copy, PartialEq)]
struct HydraKllResult {
    /// Mean absolute rank error at q = 0.5.
    mean_50: f64,
    /// Maximum absolute rank error at q = 0.5.
    max_50: f64,
    /// Mean absolute rank error at q = 0.9.
    mean_90: f64,
    /// Maximum absolute rank error at q = 0.9.
    max_90: f64,
}

/// Runs one HydraKLL fidelity experiment.
///
/// Generates `p.n` `(key, value)` updates, feeds each into a
/// `HydraKllSketch` built from `(p.rows, p.cols, p.k)`, and records the exact
/// values per key. The first `p.eval_keys` keys (in lexicographic order) are
/// then queried at q = 0.5 and q = 0.9, and the mean and maximum absolute
/// rank error across those keys are returned.
///
/// If no keys are evaluated (`p.n == 0` or `p.eval_keys == 0`) every field of
/// the result is `0.0` rather than NaN.
///
/// # Panics
///
/// Panics if `p.domain == 0` and the non-skewed key branch is taken
/// (remainder by zero).
fn run_hydra_kll_once(seed: u64, p: &HydraKllParams) -> HydraKllResult {
    let mut rng = Lcg64::new(seed ^ 0xDEAD_BEEF);
    let mut hydra = HydraKllSketch::new(p.rows, p.cols, p.k);
    let mut exact: HashMap<String, Vec<f64>> = HashMap::new();

    for _ in 0..p.n {
        let r = rng.next_u64();
        // Skewed key choice: when the low byte of `r` is below 200 the key id
        // is drawn from 0..20, otherwise from 0..p.domain.
        let key_id = if (r & 0xFF) < 200 {
            (r as usize) % 20
        } else {
            (r as usize) % p.domain
        };
        let key = format!("k{key_id}");
        let v = rng.next_f64_0_1() * 1_000_000.0;
        hydra.update(&key, v);
        exact.entry(key).or_default().push(v);
    }

    // Sort so the evaluated subset does not depend on HashMap iteration order.
    let mut keys: Vec<String> = exact.keys().cloned().collect();
    keys.sort();
    keys.truncate(p.eval_keys);

    // Nothing to evaluate: report zeros instead of dividing 0.0 by 0.0.
    if keys.is_empty() {
        return HydraKllResult {
            mean_50: 0.0,
            max_50: 0.0,
            mean_90: 0.0,
            max_90: 0.0,
        };
    }

    let mut mean_50 = 0.0f64;
    let mut max_50 = 0.0f64;
    let mut mean_90 = 0.0f64;
    let mut max_90 = 0.0f64;
    let nk = keys.len() as f64;
    for key in &keys {
        // Each key is visited once, so move its values out of the map
        // instead of cloning them.
        let mut vals = exact.remove(key).unwrap_or_default();
        // Values equal under `total_cmp` are bit-identical, so an unstable
        // sort yields the same order as a stable one.
        vals.sort_unstable_by(f64::total_cmp);
        for (q, mean_ref, max_ref) in [
            (0.5, &mut mean_50, &mut max_50),
            (0.9, &mut mean_90, &mut max_90),
        ] {
            let est = hydra.query(key, q);
            let err = (rank_fraction(&vals, est) - q).abs();
            *mean_ref += err;
            if err > *max_ref {
                *max_ref = err;
            }
        }
    }
    mean_50 /= nk;
    mean_90 /= nk;

    HydraKllResult {
        mean_50,
        max_50,
        mean_90,
        max_90,
    }
}

fn main() {
Expand All @@ -236,6 +344,11 @@ fn main() {
} else {
"sketchlib-rust"
};
let kll_mode = if matches!(args.kll_impl, ImplMode::Legacy) {
"Legacy"
} else {
"sketchlib-rust"
};

// CountMinSketch: multiple (depth, width, n, domain)
let cms_param_sets: Vec<CmsParams> = vec![
Expand Down Expand Up @@ -311,4 +424,73 @@ fn main() {
p.depth, p.width, p.n, p.domain, p.heap_size, r.topk_recall, r.pearson, r.mape, r.rmse
);
}
// KllSketch
let kll_param_sets: Vec<KllParams> = vec![
KllParams { k: 20, n: 200_000 },
KllParams { k: 50, n: 200_000 },
KllParams { k: 200, n: 200_000 },
KllParams { k: 20, n: 50_000 },
];

println!("\n## KllSketch ({kll_mode})");
println!(
"| k | n_updates | q=0.5 abs_rank_error | q=0.9 abs_rank_error | q=0.99 abs_rank_error |"
);
println!(
"|---|-----------|----------------------|----------------------|-----------------------|"
);
for p in &kll_param_sets {
let r = run_kll_once(seed, p);
println!(
"| {} | {} | {:.6} | {:.6} | {:.6} |",
p.k, p.n, r.rank_err_50, r.rank_err_90, r.rank_err_99
);
}

// HydraKllSketch
let hydra_param_sets: Vec<HydraKllParams> = vec![
HydraKllParams {
rows: 2,
cols: 64,
k: 20,
n: 200_000,
domain: 200,
eval_keys: 50,
},
HydraKllParams {
rows: 3,
cols: 128,
k: 20,
n: 200_000,
domain: 200,
eval_keys: 50,
},
HydraKllParams {
rows: 3,
cols: 128,
k: 50,
n: 200_000,
domain: 200,
eval_keys: 50,
},
HydraKllParams {
rows: 3,
cols: 128,
k: 20,
n: 100_000,
domain: 100,
eval_keys: 50,
},
];

println!("\n## HydraKllSketch ({kll_mode})");
println!("| rows | cols | k | n | domain | q=0.5 mean/max | q=0.9 mean/max |");
println!("|------|------|---|-----|--------|----------------|----------------|");
for p in &hydra_param_sets {
let r = run_hydra_kll_once(seed, p);
println!(
"| {} | {} | {} | {} | {} | {:.5} / {:.5} | {:.5} / {:.5} |",
p.rows, p.cols, p.k, p.n, p.domain, r.mean_50, r.max_50, r.mean_90, r.max_90
);
}
}
4 changes: 2 additions & 2 deletions asap-common/sketch-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ pub enum ImplMode {
}

/// Global default when impl mode is not explicitly configured (e.g. env var parsing).
pub const DEFAULT_IMPL_MODE: ImplMode = ImplMode::Legacy;
pub const DEFAULT_IMPL_MODE: ImplMode = ImplMode::Sketchlib;

/// Per-backend defaults. Used when configure() has not been called.
pub const DEFAULT_CMS_IMPL: ImplMode = ImplMode::Sketchlib;
pub const DEFAULT_KLL_IMPL: ImplMode = ImplMode::Legacy;
pub const DEFAULT_KLL_IMPL: ImplMode = ImplMode::Sketchlib;
pub const DEFAULT_CMWH_IMPL: ImplMode = ImplMode::Sketchlib;

static COUNTMIN_MODE: OnceLock<ImplMode> = OnceLock::new();
Expand Down
Loading
Loading