diff --git a/Cargo.lock b/Cargo.lock index d3bf26c..4de8ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1039,6 +1039,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "cxx" version = "1.0.194" @@ -1174,7 +1184,7 @@ dependencies = [ "parquet", "paste", "pin-project-lite", - "rand 0.8.5", + "rand", "sqlparser 0.51.0", "tempfile", "tokio", @@ -1251,7 +1261,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.8.5", + "rand", "tempfile", "url", ] @@ -1312,7 +1322,7 @@ dependencies = [ "itertools 0.13.0", "log", "md-5", - "rand 0.8.5", + "rand", "regex", "sha2", "unicode-segmentation", @@ -1351,7 +1361,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand 0.8.5", + "rand", ] [[package]] @@ -1374,7 +1384,7 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand 0.8.5", + "rand", ] [[package]] @@ -1461,7 +1471,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand 0.8.5", + "rand", ] [[package]] @@ -1511,7 +1521,7 @@ dependencies = [ "once_cell", "parking_lot", "pin-project-lite", - "rand 0.8.5", + "rand", "tokio", ] @@ -1618,17 +1628,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" -[[package]] -name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] - [[package]] name = "errno" version = "0.3.14" @@ -1639,16 +1638,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "errno-dragonfly" -version 
= "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "fallible-iterator" version = "0.3.0" @@ -2566,16 +2555,6 @@ version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" -[[package]] -name = "libloading" -version = "0.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883" -dependencies = [ - "cfg-if", - "winapi", -] - [[package]] name = "libm" version = "0.2.16" @@ -3067,7 +3046,7 @@ dependencies = [ "glob", "opentelemetry", "percent-encoding", - "rand 0.8.5", + "rand", "serde_json", "thiserror 2.0.18", ] @@ -3167,21 +3146,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pcap" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99e935fc73d54a89fff576526c2ccd42bbf8247aae05b358693475b14fd4ff79" -dependencies = [ - "bitflags 1.3.2", - "errno 0.2.8", - "libc", - "libloading", - "pkg-config", - "regex", - "windows-sys 0.36.1", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -3482,6 +3446,7 @@ dependencies = [ "bincode", "chrono", "clap 4.5.60", + "ctor", "criterion", "dashmap 5.5.3", "datafusion", @@ -3507,7 +3472,6 @@ dependencies = [ "serde_yaml", "sketch-core", "sketch_db_common", - "sketchlib-rust", "snap", "sql_utilities", "sqlparser 0.59.0", @@ -3554,18 +3518,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" 
-version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.5", + "rand_chacha", + "rand_core", ] [[package]] @@ -3575,17 +3529,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.5", + "rand_core", ] [[package]] @@ -3818,7 +3762,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags 2.11.0", - "errno 0.3.14", + "errno", "libc", "linux-raw-sys", "windows-sys 0.61.2", @@ -3920,15 +3864,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-big-array" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f" -dependencies = [ - "serde", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -4030,7 +3965,7 @@ version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ - "errno 0.3.14", + "errno", "libc", ] @@ -4050,6 +3985,8 @@ checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" name = "sketch-core" version = "0.1.0" dependencies = [ + "clap 4.5.60", + "ctor", "dsrs", "rmp-serde", "serde", @@ -4069,22 +4006,6 @@ dependencies = [ "sql_utilities", ] -[[package]] -name = "sketchlib-rust" -version = "0.1.0" -source = 
"git+https://github.com/ProjectASAP/sketchlib-rust#a729288270cc8f74a4ac9451e5c63cd9c693668c" -dependencies = [ - "ahash", - "clap 4.5.60", - "pcap", - "rand 0.9.2", - "rmp-serde", - "serde", - "serde-big-array", - "smallvec", - "twox-hash 2.1.2", -] - [[package]] name = "slab" version = "0.4.12" @@ -4655,7 +4576,7 @@ dependencies = [ "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand 0.8.5", + "rand", "slab", "tokio", "tokio-util", @@ -4787,9 +4708,6 @@ name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" -dependencies = [ - "rand 0.9.2", -] [[package]] name = "typenum" @@ -5155,19 +5073,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -5247,12 +5152,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5265,12 +5164,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5289,12 +5182,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5307,12 +5194,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5337,12 +5218,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/asap-common/sketch-core/Cargo.toml b/asap-common/sketch-core/Cargo.toml index 2dbea8b..2836548 100644 --- a/asap-common/sketch-core/Cargo.toml +++ b/asap-common/sketch-core/Cargo.toml @@ -9,3 +9,7 @@ serde.workspace = true rmp-serde = "1.1" xxhash-rust = { version = "0.8", features = ["xxh32"] } dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" } +clap = { version = "4.0", features = ["derive"] } + +[dev-dependencies] +ctor = "0.2" diff --git a/asap-common/sketch-core/report.md 
b/asap-common/sketch-core/report.md new file mode 100644 index 0000000..a8c31c6 --- /dev/null +++ b/asap-common/sketch-core/report.md @@ -0,0 +1,31 @@ +# Sketchlib Fidelity Report + +Compares the **legacy** sketch implementations in `sketch-core` vs the new **sketchlib-rust** backends for: + +- `CountMinSketch` +- `CountMinSketchWithHeap` (Count-Min portion) +- `KllSketch` +- `HydraKllSketch` (via `KllSketch`) + +## Running Fidelity Tests + +The fidelity binary selects backends via CLI flags instead of environment variables. + +| Goal | Command | +|--------------------------|--------------------------------------------------------------------------------------------------------------| +| Default (all legacy) | `cargo run -p sketch-core --bin sketchlib_fidelity` | +| All legacy | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy --kll-impl legacy --cmwh-impl legacy` | +| Legacy KLL only | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib --kll-impl legacy --cmwh-impl sketchlib` | + +## Unit Tests + +Unit tests always run with **legacy** backends enabled (the test ctor calls +`force_legacy_mode_for_tests()`), so you only need: + +```bash +cargo test -p sketch-core +``` + +## Results + +Fidelity results will be added as sketch implementations are integrated in subsequent PRs. diff --git a/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs new file mode 100644 index 0000000..cff9ac1 --- /dev/null +++ b/asap-common/sketch-core/src/bin/sketchlib_fidelity.rs @@ -0,0 +1,127 @@ +// Scaffold for fidelity benchmarks; helpers used in later PRs when sketch types are integrated.
+#![allow(dead_code)] + +use clap::Parser; +use sketch_core::config::{self, ImplMode}; + +#[derive(Clone)] +struct Lcg64 { + state: u64, +} + +impl Lcg64 { + fn new(seed: u64) -> Self { + Self { state: seed } + } + + fn next_u64(&mut self) -> u64 { + self.state = self + .state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + self.state + } + + fn next_f64_0_1(&mut self) -> f64 { + let x = self.next_u64() >> 11; + (x as f64) / ((1u64 << 53) as f64) + } +} + +fn pearson_corr(exact: &[f64], est: &[f64]) -> f64 { + let n = exact.len().min(est.len()); + if n == 0 { + return f64::NAN; + } + let (mut sum_x, mut sum_y) = (0.0, 0.0); + for i in 0..n { + sum_x += exact[i]; + sum_y += est[i]; + } + let mean_x = sum_x / (n as f64); + let mean_y = sum_y / (n as f64); + let (mut num, mut den_x, mut den_y) = (0.0, 0.0, 0.0); + for i in 0..n { + let dx = exact[i] - mean_x; + let dy = est[i] - mean_y; + num += dx * dy; + den_x += dx * dx; + den_y += dy * dy; + } + if den_x == 0.0 || den_y == 0.0 { + return f64::NAN; + } + num / (den_x.sqrt() * den_y.sqrt()) +} + +fn mape(exact: &[f64], est: &[f64]) -> f64 { + let n = exact.len().min(est.len()); + let mut num = 0.0; + let mut denom = 0.0; + for i in 0..n { + if exact[i] == 0.0 { + continue; + } + num += ((exact[i] - est[i]) / exact[i]).abs(); + denom += 1.0; + } + if denom == 0.0 { + return if exact == est { 0.0 } else { f64::INFINITY }; + } + (num / denom) * 100.0 +} + +fn rmse_percentage(exact: &[f64], est: &[f64]) -> f64 { + let n = exact.len().min(est.len()); + let mut sum_sq = 0.0; + let mut denom = 0.0; + for i in 0..n { + if exact[i] == 0.0 { + continue; + } + let rel = (exact[i] - est[i]) / exact[i]; + sum_sq += rel * rel; + denom += 1.0; + } + if denom == 0.0 { + return if exact == est { 0.0 } else { f64::INFINITY }; + } + (sum_sq / denom).sqrt() * 100.0 +} + +fn rank_fraction(sorted: &[f64], x: f64) -> f64 { + if sorted.is_empty() { + return 0.0; + } + let idx = sorted.partition_point(|v| *v <= 
x); + (idx as f64) / (sorted.len() as f64) +} + +#[derive(Parser)] +struct Args { + #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)] + cms_impl: ImplMode, + #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)] + kll_impl: ImplMode, + #[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)] + cmwh_impl: ImplMode, +} + +fn main() { + let args = Args::parse(); + config::configure(args.cms_impl, args.kll_impl, args.cmwh_impl) + .expect("sketch backend already initialised"); + + let mode = if matches!(args.cms_impl, ImplMode::Legacy) + || matches!(args.kll_impl, ImplMode::Legacy) + || matches!(args.cmwh_impl, ImplMode::Legacy) + { + "Legacy" + } else { + "sketchlib-rust" + }; + + println!("# Sketchlib Fidelity Report ({})", mode); + println!(); + println!("Fidelity tests will be added as sketch implementations are integrated."); +} diff --git a/asap-common/sketch-core/src/config.rs b/asap-common/sketch-core/src/config.rs new file mode 100644 index 0000000..0812b3e --- /dev/null +++ b/asap-common/sketch-core/src/config.rs @@ -0,0 +1,79 @@ +use std::sync::OnceLock; + +/// Implementation mode for sketch-core internals. +#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)] +pub enum ImplMode { + /// Use the original hand-written implementations. + Legacy, + /// Use sketchlib-rust backed implementations. + Sketchlib, +} + +/// Default backend when not explicitly configured (e.g. for binaries that don't pass CLI args). +/// Single source of truth for sketch backend defaults. +pub const DEFAULT_IMPL_MODE: ImplMode = ImplMode::Legacy; + +static COUNTMIN_MODE: OnceLock<ImplMode> = OnceLock::new(); + +/// Returns true if Count-Min operations should use sketchlib-rust internally.
+pub fn use_sketchlib_for_count_min() -> bool { + *COUNTMIN_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib +} + +static KLL_MODE: OnceLock<ImplMode> = OnceLock::new(); + +/// Returns true if KLL operations should use sketchlib-rust internally. +pub fn use_sketchlib_for_kll() -> bool { + *KLL_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib +} + +static COUNTMIN_WITH_HEAP_MODE: OnceLock<ImplMode> = OnceLock::new(); + +/// Returns true if Count-Min-With-Heap operations should use sketchlib-rust internally for the +/// Count-Min portion. +pub fn use_sketchlib_for_count_min_with_heap() -> bool { + *COUNTMIN_WITH_HEAP_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib +} + +/// Set backend modes for all sketch types. Call once at process startup, +/// before any sketch operation. Returns Err if any OnceLock was already set. +pub fn configure(cms: ImplMode, kll: ImplMode, cmwh: ImplMode) -> Result<(), &'static str> { + let a = COUNTMIN_MODE.set(cms); + let b = KLL_MODE.set(kll); + let c = COUNTMIN_WITH_HEAP_MODE.set(cmwh); + if a.is_err() || b.is_err() || c.is_err() { + Err("configure() called after sketch backends were already initialised") + } else { + Ok(()) + } +} + +pub fn force_legacy_mode_for_tests() { + let _ = COUNTMIN_MODE.set(ImplMode::Legacy); + let _ = KLL_MODE.set(ImplMode::Legacy); + let _ = COUNTMIN_WITH_HEAP_MODE.set(ImplMode::Legacy); +} + +/// Helper used by UDF templates and documentation examples to parse implementation mode +/// from environment variables in a robust way. This is not used in the hot path.
+pub fn parse_mode(var: Result<String, std::env::VarError>) -> ImplMode { + match var { + Ok(v) => match v.to_ascii_lowercase().as_str() { + "legacy" => ImplMode::Legacy, + "sketchlib" => ImplMode::Sketchlib, + other => { + eprintln!( + "sketch-core: unrecognised IMPL value {other:?}, defaulting to {DEFAULT_IMPL_MODE:?}" + ); + DEFAULT_IMPL_MODE + } + }, + Err(std::env::VarError::NotPresent) => DEFAULT_IMPL_MODE, + Err(std::env::VarError::NotUnicode(v)) => { + eprintln!( + "sketch-core: IMPL env var has invalid UTF-8 ({v:?}), defaulting to {DEFAULT_IMPL_MODE:?}" + ); + DEFAULT_IMPL_MODE + } + } +} diff --git a/asap-common/sketch-core/src/lib.rs b/asap-common/sketch-core/src/lib.rs index 461d43e..7d92ad1 100644 --- a/asap-common/sketch-core/src/lib.rs +++ b/asap-common/sketch-core/src/lib.rs @@ -1,3 +1,10 @@ +#[cfg(test)] +#[ctor::ctor] +fn init_sketch_legacy_for_tests() { + crate::config::force_legacy_mode_for_tests(); +} + +pub mod config; pub mod count_min; pub mod count_min_with_heap; pub mod delta_set_aggregator; diff --git a/asap-query-engine/Cargo.toml b/asap-query-engine/Cargo.toml index 07ef76d..d10eb5e 100644 --- a/asap-query-engine/Cargo.toml +++ b/asap-query-engine/Cargo.toml @@ -39,7 +39,6 @@ flate2 = "1.0" async-trait = "0.1" xxhash-rust = { version = "0.8", features = ["xxh32", "xxh64"] } dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" } -sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" } base64 = "0.21" hex = "0.4" sqlparser = "0.59.0" @@ -59,6 +58,7 @@ reqwest = { version = "0.11", features = ["json"] } tracing-appender = "0.2" [dev-dependencies] +ctor = "0.2" tempfile = "3.20.0" criterion = { version = "0.5", features = ["html_reports"] } @@ -73,3 +73,4 @@ default = [] lock_profiling = [] # Enable extra debugging output extra_debugging = [] +sketchlib-tests = [] diff --git a/asap-query-engine/src/lib.rs b/asap-query-engine/src/lib.rs index 22295ed..a51c620 100644 --- a/asap-query-engine/src/lib.rs +++ 
b/asap-query-engine/src/lib.rs @@
-1,3 +1,16 @@ +#[cfg(test)] +#[ctor::ctor] +fn init_sketch_backend_for_tests() { + #[cfg(feature = "sketchlib-tests")] + let _ = sketch_core::config::configure( + sketch_core::config::ImplMode::Sketchlib, + sketch_core::config::ImplMode::Sketchlib, + sketch_core::config::ImplMode::Sketchlib, + ); + #[cfg(not(feature = "sketchlib-tests"))] + sketch_core::config::force_legacy_mode_for_tests(); +} + pub mod data_model; pub mod drivers; pub mod engines; diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs index a950fba..3e437aa 100644 --- a/asap-query-engine/src/main.rs +++ b/asap-query-engine/src/main.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use tokio::signal; use tracing::{error, info}; +use sketch_core::config::{self, ImplMode}; + use query_engine_rust::data_model::enums::{InputFormat, LockStrategy, StreamingEngine}; use query_engine_rust::drivers::AdapterConfig; use query_engine_rust::utils::file_io::{read_inference_config, read_streaming_config}; @@ -108,6 +110,18 @@ struct Args { #[arg(long)] promsketch_config: Option, + /// Backend implementation for Count-Min Sketch (legacy | sketchlib) + #[arg(long, value_enum, default_value_t = config::DEFAULT_IMPL_MODE)] + sketch_cms_impl: ImplMode, + + /// Backend implementation for KLL Sketch (legacy | sketchlib) + #[arg(long, value_enum, default_value_t = config::DEFAULT_IMPL_MODE)] + sketch_kll_impl: ImplMode, + + /// Backend implementation for Count-Min-With-Heap (legacy | sketchlib) + #[arg(long, value_enum, default_value_t = config::DEFAULT_IMPL_MODE)] + sketch_cmwh_impl: ImplMode, + /// Enable OTLP metrics ingest (gRPC + HTTP) #[arg(long)] enable_otel_ingest: bool, @@ -125,6 +139,14 @@ struct Args { async fn main() -> Result<()> { let args = Args::parse(); + // Configure sketch-core backends before any sketch operations. 
+ config::configure( + args.sketch_cms_impl, + args.sketch_kll_impl, + args.sketch_cmwh_impl, + ) + .expect("sketch backend already initialised"); + // Create output directory fs::create_dir_all(&args.output_dir)?; diff --git a/asap-query-engine/tests/test_both_backends.rs b/asap-query-engine/tests/test_both_backends.rs new file mode 100644 index 0000000..5643756 --- /dev/null +++ b/asap-query-engine/tests/test_both_backends.rs @@ -0,0 +1,30 @@ +//! Integration test that runs the library test suite with the sketchlib backend. +//! +//! When you run `cargo test -p query_engine_rust` (without --features sketchlib-tests), +//! the lib tests run with the legacy backend. This test spawns a second run with the +//! sketchlib backend so both modes are exercised in one `cargo test` invocation. +//! +//! This test is only compiled when sketchlib-tests is NOT enabled, to avoid recursion. + +#[cfg(not(feature = "sketchlib-tests"))] +#[test] +fn test_sketchlib_backend() { + use std::process::Command; + + let status = Command::new(env!("CARGO")) + .args([ + "test", + "-p", + "query_engine_rust", + "--lib", + "--features", + "sketchlib-tests", + ]) + .status() + .expect("failed to spawn cargo test"); + + assert!( + status.success(), + "sketchlib backend tests failed (run `cargo test -p query_engine_rust --lib --features sketchlib-tests` for details)" + ); +}