From 9f7c5c9468d46c9967d3da2cdf4a03baa929af46 Mon Sep 17 00:00:00 2001 From: zz_y Date: Fri, 6 Mar 2026 11:21:14 -0600 Subject: [PATCH] Revert sketchlib-rust path dependency workarounds now that repo is public - Add sketchlib-rust git dependency to asap-query-engine/Cargo.toml - Remove workspace exclude for sketchlib-rust from root Cargo.toml - Remove sketchlib-rust/ from .gitignore - Remove sketchlib-rust rsync entry from asap-tools/components.conf - Uncomment series.rs and query.rs in promsketch_store - Remove stale commented-out private-repo CI steps from rust.yml - Remove stale commented-out secret-based build from Dockerfile - Regenerate Cargo.lock Closes #154 Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/rust.yml | 16 - .gitignore | 3 - Cargo.lock | 302 +++++++---- Cargo.toml | 1 - asap-query-engine/Cargo.toml | 1 + asap-query-engine/Dockerfile | 12 - .../src/stores/promsketch_store/query.rs | 512 +++++++++--------- .../src/stores/promsketch_store/series.rs | 360 ++++++------ asap-tools/components.conf | 1 - 9 files changed, 631 insertions(+), 577 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index bc2155d..15cd661 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -28,12 +28,6 @@ jobs: steps: - uses: actions/checkout@v4 - # - name: Configure git for private dependencies - # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" - - # - name: Clone sketchlib-rust - # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git - - name: Install Rust uses: dtolnay/rust-toolchain@stable with: @@ -81,12 +75,6 @@ jobs: steps: - uses: actions/checkout@v4 - # - name: Configure git for private dependencies - # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" - - # - name: Clone sketchlib-rust - # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git - - name: Install Rust uses: dtolnay/rust-toolchain@stable with: @@ -145,7 +133,3 @@ jobs: - name: Build Docker image run: docker build -f asap-query-engine/Dockerfile -t sketchdb-queryengine-rust:latest . - # run: | - # echo "${{ secrets.PRIVATE_REPO_TOKEN }}" > /tmp/git_token - # docker build --secret id=git_token,src=/tmp/git_token -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest . - # rm -f /tmp/git_token diff --git a/.gitignore b/.gitignore index fa17294..ffbe6e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,6 @@ target/ experiment_outputs/ -# Private repo, vendored locally until open sourced (see GitHub issue) -sketchlib-rust/ - # Runtime and generated files metadata/ preprocessed_configs/ diff --git a/Cargo.lock b/Cargo.lock index ed0c3c7..bf5c3d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1038,7 +1038,7 @@ dependencies = [ "parquet", "paste", "pin-project-lite", - "rand", + "rand 0.8.5", "sqlparser 0.51.0", "tempfile", "tokio", @@ -1115,7 +1115,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.8.5", "tempfile", "url", ] @@ -1176,7 +1176,7 @@ dependencies = [ "itertools 0.13.0", "log", "md-5", - "rand", + "rand 0.8.5", "regex", "sha2", "unicode-segmentation", @@ -1215,7 +1215,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", + "rand 0.8.5", ] [[package]] @@ -1238,7 +1238,7 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand", + "rand 0.8.5", ] [[package]] @@ -1325,7 +1325,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "rand 0.8.5", ] [[package]] @@ -1375,7 +1375,7 @@ dependencies = [ "once_cell", "parking_lot", "pin-project-lite", - "rand", + "rand 0.8.5", "tokio", ] @@ -1476,6 +1476,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + [[package]] name = "errno" version = "0.3.14" @@ -1486,6 +1497,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -1715,19 +1736,19 @@ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] @@ -2158,9 +2179,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "is_terminal_polyfill" @@ -2194,9 +2215,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.21" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -2209,9 +2230,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.21" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -2220,9 +2241,9 @@ dependencies = [ [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -2328,6 +2349,16 @@ version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "libloading" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libm" version = "0.2.16" @@ -2336,12 +2367,13 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ "bitflags 2.11.0", "libc", + "plain", "redox_syscall 0.7.3", ] @@ -2864,6 +2896,21 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pcap" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99e935fc73d54a89fff576526c2ccd42bbf8247aae05b358693475b14fd4ff79" +dependencies = [ + "bitflags 1.3.2", + "errno 0.2.8", + "libc", + "libloading", + "pkg-config", + "regex", + "windows-sys 0.36.1", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -2916,6 +2963,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -2967,9 +3020,9 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ "toml_edit", ] @@ -3123,6 +3176,7 @@ dependencies = [ "serde_yaml", "sketch-core", "sketch_db_common", + "sketchlib-rust", "snap", "sql_utilities", "sqlparser 0.59.0", @@ -3141,9 +3195,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3154,6 +3208,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.5" @@ -3161,8 +3221,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -3172,7 +3242,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -3184,6 +3264,15 @@ dependencies = [ "getrandom 0.2.17", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "rdkafka" version = "0.34.0" @@ -3376,7 +3465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags 2.11.0", - "errno", + "errno 0.3.14", "libc", "linux-raw-sys", "windows-sys 0.61.2", @@ -3478,6 +3567,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-big-array" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f" +dependencies = [ + "serde", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -3579,7 +3677,7 @@ version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ - "errno", + "errno 0.3.14", "libc", ] @@ -3617,6 +3715,22 @@ dependencies = [ "serde_yaml", ] +[[package]] +name = "sketchlib-rust" +version = "0.1.0" +source = "git+https://github.com/ProjectASAP/sketchlib-rust#348db8415f97246c42de68b407b47fa038cf8b1f" +dependencies = [ + "ahash", + "clap 4.5.60", + "pcap", + "rand 0.9.2", + "rmp-serde", + "serde", + "serde-big-array", + "smallvec", + "twox-hash 2.1.2", +] + [[package]] name = "slab" version = "0.4.12" @@ -3668,12 +3782,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3891,7 +4005,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4035,9 +4149,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -4045,16 +4159,16 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", @@ -4108,18 +4222,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "1.0.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.25.4+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" dependencies = [ "indexmap", "toml_datetime", @@ -4259,6 +4373,9 @@ name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +dependencies = [ + "rand 0.9.2", +] [[package]] name = "typenum" @@ -4334,11 +4451,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.21.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] @@ -4624,6 +4741,19 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -4651,15 +4781,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -4693,30 +4814,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -4730,10 +4834,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" +name = "windows_aarch64_msvc" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" [[package]] name = "windows_aarch64_msvc" @@ -4748,10 +4852,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" +name = "windows_i686_gnu" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" [[package]] name = "windows_i686_gnu" @@ -4765,12 +4869,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -4778,10 +4876,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" +name = "windows_i686_msvc" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" [[package]] name = "windows_i686_msvc" @@ -4796,10 +4894,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_i686_msvc" -version = "0.53.1" +name = "windows_x86_64_gnu" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" [[package]] name = "windows_x86_64_gnu" @@ -4813,12 +4911,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4832,10 +4924,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" +name = "windows_x86_64_msvc" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" [[package]] name = "windows_x86_64_msvc" @@ -4849,17 +4941,11 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index be74035..0ae123b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,3 @@ [workspace] members = ["asap-common/sketch-core", "asap-query-engine"] -exclude = ["sketchlib-rust"] resolver = "2" diff --git a/asap-query-engine/Cargo.toml b/asap-query-engine/Cargo.toml index 73780ac..9e1ddd6 100644 --- a/asap-query-engine/Cargo.toml +++ b/asap-query-engine/Cargo.toml @@ -35,6 +35,7 @@ promql-parser = "0.5.0" reqwest = { version = "0.11", features = ["json"] } xxhash-rust = { version = "0.8", features = ["xxh32", "xxh64"] } dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" } +sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust" } base64 = "0.21" hex = "0.4" sqlparser = "0.59.0" diff --git a/asap-query-engine/Dockerfile b/asap-query-engine/Dockerfile index 14f8bdf..190d9c0 100644 --- a/asap-query-engine/Dockerfile +++ b/asap-query-engine/Dockerfile @@ -17,31 +17,19 @@ COPY asap-common/sketch-core ./asap-common/sketch-core COPY Cargo.toml ./ COPY Cargo.lock ./ COPY asap-query-engine/Cargo.toml ./asap-query-engine/ -# COPY asap-query-engine/.cargo ./asap-query-engine/.cargo # Create a dummy main.rs to build dependencies RUN mkdir -p asap-query-engine/src && echo "fn main() {}" > asap-query-engine/src/main.rs # Build dependencies (this layer will be cached) -# Uses BuildKit secret mount to pass git credentials without baking into a layer WORKDIR /code/asap-query-engine RUN cargo build --release && rm -rf src/ -# RUN --mount=type=secret,id=git_token \ -# if [ -f /run/secrets/git_token ]; then \ -# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ -# fi && \ -# cargo build --release && rm -rf src/ # Copy source code COPY asap-query-engine/src ./src # Build the actual application RUN touch src/main.rs && cargo build --release -# RUN --mount=type=secret,id=git_token \ -# if [ -f /run/secrets/git_token ]; then \ -# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ -# fi && \ -# touch src/main.rs && cargo build --release # Runtime stage with Ubuntu 24.04 (has newer glibc/libstdc++) FROM ubuntu:24.04 diff --git a/asap-query-engine/src/stores/promsketch_store/query.rs b/asap-query-engine/src/stores/promsketch_store/query.rs index e8316a2..2fdb653 100644 --- a/asap-query-engine/src/stores/promsketch_store/query.rs +++ b/asap-query-engine/src/stores/promsketch_store/query.rs @@ -1,286 +1,286 @@ -// use sketchlib_rust::{EHSketchList, SketchInput, UniformSampling}; +use sketchlib_rust::{EHSketchList, SketchInput, UniformSampling}; -// use super::series::PromSketchMemSeries; +use super::series::PromSketchMemSeries; -// /// Evaluate a PromQL aggregation function over sketches for a given time range. -// /// -// /// # Arguments -// /// * `func_name` - PromQL function name (e.g. "quantile_over_time") -// /// * `series` - The PromSketchMemSeries containing sketch instances -// /// * `args` - Extra argument (e.g. quantile phi value) -// /// * `mint` - Start of query time range (milliseconds) -// /// * `maxt` - End of query time range (milliseconds) -// pub fn eval_function( -// func_name: &str, -// series: &PromSketchMemSeries, -// args: f64, -// mint: u64, -// maxt: u64, -// ) -> Result> { -// match func_name { -// "entropy_over_time" => eval_univmon(series, "entropy", mint, maxt), -// "distinct_over_time" => eval_univmon(series, "cardinality", mint, maxt), -// "l1_over_time" => eval_univmon(series, "l1", mint, maxt), -// "l2_over_time" => eval_univmon(series, "l2", mint, maxt), -// "quantile_over_time" => eval_kll_quantile(series, args, mint, maxt), -// "min_over_time" => eval_kll_quantile(series, 0.0, mint, maxt), -// "max_over_time" => eval_kll_quantile(series, 1.0, mint, maxt), -// "avg_over_time" => eval_sampling_stat(series, "avg", mint, maxt), -// "count_over_time" => eval_sampling_stat(series, "count", mint, maxt), -// "sum_over_time" => eval_sampling_stat(series, "sum", mint, maxt), -// "sum2_over_time" => eval_sampling_stat(series, "sum2", mint, maxt), -// "stddev_over_time" => eval_sampling_stat(series, "stddev", mint, maxt), -// "stdvar_over_time" => eval_sampling_stat(series, "stdvar", mint, maxt), -// _ => Err(format!("unsupported function: {}", func_name).into()), -// } -// } +/// Evaluate a PromQL aggregation function over sketches for a given time range. +/// +/// # Arguments +/// * `func_name` - PromQL function name (e.g. "quantile_over_time") +/// * `series` - The PromSketchMemSeries containing sketch instances +/// * `args` - Extra argument (e.g. quantile phi value) +/// * `mint` - Start of query time range (milliseconds) +/// * `maxt` - End of query time range (milliseconds) +pub fn eval_function( + func_name: &str, + series: &PromSketchMemSeries, + args: f64, + mint: u64, + maxt: u64, +) -> Result> { + match func_name { + "entropy_over_time" => eval_univmon(series, "entropy", mint, maxt), + "distinct_over_time" => eval_univmon(series, "cardinality", mint, maxt), + "l1_over_time" => eval_univmon(series, "l1", mint, maxt), + "l2_over_time" => eval_univmon(series, "l2", mint, maxt), + "quantile_over_time" => eval_kll_quantile(series, args, mint, maxt), + "min_over_time" => eval_kll_quantile(series, 0.0, mint, maxt), + "max_over_time" => eval_kll_quantile(series, 1.0, mint, maxt), + "avg_over_time" => eval_sampling_stat(series, "avg", mint, maxt), + "count_over_time" => eval_sampling_stat(series, "count", mint, maxt), + "sum_over_time" => eval_sampling_stat(series, "sum", mint, maxt), + "sum2_over_time" => eval_sampling_stat(series, "sum2", mint, maxt), + "stddev_over_time" => eval_sampling_stat(series, "stddev", mint, maxt), + "stdvar_over_time" => eval_sampling_stat(series, "stdvar", mint, maxt), + _ => Err(format!("unsupported function: {}", func_name).into()), + } +} -// /// Evaluate UnivMon-based functions (entropy, cardinality, L1, L2) -// /// using the optimized EHUnivOptimized backend. -// fn eval_univmon( -// series: &PromSketchMemSeries, -// stat: &str, -// mint: u64, -// maxt: u64, -// ) -> Result> { -// let eh = series -// .sketch_instances -// .eh_univ -// .as_ref() -// .ok_or("eh_univ not initialized")?; +/// Evaluate UnivMon-based functions (entropy, cardinality, L1, L2) +/// using the optimized EHUnivOptimized backend. +fn eval_univmon( + series: &PromSketchMemSeries, + stat: &str, + mint: u64, + maxt: u64, +) -> Result> { + let eh = series + .sketch_instances + .eh_univ + .as_ref() + .ok_or("eh_univ not initialized")?; -// let result = eh -// .query_interval(mint, maxt) -// .ok_or("no buckets cover the requested time range for UnivMon")?; + let result = eh + .query_interval(mint, maxt) + .ok_or("no buckets cover the requested time range for UnivMon")?; -// match stat { -// "entropy" => Ok(result.calc_entropy()), -// "cardinality" => Ok(result.calc_card()), -// "l1" => Ok(result.calc_l1()), -// "l2" => Ok(result.calc_l2()), -// _ => Err(format!("unknown univmon stat: {}", stat).into()), -// } -// } + match stat { + "entropy" => Ok(result.calc_entropy()), + "cardinality" => Ok(result.calc_card()), + "l1" => Ok(result.calc_l1()), + "l2" => Ok(result.calc_l2()), + _ => Err(format!("unknown univmon stat: {}", stat).into()), + } +} -// /// Evaluate KLL-based functions (quantile, min, max). -// fn eval_kll_quantile( -// series: &PromSketchMemSeries, -// phi: f64, -// mint: u64, -// maxt: u64, -// ) -> Result> { -// let eh = series -// .sketch_instances -// .eh_kll -// .as_ref() -// .ok_or("eh_kll not initialized")?; +/// Evaluate KLL-based functions (quantile, min, max). +fn eval_kll_quantile( + series: &PromSketchMemSeries, + phi: f64, + mint: u64, + maxt: u64, +) -> Result> { + let eh = series + .sketch_instances + .eh_kll + .as_ref() + .ok_or("eh_kll not initialized")?; -// let merged = eh -// .query_interval_merge(mint, maxt) -// .ok_or("no volumes cover the requested time range for KLL")?; + let merged = eh + .query_interval_merge(mint, maxt) + .ok_or("no volumes cover the requested time range for KLL")?; -// merged -// .query(&SketchInput::F64(phi)) -// .map_err(|e| -> Box { e.into() }) -// } + merged + .query(&SketchInput::F64(phi)) + .map_err(|e| -> Box { e.into() }) +} -// /// Evaluate sampling-based functions (avg, count, sum, sum2, stddev, stdvar). -// /// -// /// Since sketchlib-rust's `UniformSampling` exposes `samples()` and `total_seen()` -// /// but not dedicated query methods like the Go version, we compute statistics -// /// from the raw merged samples. -// fn eval_sampling_stat( -// series: &PromSketchMemSeries, -// stat: &str, -// mint: u64, -// maxt: u64, -// ) -> Result> { -// let eh = series -// .sketch_instances -// .eh_sampling -// .as_ref() -// .ok_or("eh_sampling not initialized")?; +/// Evaluate sampling-based functions (avg, count, sum, sum2, stddev, stdvar). +/// +/// Since sketchlib-rust's `UniformSampling` exposes `samples()` and `total_seen()` +/// but not dedicated query methods like the Go version, we compute statistics +/// from the raw merged samples. +fn eval_sampling_stat( + series: &PromSketchMemSeries, + stat: &str, + mint: u64, + maxt: u64, +) -> Result> { + let eh = series + .sketch_instances + .eh_sampling + .as_ref() + .ok_or("eh_sampling not initialized")?; -// let merged = eh -// .query_interval_merge(mint, maxt) -// .ok_or("no volumes cover the requested time range for sampling")?; + let merged = eh + .query_interval_merge(mint, maxt) + .ok_or("no volumes cover the requested time range for sampling")?; -// let sampler = match &merged { -// EHSketchList::UNIFORM(us) => us, -// _ => return Err("merged EHSketchList is not UniformSampling".into()), -// }; + let sampler = match &merged { + EHSketchList::UNIFORM(us) => us, + _ => return Err("merged EHSketchList is not UniformSampling".into()), + }; -// compute_sampling_stat(sampler, stat) -// } + compute_sampling_stat(sampler, stat) +} -// /// Compute a statistic from a merged UniformSampling instance. -// fn compute_sampling_stat( -// sampler: &UniformSampling, -// stat: &str, -// ) -> Result> { -// let samples = sampler.samples(); -// if samples.is_empty() { -// return Err("no samples available".into()); -// } +/// Compute a statistic from a merged UniformSampling instance. +fn compute_sampling_stat( + sampler: &UniformSampling, + stat: &str, +) -> Result> { + let samples = sampler.samples(); + if samples.is_empty() { + return Err("no samples available".into()); + } -// let n = samples.len() as f64; -// let total_seen = sampler.total_seen() as f64; + let n = samples.len() as f64; + let total_seen = sampler.total_seen() as f64; -// match stat { -// "count" => { -// // Estimate total count from sample rate -// Ok(total_seen) -// } -// "sum" => { -// let sample_sum: f64 = samples.iter().sum(); -// // Scale up by (total_seen / n_samples) to estimate population sum -// Ok(sample_sum * (total_seen / n)) -// } -// "sum2" => { -// let sample_sum2: f64 = samples.iter().map(|x| x * x).sum(); -// Ok(sample_sum2 * (total_seen / n)) -// } -// "avg" => { -// let sample_sum: f64 = samples.iter().sum(); -// Ok(sample_sum / n) -// } -// "stddev" => { -// let mean = samples.iter().sum::() / n; -// let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::() / n; -// Ok(variance.sqrt()) -// } -// "stdvar" => { -// let mean = samples.iter().sum::() / n; -// let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::() / n; -// Ok(variance) -// } -// _ => Err(format!("unknown sampling stat: {}", stat).into()), -// } -// } + match stat { + "count" => { + // Estimate total count from sample rate + Ok(total_seen) + } + "sum" => { + let sample_sum: f64 = samples.iter().sum(); + // Scale up by (total_seen / n_samples) to estimate population sum + Ok(sample_sum * (total_seen / n)) + } + "sum2" => { + let sample_sum2: f64 = samples.iter().map(|x| x * x).sum(); + Ok(sample_sum2 * (total_seen / n)) + } + "avg" => { + let sample_sum: f64 = samples.iter().sum(); + Ok(sample_sum / n) + } + "stddev" => { + let mean = samples.iter().sum::() / n; + let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::() / n; + Ok(variance.sqrt()) + } + "stdvar" => { + let mean = samples.iter().sum::() / n; + let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::() / n; + Ok(variance) + } + _ => Err(format!("unknown sampling stat: {}", stat).into()), + } +} -// #[cfg(test)] -// mod tests { -// use super::*; -// use crate::stores::promsketch_store::config::PromSketchConfig; -// use crate::stores::promsketch_store::series::PromSketchMemSeries; -// use crate::stores::promsketch_store::PromSketchType; +#[cfg(test)] +mod tests { + use super::*; + use crate::stores::promsketch_store::config::PromSketchConfig; + use crate::stores::promsketch_store::series::PromSketchMemSeries; + use crate::stores::promsketch_store::PromSketchType; -// fn create_test_series_with_kll_data() -> PromSketchMemSeries { -// let config = PromSketchConfig::default(); -// let mut series = PromSketchMemSeries::new("test".to_string()); -// series -// .sketch_instances -// .ensure_initialized(PromSketchType::EHKLL, &config); + fn create_test_series_with_kll_data() -> PromSketchMemSeries { + let config = PromSketchConfig::default(); + let mut series = PromSketchMemSeries::new("test".to_string()); + series + .sketch_instances + .ensure_initialized(PromSketchType::EHKLL, &config); -// // Insert values 1..=100 at successive timestamps -// for i in 1..=100u64 { -// let input = SketchInput::F64(i as f64); -// if let Some(ref mut eh) = series.sketch_instances.eh_kll { -// eh.update(i, &input); -// } -// } -// series -// } + // Insert values 1..=100 at successive timestamps + for i in 1..=100u64 { + let input = SketchInput::F64(i as f64); + if let Some(ref mut eh) = series.sketch_instances.eh_kll { + eh.update(i, &input); + } + } + series + } -// fn create_test_series_with_sampling_data() -> PromSketchMemSeries { -// let config = PromSketchConfig::default(); -// let mut series = PromSketchMemSeries::new("test".to_string()); -// series -// .sketch_instances -// .ensure_initialized(PromSketchType::USampling, &config); + fn create_test_series_with_sampling_data() -> PromSketchMemSeries { + let config = PromSketchConfig::default(); + let mut series = PromSketchMemSeries::new("test".to_string()); + series + .sketch_instances + .ensure_initialized(PromSketchType::USampling, &config); -// for i in 1..=1000u64 { -// let input = SketchInput::F64(i as f64); -// if let Some(ref mut eh) = series.sketch_instances.eh_sampling { -// eh.update(i, &input); -// } -// } -// series -// } + for i in 1..=1000u64 { + let input = SketchInput::F64(i as f64); + if let Some(ref mut eh) = series.sketch_instances.eh_sampling { + eh.update(i, &input); + } + } + series + } -// fn create_test_series_with_univmon_data() -> PromSketchMemSeries { -// let config = PromSketchConfig::default(); -// let mut series = PromSketchMemSeries::new("test".to_string()); -// series -// .sketch_instances -// .ensure_initialized(PromSketchType::EHUniv, &config); + fn create_test_series_with_univmon_data() -> PromSketchMemSeries { + let config = PromSketchConfig::default(); + let mut series = PromSketchMemSeries::new("test".to_string()); + series + .sketch_instances + .ensure_initialized(PromSketchType::EHUniv, &config); -// for i in 1..=100u64 { -// let input = SketchInput::F64(i as f64); -// if let Some(ref mut eh) = series.sketch_instances.eh_univ { -// eh.update(i, &input, 1); -// } -// } -// series -// } + for i in 1..=100u64 { + let input = SketchInput::F64(i as f64); + if let Some(ref mut eh) = series.sketch_instances.eh_univ { + eh.update(i, &input, 1); + } + } + series + } -// #[test] -// fn test_eval_kll_quantile() { -// let series = create_test_series_with_kll_data(); -// let result = eval_function("quantile_over_time", &series, 0.5, 1, 100); -// assert!(result.is_ok()); -// let val = result.unwrap(); -// // Median of 1..100 should be around 50 -// assert!(val > 30.0 && val < 70.0, "median was {}", val); -// } + #[test] + fn test_eval_kll_quantile() { + let series = create_test_series_with_kll_data(); + let result = eval_function("quantile_over_time", &series, 0.5, 1, 100); + assert!(result.is_ok()); + let val = result.unwrap(); + // Median of 1..100 should be around 50 + assert!(val > 30.0 && val < 70.0, "median was {}", val); + } -// #[test] -// fn test_eval_min_max() { -// let series = create_test_series_with_kll_data(); + #[test] + fn test_eval_min_max() { + let series = create_test_series_with_kll_data(); -// let min_result = eval_function("min_over_time", &series, 0.0, 1, 100); -// assert!(min_result.is_ok()); -// let min_val = min_result.unwrap(); -// assert!(min_val <= 5.0, "min was {}", min_val); + let min_result = eval_function("min_over_time", &series, 0.0, 1, 100); + assert!(min_result.is_ok()); + let min_val = min_result.unwrap(); + assert!(min_val <= 5.0, "min was {}", min_val); -// let max_result = eval_function("max_over_time", &series, 0.0, 1, 100); -// assert!(max_result.is_ok()); -// let max_val = max_result.unwrap(); -// assert!(max_val >= 95.0, "max was {}", max_val); -// } + let max_result = eval_function("max_over_time", &series, 0.0, 1, 100); + assert!(max_result.is_ok()); + let max_val = max_result.unwrap(); + assert!(max_val >= 95.0, "max was {}", max_val); + } -// #[test] -// fn test_eval_sampling_avg() { -// let series = create_test_series_with_sampling_data(); -// let result = eval_function("avg_over_time", &series, 0.0, 1, 1000); -// assert!(result.is_ok()); -// let val = result.unwrap(); -// // avg of 1..1000 should be around 500.5 -// assert!( -// val > 300.0 && val < 700.0, -// "avg was {} (expected ~500.5)", -// val -// ); -// } + #[test] + fn test_eval_sampling_avg() { + let series = create_test_series_with_sampling_data(); + let result = eval_function("avg_over_time", &series, 0.0, 1, 1000); + assert!(result.is_ok()); + let val = result.unwrap(); + // avg of 1..1000 should be around 500.5 + assert!( + val > 300.0 && val < 700.0, + "avg was {} (expected ~500.5)", + val + ); + } -// #[test] -// fn test_eval_sampling_count() { -// let series = create_test_series_with_sampling_data(); -// let result = eval_function("count_over_time", &series, 0.0, 1, 1000); -// assert!(result.is_ok()); -// let val = result.unwrap(); -// // total_seen should be 1000 -// assert!( -// val > 500.0 && val <= 1000.0, -// "count was {} (expected ~1000)", -// val -// ); -// } + #[test] + fn test_eval_sampling_count() { + let series = create_test_series_with_sampling_data(); + let result = eval_function("count_over_time", &series, 0.0, 1, 1000); + assert!(result.is_ok()); + let val = result.unwrap(); + // total_seen should be 1000 + assert!( + val > 500.0 && val <= 1000.0, + "count was {} (expected ~1000)", + val + ); + } -// #[test] -// fn test_eval_univmon_entropy() { -// let series = create_test_series_with_univmon_data(); -// let result = eval_function("entropy_over_time", &series, 0.0, 1, 100); -// assert!(result.is_ok()); -// // UnivMon entropy with small data can be 0; verify query dispatches correctly -// let val = result.unwrap(); -// assert!(val >= 0.0, "entropy was {}", val); -// } + #[test] + fn test_eval_univmon_entropy() { + let series = create_test_series_with_univmon_data(); + let result = eval_function("entropy_over_time", &series, 0.0, 1, 100); + assert!(result.is_ok()); + // UnivMon entropy with small data can be 0; verify query dispatches correctly + let val = result.unwrap(); + assert!(val >= 0.0, "entropy was {}", val); + } -// #[test] -// fn test_unsupported_function() { -// let series = PromSketchMemSeries::new("test".to_string()); -// let result = eval_function("nonexistent_func", &series, 0.0, 1, 100); -// assert!(result.is_err()); -// } -// } + #[test] + fn test_unsupported_function() { + let series = PromSketchMemSeries::new("test".to_string()); + let result = eval_function("nonexistent_func", &series, 0.0, 1, 100); + assert!(result.is_err()); + } +} diff --git a/asap-query-engine/src/stores/promsketch_store/series.rs b/asap-query-engine/src/stores/promsketch_store/series.rs index 1a2514f..c0871f2 100644 --- a/asap-query-engine/src/stores/promsketch_store/series.rs +++ b/asap-query-engine/src/stores/promsketch_store/series.rs @@ -1,180 +1,180 @@ -// use sketchlib_rust::{ -// EHSketchList, EHUnivOptimized, ExponentialHistogram, SketchInput, UniformSampling, KLL, -// }; - -// use super::config::PromSketchConfig; -// use super::PromSketchType; - -// /// Per-series sketch instances. Each field wraps a different EHSketchList type -// /// inside an ExponentialHistogram for time-windowed merging. -// pub struct PromSketchInstances { -// /// Optimized hybrid EH for UnivMon — entropy, cardinality, L1, L2, distinct. -// pub eh_univ: Option, -// /// EH wrapping KLL — for quantile, min, max. -// pub eh_kll: Option, -// /// EH wrapping UniformSampling — for avg, count, sum, stddev, stdvar. -// pub eh_sampling: Option, -// } - -// impl Default for PromSketchInstances { -// fn default() -> Self { -// Self::new() -// } -// } - -// impl PromSketchInstances { -// pub fn new() -> Self { -// Self { -// eh_univ: None, -// eh_kll: None, -// eh_sampling: None, -// } -// } - -// /// Lazily initialize the sketch for the given type if not already present. -// pub fn ensure_initialized(&mut self, stype: PromSketchType, config: &PromSketchConfig) { -// match stype { -// PromSketchType::EHUniv => { -// if self.eh_univ.is_none() { -// self.eh_univ = Some(EHUnivOptimized::with_defaults( -// config.eh_univ.k, -// config.eh_univ.time_window, -// )); -// } -// } -// PromSketchType::EHKLL => { -// if self.eh_kll.is_none() { -// let chapter = EHSketchList::KLL(KLL::init_kll(config.eh_kll.kll_k)); -// self.eh_kll = Some(ExponentialHistogram::new( -// config.eh_kll.k, -// config.eh_kll.time_window, -// chapter, -// )); -// } -// } -// PromSketchType::USampling => { -// if self.eh_sampling.is_none() { -// let chapter = -// EHSketchList::UNIFORM(UniformSampling::new(config.sampling.sample_rate)); -// self.eh_sampling = Some(ExponentialHistogram::new( -// config.eh_kll.k, -// config.sampling.time_window, -// chapter, -// )); -// } -// } -// } -// } - -// /// Insert a data point into all active sketches. -// pub fn insert(&mut self, time: u64, value: f64) { -// let input = SketchInput::F64(value); - -// if let Some(ref mut eh) = self.eh_univ { -// // EHUnivOptimized::update(time, key, frequency_count) -// eh.update(time, &input, 1); -// } -// if let Some(ref mut eh) = self.eh_kll { -// eh.update(time, &input); -// } -// if let Some(ref mut eh) = self.eh_sampling { -// eh.update(time, &input); -// } -// } - -// /// Check whether the sketch for the given type covers the time range. -// pub fn cover(&self, stype: PromSketchType, mint: u64, maxt: u64) -> bool { -// match stype { -// PromSketchType::EHUniv => self.eh_univ.as_ref().is_some_and(|eh| eh.cover(mint, maxt)), -// PromSketchType::EHKLL => self.eh_kll.as_ref().is_some_and(|eh| eh.cover(mint, maxt)), -// PromSketchType::USampling => self -// .eh_sampling -// .as_ref() -// .is_some_and(|eh| eh.cover(mint, maxt)), -// } -// } -// } - -// /// A single time series with its label string and associated sketch instances. -// pub struct PromSketchMemSeries { -// pub labels: String, -// pub sketch_instances: PromSketchInstances, -// /// Earliest timestamp seen for this series (-1 means uninitialized). -// pub oldest_timestamp: i64, -// } - -// impl PromSketchMemSeries { -// pub fn new(labels: String) -> Self { -// Self { -// labels, -// sketch_instances: PromSketchInstances::new(), -// oldest_timestamp: -1, -// } -// } - -// /// Insert a data point, updating oldest_timestamp tracking. -// pub fn insert(&mut self, time: u64, value: f64) { -// if self.oldest_timestamp == -1 { -// self.oldest_timestamp = time as i64; -// } -// self.sketch_instances.insert(time, value); -// } -// } - -// #[cfg(test)] -// mod tests { -// use super::*; - -// #[test] -// fn test_ensure_initialized_creates_correct_types() { -// let config = PromSketchConfig::default(); -// let mut instances = PromSketchInstances::new(); - -// assert!(instances.eh_univ.is_none()); -// assert!(instances.eh_kll.is_none()); -// assert!(instances.eh_sampling.is_none()); - -// instances.ensure_initialized(PromSketchType::EHUniv, &config); -// assert!(instances.eh_univ.is_some()); -// assert!(instances.eh_kll.is_none()); - -// instances.ensure_initialized(PromSketchType::EHKLL, &config); -// assert!(instances.eh_kll.is_some()); -// assert!(instances.eh_sampling.is_none()); - -// instances.ensure_initialized(PromSketchType::USampling, &config); -// assert!(instances.eh_sampling.is_some()); -// } - -// #[test] -// fn test_ensure_initialized_idempotent() { -// let config = PromSketchConfig::default(); -// let mut instances = PromSketchInstances::new(); - -// instances.ensure_initialized(PromSketchType::EHUniv, &config); -// let ptr1 = instances.eh_univ.as_ref().unwrap() as *const EHUnivOptimized; - -// // Calling again should not replace the instance. -// instances.ensure_initialized(PromSketchType::EHUniv, &config); -// let ptr2 = instances.eh_univ.as_ref().unwrap() as *const EHUnivOptimized; -// assert_eq!(ptr1, ptr2); -// } - -// #[test] -// fn test_mem_series_insert_updates_oldest() { -// let mut series = PromSketchMemSeries::new("test_metric".to_string()); -// assert_eq!(series.oldest_timestamp, -1); - -// let config = PromSketchConfig::default(); -// series -// .sketch_instances -// .ensure_initialized(PromSketchType::EHKLL, &config); - -// series.insert(100, 1.0); -// assert_eq!(series.oldest_timestamp, 100); - -// series.insert(50, 2.0); -// // oldest_timestamp should not change once set -// assert_eq!(series.oldest_timestamp, 100); -// } -// } +use sketchlib_rust::{ + EHSketchList, EHUnivOptimized, ExponentialHistogram, SketchInput, UniformSampling, KLL, +}; + +use super::config::PromSketchConfig; +use super::PromSketchType; + +/// Per-series sketch instances. Each field wraps a different EHSketchList type +/// inside an ExponentialHistogram for time-windowed merging. +pub struct PromSketchInstances { + /// Optimized hybrid EH for UnivMon — entropy, cardinality, L1, L2, distinct. + pub eh_univ: Option, + /// EH wrapping KLL — for quantile, min, max. + pub eh_kll: Option, + /// EH wrapping UniformSampling — for avg, count, sum, stddev, stdvar. + pub eh_sampling: Option, +} + +impl Default for PromSketchInstances { + fn default() -> Self { + Self::new() + } +} + +impl PromSketchInstances { + pub fn new() -> Self { + Self { + eh_univ: None, + eh_kll: None, + eh_sampling: None, + } + } + + /// Lazily initialize the sketch for the given type if not already present. + pub fn ensure_initialized(&mut self, stype: PromSketchType, config: &PromSketchConfig) { + match stype { + PromSketchType::EHUniv => { + if self.eh_univ.is_none() { + self.eh_univ = Some(EHUnivOptimized::with_defaults( + config.eh_univ.k, + config.eh_univ.time_window, + )); + } + } + PromSketchType::EHKLL => { + if self.eh_kll.is_none() { + let chapter = EHSketchList::KLL(KLL::init_kll(config.eh_kll.kll_k)); + self.eh_kll = Some(ExponentialHistogram::new( + config.eh_kll.k, + config.eh_kll.time_window, + chapter, + )); + } + } + PromSketchType::USampling => { + if self.eh_sampling.is_none() { + let chapter = + EHSketchList::UNIFORM(UniformSampling::new(config.sampling.sample_rate)); + self.eh_sampling = Some(ExponentialHistogram::new( + config.eh_kll.k, + config.sampling.time_window, + chapter, + )); + } + } + } + } + + /// Insert a data point into all active sketches. + pub fn insert(&mut self, time: u64, value: f64) { + let input = SketchInput::F64(value); + + if let Some(ref mut eh) = self.eh_univ { + // EHUnivOptimized::update(time, key, frequency_count) + eh.update(time, &input, 1); + } + if let Some(ref mut eh) = self.eh_kll { + eh.update(time, &input); + } + if let Some(ref mut eh) = self.eh_sampling { + eh.update(time, &input); + } + } + + /// Check whether the sketch for the given type covers the time range. + pub fn cover(&self, stype: PromSketchType, mint: u64, maxt: u64) -> bool { + match stype { + PromSketchType::EHUniv => self.eh_univ.as_ref().is_some_and(|eh| eh.cover(mint, maxt)), + PromSketchType::EHKLL => self.eh_kll.as_ref().is_some_and(|eh| eh.cover(mint, maxt)), + PromSketchType::USampling => self + .eh_sampling + .as_ref() + .is_some_and(|eh| eh.cover(mint, maxt)), + } + } +} + +/// A single time series with its label string and associated sketch instances. +pub struct PromSketchMemSeries { + pub labels: String, + pub sketch_instances: PromSketchInstances, + /// Earliest timestamp seen for this series (-1 means uninitialized). + pub oldest_timestamp: i64, +} + +impl PromSketchMemSeries { + pub fn new(labels: String) -> Self { + Self { + labels, + sketch_instances: PromSketchInstances::new(), + oldest_timestamp: -1, + } + } + + /// Insert a data point, updating oldest_timestamp tracking. + pub fn insert(&mut self, time: u64, value: f64) { + if self.oldest_timestamp == -1 { + self.oldest_timestamp = time as i64; + } + self.sketch_instances.insert(time, value); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ensure_initialized_creates_correct_types() { + let config = PromSketchConfig::default(); + let mut instances = PromSketchInstances::new(); + + assert!(instances.eh_univ.is_none()); + assert!(instances.eh_kll.is_none()); + assert!(instances.eh_sampling.is_none()); + + instances.ensure_initialized(PromSketchType::EHUniv, &config); + assert!(instances.eh_univ.is_some()); + assert!(instances.eh_kll.is_none()); + + instances.ensure_initialized(PromSketchType::EHKLL, &config); + assert!(instances.eh_kll.is_some()); + assert!(instances.eh_sampling.is_none()); + + instances.ensure_initialized(PromSketchType::USampling, &config); + assert!(instances.eh_sampling.is_some()); + } + + #[test] + fn test_ensure_initialized_idempotent() { + let config = PromSketchConfig::default(); + let mut instances = PromSketchInstances::new(); + + instances.ensure_initialized(PromSketchType::EHUniv, &config); + let ptr1 = instances.eh_univ.as_ref().unwrap() as *const EHUnivOptimized; + + // Calling again should not replace the instance. + instances.ensure_initialized(PromSketchType::EHUniv, &config); + let ptr2 = instances.eh_univ.as_ref().unwrap() as *const EHUnivOptimized; + assert_eq!(ptr1, ptr2); + } + + #[test] + fn test_mem_series_insert_updates_oldest() { + let mut series = PromSketchMemSeries::new("test_metric".to_string()); + assert_eq!(series.oldest_timestamp, -1); + + let config = PromSketchConfig::default(); + series + .sketch_instances + .ensure_initialized(PromSketchType::EHKLL, &config); + + series.insert(100, 1.0); + assert_eq!(series.oldest_timestamp, 100); + + series.insert(50, 2.0); + // oldest_timestamp should not change once set + assert_eq!(series.oldest_timestamp, 100); + } +} diff --git a/asap-tools/components.conf b/asap-tools/components.conf index cd242f4..abb3e04 100644 --- a/asap-tools/components.conf +++ b/asap-tools/components.conf @@ -6,7 +6,6 @@ asap-tools asap-common #FlinkSketch #QueryEngine -sketchlib-rust asap-query-engine asap-planner #prometheus-kafka-adapter