diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..397172d --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203, E501, W503 +exclude = + .git, + __pycache__, + .venv, + .eggs, + *.egg, + dist, + build diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..1191162 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,57 @@ +name: Docker Builds + +on: + push: + branches: [ main ] + paths: + - 'CommonDependencies/installation/**' + - 'Controller/**' + - 'PrometheusClient/**' + - '.github/workflows/docker.yml' + pull_request: + branches: [ main ] + paths: + - 'CommonDependencies/installation/**' + - 'Controller/**' + - 'PrometheusClient/**' + - '.github/workflows/docker.yml' + workflow_dispatch: + +jobs: + build-images: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build base image + working-directory: CommonDependencies/installation + run: | + docker build \ + -t sketchdb-base:latest \ + -f Dockerfile \ + .. + + - name: Test base image + run: | + docker run --rm sketchdb-base:latest python --version + docker run --rm sketchdb-base:latest pip list + + - name: Build Controller Docker image + working-directory: Controller + run: | + docker build \ + -t sketchdb-controller:latest \ + -f Dockerfile \ + . + + - name: Build PrometheusClient Docker image + working-directory: PrometheusClient + run: | + docker build \ + -t sketchdb-prometheus-client:latest \ + -f Dockerfile \ + . 
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..fc0e377 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,248 @@ +name: Python Projects CI + +on: + push: + branches: [ main ] + paths: + - 'ArroyoSketch/**' + - 'PrometheusClient/**' + - 'Controller/**' + - 'Utilities/**' + - 'PrometheusExporters/**' + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + - '.github/workflows/python.yml' + pull_request: + branches: [ main ] + paths: + - 'ArroyoSketch/**' + - 'PrometheusClient/**' + - 'Controller/**' + - 'Utilities/**' + - 'PrometheusExporters/**' + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + - '.github/workflows/python.yml' + workflow_dispatch: + +permissions: + contents: read + pull-requests: read + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + arroyo_sketch: ${{ steps.filter.outputs.arroyo_sketch }} + prometheus_client: ${{ steps.filter.outputs.prometheus_client }} + controller: ${{ steps.filter.outputs.controller }} + utilities: ${{ steps.filter.outputs.utilities }} + prometheus_exporters: ${{ steps.filter.outputs.prometheus_exporters }} + execution_utilities: ${{ steps.filter.outputs.execution_utilities }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + arroyo_sketch: + - 'ArroyoSketch/**' + - 'CommonDependencies/dependencies/py/**' + prometheus_client: + - 'PrometheusClient/**' + - 'CommonDependencies/dependencies/py/**' + controller: + - 'Controller/**' + - 'CommonDependencies/dependencies/py/**' + utilities: + - 'Utilities/**' + - 'CommonDependencies/dependencies/py/**' + prometheus_exporters: + - 'PrometheusExporters/**' + - 'CommonDependencies/dependencies/py/**' + execution_utilities: + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + + test-arroyo-sketch: + needs: detect-changes + if: needs.detect-changes.outputs.arroyo_sketch == 'true' + 
runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f ArroyoSketch/requirements.txt ]; then pip install -r ArroyoSketch/requirements.txt; fi + - name: Check formatting with Black + working-directory: ArroyoSketch + run: black --check --diff . + - name: Lint with flake8 + working-directory: ArroyoSketch + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-prometheus-client: + needs: detect-changes + if: needs.detect-changes.outputs.prometheus_client == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 mypy types-requests types-PyYAML typing-extensions numpy prometheus-client urllib3 + if [ -f PrometheusClient/requirements.txt ]; then pip install -r PrometheusClient/requirements.txt; fi + - name: Check formatting with Black + working-directory: PrometheusClient + run: black --check --diff . + - name: Lint with flake8 + working-directory: PrometheusClient + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . 
--config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + - name: Type check with mypy + working-directory: PrometheusClient + run: mypy . + + test-controller: + needs: detect-changes + if: needs.detect-changes.outputs.controller == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f Controller/requirements.txt ]; then pip install -r Controller/requirements.txt; fi + - name: Check formatting with Black + working-directory: Controller + run: black --check --diff . + - name: Lint with flake8 + working-directory: Controller + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . 
--config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-utilities: + needs: detect-changes + if: needs.detect-changes.outputs.utilities == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f Utilities/requirements.txt ]; then pip install -r Utilities/requirements.txt; fi + - name: Check formatting with Black + working-directory: Utilities + run: black --check --diff . + - name: Lint with flake8 + working-directory: Utilities + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-prometheus-exporters: + needs: detect-changes + if: needs.detect-changes.outputs.prometheus_exporters == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 mypy isort + if [ -f PrometheusExporters/requirements.txt ]; then pip install -r PrometheusExporters/requirements.txt; fi + - name: Check formatting with Black + working-directory: PrometheusExporters + run: black --check --diff . 
+ - name: Check import sorting with isort + working-directory: PrometheusExporters + run: isort --check-only --diff --settings-file .isort.cfg . + - name: Lint with flake8 + working-directory: PrometheusExporters + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + - name: Type check with mypy + working-directory: PrometheusExporters + run: mypy . --config-file=.mypy.ini + + test-execution-utilities: + needs: detect-changes + if: needs.detect-changes.outputs.execution_utilities == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f ExecutionUtilities/requirements.txt ]; then pip install -r ExecutionUtilities/requirements.txt; fi + - name: Check formatting with Black + working-directory: ExecutionUtilities + run: black --check --diff . + - name: Lint with flake8 + working-directory: ExecutionUtilities + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . 
--config=../.flake8 --count --exit-zero --max-complexity=10 --statistics diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..143865b --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,151 @@ +name: Rust Projects CI + +on: + push: + branches: [ main ] + paths: + - 'QueryEngineRust/**' + - 'CommonDependencies/dependencies/rs/**' + - 'CommonDependencies/tests/**' + - 'sketch-core/**' + - '.github/workflows/rust.yml' + pull_request: + branches: [ main ] + paths: + - 'QueryEngineRust/**' + - 'CommonDependencies/dependencies/rs/**' + - 'CommonDependencies/tests/**' + - 'sketch-core/**' + - '.github/workflows/rust.yml' + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + format-and-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + # - name: Clone sketchlib-rust + # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + components: rustfmt, clippy + + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + QueryEngineRust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + - name: Check formatting + run: cargo fmt -- --check + working-directory: QueryEngineRust + + - name: Check formatting (sketch_db_common) + run: cargo fmt -- --check + working-directory: CommonDependencies/dependencies/rs/sketch_db_common + + - name: Run clippy + run: cargo clippy --all-targets --all-features -- -D warnings + working-directory: QueryEngineRust + env: + RUSTC_WRAPPER: sccache + + - name: Check formatting (sketch-core) + run: cargo fmt -- --check + working-directory: 
sketch-core + + - name: Run clippy (sketch-core) + run: cargo clippy --all-targets --all-features -- -D warnings + working-directory: sketch-core + env: + RUSTC_WRAPPER: sccache + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + # - name: Clone sketchlib-rust + # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + QueryEngineRust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + - name: Run QueryEngineRust tests + run: cargo test + working-directory: QueryEngineRust + env: + RUSTC_WRAPPER: sccache + + - name: Run sql_utilities tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/sql_utilities + env: + RUSTC_WRAPPER: sccache + + - name: Run promql_utilities tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/promql_utilities + env: + RUSTC_WRAPPER: sccache + + - name: Run sketch-core tests + run: cargo test + working-directory: sketch-core + - name: Run sketch_db_common tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/sketch_db_common + env: + RUSTC_WRAPPER: sccache + + docker: + runs-on: ubuntu-latest + needs: [format-and-lint, test] + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + run: 
docker build -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest . + # run: | + # echo "${{ secrets.PRIVATE_REPO_TOKEN }}" > /tmp/git_token + # docker build --secret id=git_token,src=/tmp/git_token -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest . + # rm -f /tmp/git_token diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fa17294 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +target/ +experiment_outputs/ + +# Private repo, vendored locally until open sourced (see GitHub issue) +sketchlib-rust/ + +# Runtime and generated files +metadata/ +preprocessed_configs/ +status +uuid +store/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fed70d5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,179 @@ +repos: + # General hooks for all file types + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-executables-have-shebangs + + # Docker linting + - repo: https://github.com/hadolint/hadolint + rev: v2.12.0 + hooks: + - id: hadolint-docker + files: ^.*Dockerfile.*$ + args: ['--ignore', 'DL3008'] + + # Shell script linting + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.5 + hooks: + - id: shellcheck + files: \.(sh|bash)$ + + # Python formatting and linting + - repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + language_version: python3 + files: \.(py|pyi)$ + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 # last version of isort that supports python 3.8 + hooks: + - id: isort + name: isort (python) + files: ^PrometheusExporters/.*\.py$ + args: ["--settings-file", "PrometheusExporters/.isort.cfg"] + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + files: \.(py|pyi)$ + + - repo: 
https://github.com/pre-commit/mirrors-mypy + rev: "v1.14.1" + hooks: + - id: mypy + files: ^PrometheusExporters/.*\.py$ + args: [--config-file=PrometheusExporters/.mypy.ini] + + - id: mypy + files: ^PrometheusClient/ + args: [--config-file=PrometheusClient/pyproject.toml] + additional_dependencies: + - types-requests + - types-PyYAML + - typing-extensions + - numpy + - prometheus-client + - urllib3 + + # Rust formatting and linting + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + description: Format Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path QueryEngineRust/Cargo.toml -- --check' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-check + name: cargo check + description: Check the package for errors. + entry: bash -c 'cargo check --manifest-path QueryEngineRust/Cargo.toml --all' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-clippy + name: cargo clippy + description: Lint Rust sources + entry: bash -c 'cargo clippy --manifest-path QueryEngineRust/Cargo.toml --all-targets --all-features -- -D warnings' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-datafusion-summary-library + name: cargo fmt (datafusion_summary_library) + description: Format datafusion_summary_library Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/datafusion_summary_library/.*\.rs$ + pass_filenames: false + + - id: cargo-test-datafusion-summary-library + name: cargo test (datafusion_summary_library) + description: Run datafusion_summary_library tests. 
+ entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/datafusion_summary_library/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-sql-utilities + name: cargo fmt (sql_utilities) + description: Format sql_utilities Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/sql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sql-utilities + name: cargo test (sql_utilities) + description: Run sql_utilities tests. + entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/sql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-promql-utilities + name: cargo fmt (promql_utilities) + description: Format promql_utilities Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/promql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-test-promql-utilities + name: cargo test (promql_utilities) + description: Run promql_utilities tests. + entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/promql_utilities/.*\.rs$ + pass_filenames: false + - id: cargo-fmt-sketch-core + name: cargo fmt (sketch-core) + description: Format sketch-core Rust files with rustfmt. 
+ entry: bash -c 'cargo fmt --manifest-path sketch-core/Cargo.toml -- --check' + language: system + files: ^sketch-core/.*\.rs$ + pass_filenames: false + + - id: cargo-clippy-sketch-core + name: cargo clippy (sketch-core) + description: Lint sketch-core Rust sources. + entry: bash -c 'cargo clippy --manifest-path sketch-core/Cargo.toml --all-targets --all-features -- -D warnings' + language: system + files: ^sketch-core/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sketch-core + name: cargo test (sketch-core) + description: Run sketch-core tests. + entry: bash -c 'cargo test --manifest-path sketch-core/Cargo.toml' + language: system + files: ^sketch-core/.*\.rs$ + - id: cargo-fmt-sketch-db-common + name: cargo fmt (sketch_db_common) + description: Format sketch_db_common Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/sketch_db_common/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sketch-db-common + name: cargo test (sketch_db_common) + description: Run sketch_db_common tests. 
+ entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/sketch_db_common/.*\.rs$ + pass_filenames: false diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 0000000..022d394 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,4 @@ +# Disable warnings about not following external sources +# SC1090: Can't follow non-constant source (e.g., source ~/.bashrc) +# SC1091: Not following sourced files that are external/generated (e.g., nvm.sh, cargo env) +disable=SC1090,SC1091 diff --git a/ArroyoSketch/.gitignore b/ArroyoSketch/.gitignore new file mode 100644 index 0000000..f7ee054 --- /dev/null +++ b/ArroyoSketch/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +**/*.pyc +**/*.swp diff --git a/ArroyoSketch/Dockerfile b/ArroyoSketch/Dockerfile new file mode 100644 index 0000000..259e004 --- /dev/null +++ b/ArroyoSketch/Dockerfile @@ -0,0 +1,25 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="ArroyoSketch pipeline configuration service" + +# Set working directory +WORKDIR /app + +# Install Python dependencies +RUN pip3 install --no-cache-dir jinja2 requests loguru pyyaml + +# Copy application code +COPY classes/ ./classes/ +COPY utils/ ./utils/ +COPY templates/ ./templates/ +COPY examples/ ./examples/ +COPY run_arroyosketch.py . +COPY delete_pipeline.py . +COPY validate_udfs.py . 
+ +# Create output directory +RUN mkdir -p /app/output + +# Set the entry point +ENTRYPOINT ["python", "run_arroyosketch.py"] diff --git a/ArroyoSketch/LICENSE b/ArroyoSketch/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/ArroyoSketch/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ArroyoSketch/README.md b/ArroyoSketch/README.md new file mode 100644 index 0000000..576b181 --- /dev/null +++ b/ArroyoSketch/README.md @@ -0,0 +1,113 @@ +# ArroyoSketch + +ArroyoSketch is the pipeline configurator that creates Arroyo streaming pipelines from configuration files. + +## Purpose + +Given `streaming_config.yaml` (generated by Controller), ArroyoSketch: +1. Renders SQL query templates using Jinja2 +2. Creates Arroyo pipelines via REST API +3. Configures sketch-building UDFs with parameters +4. 
Sets up connections to Kafka for sketch output + +This automation eliminates manual pipeline creation and ensures consistency with Controller decisions. + +## How It Works + +### Input: streaming_config.yaml + +The Controller generates this file describing which sketches to build: + +**TODO** + +### Process: Render and Deploy + +**TODO** + +## Key Files + +### Entry Point + +**TODO** + +### Templates + +**TODO** + +### Validation + +- **`validate_udfs.py`** - UDF validation script + - Checks if UDFs are available in Arroyo + - Validates UDF signatures match usage + - Run before creating pipelines + +## Running Locally + +### Basic Usage + +```bash +python run_arroyosketch.py +``` + +**Expects:** +- `streaming_config.yaml` in current directory +- Arroyo running on `http://localhost:5115` (default) +- Kafka running on `localhost:9092` (default) + +### With Custom Config + +```bash +python run_arroyosketch.py \ + --config /path/to/streaming_config.yaml \ + --arroyo-url http://arroyo:5115 \ + --kafka-bootstrap kafka:9092 +``` + +### Dry-Run Mode + +Preview generated SQL without creating pipelines: + +```bash +python run_arroyosketch.py --dry-run +``` + +## Testing + +### Validate UDFs + +Before creating pipelines, verify that UDFs can compile against Arroyo: + +```bash +python validate_udfs.py +``` + +### Integration Test + +1. Start Arroyo: +```bash +cd arroyo +docker compose up +``` + +2. Run ArroyoSketch: +```bash +python run_arroyosketch.py --config test_config.yaml +``` + +3. Verify pipeline created: +```bash +curl http://localhost:5115/api/pipelines +# Should show your pipeline +``` + +4. 
Check pipeline is running: +```bash +# In Arroyo UI: http://localhost:5115 +# Navigate to Pipelines → See your pipeline status +``` + +## Extending + +### Adding a New Template + +**TODO** diff --git a/ArroyoSketch/arroyo-compose.yml b/ArroyoSketch/arroyo-compose.yml new file mode 100644 index 0000000..5e63756 --- /dev/null +++ b/ArroyoSketch/arroyo-compose.yml @@ -0,0 +1,11 @@ +services: + arroyo: + image: ghcr.io/projectasap/asap-arroyo:${ARROYO_VERSION:-v0.1.0} + container_name: sketchdb-arroyo + network_mode: host + volumes: + - ./config.yaml:/config.yaml + command: ["--config", "/config.yaml", "cluster"] + environment: + - ARROYO__API__RUN_HTTP_PORT=5115 + restart: "no" diff --git a/ArroyoSketch/arroyosketch-cli-compose.yml.j2 b/ArroyoSketch/arroyosketch-cli-compose.yml.j2 new file mode 100644 index 0000000..5421433 --- /dev/null +++ b/ArroyoSketch/arroyosketch-cli-compose.yml.j2 @@ -0,0 +1,30 @@ +services: + arroyosketch: + build: + context: {{ arroyosketch_dir }} + container_name: {{ container_name }} + hostname: arroyosketch + networks: + - asap-network + command: + - "--config_file_path=/controller-output/streaming_config.yaml" + - "--source_type=prometheus_remote_write" + - "--prometheus_base_port={{ prometheus_base_port }}" + - "--prometheus_path={{ prometheus_path }}" + - "--prometheus_bind_ip={{ prometheus_bind_ip }}" + - "--parallelism={{ parallelism }}" + - "--output_kafka_topic={{ output_kafka_topic }}" + - "--output_format={{ output_format }}" + - "--pipeline_name={{ pipeline_name }}" + - "--output_dir=/arroyosketch-output" + - "--arroyo_url={{ arroyo_url }}" + - "--bootstrap_servers={{ bootstrap_servers }}" + volumes: + - {{ controller_output_dir }}:/controller-output:ro + - {{ arroyosketch_output_dir }}:/arroyosketch-output + depends_on: + controller: + condition: service_completed_successfully + arroyo: + condition: service_healthy + restart: "no" # Init container - runs once and exits diff --git a/ArroyoSketch/config.yaml 
b/ArroyoSketch/config.yaml new file mode 100644 index 0000000..6388be9 --- /dev/null +++ b/ArroyoSketch/config.yaml @@ -0,0 +1,2 @@ +compiler: + use-local-udf-crate: true diff --git a/ArroyoSketch/delete_pipeline.py b/ArroyoSketch/delete_pipeline.py new file mode 100644 index 0000000..e493037 --- /dev/null +++ b/ArroyoSketch/delete_pipeline.py @@ -0,0 +1,50 @@ +import argparse + +from utils import arroyo_utils + + +def main(args): + # http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{args.pipeline_id}", + # method="patch", + # data=json.dumps({"stop": "immediate"}), + # ) + # http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{args.pipeline_id}", + # method="delete", + # ) + + if not args.pipeline_id and not args.all_pipelines: + raise ValueError("You must specify either --pipeline_id or --all_pipelines.") + + pipeline_ids = [] + if args.pipeline_id: + pipeline_ids = [args.pipeline_id] + elif args.all_pipelines: + pipeline_ids = arroyo_utils.get_all_pipelines(arroyo_url=args.arroyo_url) + + arroyo_utils.stop_and_delete_pipelines( + arroyo_url=args.arroyo_url, pipeline_ids=pipeline_ids + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete a pipeline.") + + parser.add_argument( + "--pipeline_id", + type=str, + required=False, + help="The ID of the pipeline to delete.", + ) + parser.add_argument( + "--all_pipelines", action="store_true", help="Delete all pipelines." 
+ ) + parser.add_argument( + "--arroyo_url", + default="http://localhost:5115/api/v1", + help="URL of the Arroyo API server", + ) + + args = parser.parse_args() + main(args) diff --git a/ArroyoSketch/examples/configs/streaming_config.yaml b/ArroyoSketch/examples/configs/streaming_config.yaml new file mode 100644 index 0000000..e42bef5 --- /dev/null +++ b/ArroyoSketch/examples/configs/streaming_config.yaml @@ -0,0 +1,74 @@ +aggregations: +- aggregationId: 1 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + # - instance + # - job + - label_0 + - label_1 + - label_2 + grouping: + - instance + - job + rollup: [] + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +# - aggregationId: 2 +# aggregationSubType: '' +# aggregationType: MultipleIncrease +# labels: +# aggregated: +# - instance +# - job +# - label_0 +# - label_1 +# - label_2 +# grouping: [] +# rollup: [] +# metric: fake_metric_total +# parameters: {} +# spatialFilter: '' +# tumblingWindowSize: 10 +# - aggregationId: 3 +# aggregationSubType: sum +# aggregationType: MultipleSum +# labels: +# aggregated: +# - instance +# - job +# grouping: [] +# rollup: +# - label_0 +# - label_1 +# - label_2 +# metric: fake_metric_total +# parameters: {} +# spatialFilter: '' +# tumblingWindowSize: 10 +- aggregationId: 4 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + - label_0 + grouping: [] + rollup: + - instance + - job + - label_1 + - label_2 + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +metrics: + fake_metric_total: + - instance + - job + - label_0 + - label_1 + - label_2 diff --git a/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml b/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml new file mode 100644 index 0000000..1edd171 --- /dev/null +++ b/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml @@ -0,0 +1,28 @@ +metrics: + 
fake_metric_total: + - instance + - job + - label_0 + - label_1 + - label_2 +aggregations: +- aggregationId: 1 + aggregationSubType: '' + aggregationType: DatasketchesKLL + labels: + aggregated: [] + grouping: + - instance + - job + - label_0 + - label_1 + - label_2 + rollup: [] + metric: fake_metric_total + parameters: + K: 20 + slideInterval: 60 + spatialFilter: '' + tumblingWindowSize: 60 + windowSize: 60 + windowType: tumbling diff --git a/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml b/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml new file mode 100644 index 0000000..8dc260f --- /dev/null +++ b/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml @@ -0,0 +1,28 @@ +tables: +- name: metrics_table + time_column: time + value_columns: + - cpu_usage + - memory_usage + metadata_columns: + - hostname + - datacenter +aggregations: + - aggregationId: 1 + table_name: metrics_table + value_column: cpu_usage + aggregationSubType: '' + aggregationType: DatasketchesKLL + labels: + grouping: + - datacenter + aggregated: [] + rollup: + - hostname + parameters: + K: 20 + slideInterval: 1 + spatialFilter: '' + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling diff --git a/ArroyoSketch/examples/inputs/fake_metric_total_10.json b/ArroyoSketch/examples/inputs/fake_metric_total_10.json new file mode 100644 index 0000000..49b080d --- /dev/null +++ b/ArroyoSketch/examples/inputs/fake_metric_total_10.json @@ -0,0 +1,9 @@ +{"timestamp": 1744164268.348, "value": 8112599.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164273.348, "value": 16029022.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": 
"value_2_value_0"} +{"timestamp": 1744164278.348, "value": 23866149.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164283.348, "value": 31757738.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164288.348, "value": 39597673.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164293.348, "value": 47164135.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164298.348, "value": 54908613.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164303.348, "value": 63114376.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164308.348, "value": 70839602.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} diff --git a/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json 
b/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json new file mode 100644 index 0000000..085cc1b --- /dev/null +++ b/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json @@ -0,0 +1,9 @@ +{"timestamp": 1744164268, "value": 8112599.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164273, "value": 16029022.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164278, "value": 23866149.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164283, "value": 31757738.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164288, "value": 39597673.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164293, "value": 47164135.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164298, "value": 54908613.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", 
"label_2": "value_2_value_0"} +{"timestamp": 1744164303, "value": 63114376.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164308, "value": 70839602.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} diff --git a/ArroyoSketch/examples/inputs/value.json b/ArroyoSketch/examples/inputs/value.json new file mode 100644 index 0000000..3ea6445 --- /dev/null +++ b/ArroyoSketch/examples/inputs/value.json @@ -0,0 +1,3 @@ +{"value": "abc"} +{"value": "def"} +{"value": "ghi"} diff --git a/ArroyoSketch/examples/json/connection_profile.json b/ArroyoSketch/examples/json/connection_profile.json new file mode 100644 index 0000000..9ebe430 --- /dev/null +++ b/ArroyoSketch/examples/json/connection_profile.json @@ -0,0 +1,10 @@ +{ + "name": "default-kafka-config", + "connector": "kafka", + "config": { + "authentication": {}, + "bootstrapServers": "localhost:9092", + "name": "default-kafka-config", + "schemaRegistryEnum": {} + } +} diff --git a/ArroyoSketch/examples/json/connection_table.json b/ArroyoSketch/examples/json/connection_table.json new file mode 100644 index 0000000..08b505e --- /dev/null +++ b/ArroyoSketch/examples/json/connection_table.json @@ -0,0 +1,118 @@ +{ + "name": "arroyo_input_source", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "arroyo_input", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + 
"structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + { + "fieldName": "application_name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "hostname", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "location", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + } + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{\n \"type\": \"object\",\n \"required\": [\"labels\", \"value\", \"name\", \"timestamp\"],\n \"properties\": {\n \"labels\": {\n \"type\": \"object\",\n \"required\": [\"hostname\", \"location\", \"application_name\"],\n \"properties\": {\n \"hostname\": {\n \"type\": \"string\",\n \"description\": \"Host identifier\"\n },\n \"location\": {\n \"type\": \"string\",\n \"description\": \"Geographic or data center location\"\n },\n \"application_name\": {\n \"type\": \"string\",\n \"description\": \"Name of the application being monitored\"\n }\n },\n \"additionalProperties\": false\n },\n \"value\": {\n \"type\": \"number\",\n \"description\": \"Metric value\"\n },\n \"name\": {\n \"type\": \"string\",\n \"description\": \"Metric name\"\n },\n 
\"timestamp\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"description\": \"Time when the metric was recorded, in RFC 3339 format\"\n }\n },\n \"additionalProperties\": false\n}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "cp_VzUf2EQ43R" +} diff --git a/ArroyoSketch/examples/json/connection_table_sink.json b/ArroyoSketch/examples/json/connection_table_sink.json new file mode 100644 index 0000000..1970fdb --- /dev/null +++ b/ArroyoSketch/examples/json/connection_table_sink.json @@ -0,0 +1,32 @@ +{ + "name": "arroyo_output_sink", + "connector": "kafka", + "tableType": "sink", + "config": { + "topic": "arroyo_output", + "type": { + "commit_mode": "at_least_once" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [], + "inferred": true, + "primaryKeys": [] + }, + "connectionProfileId": "cp_aBUB3tVozb" +} diff --git a/ArroyoSketch/examples/json/pipeline.json b/ArroyoSketch/examples/json/pipeline.json new file mode 100644 index 0000000..cb2685a --- /dev/null +++ b/ArroyoSketch/examples/json/pipeline.json @@ -0,0 +1,19 @@ +{ + "name": "p4", + "query": "-- INSERT INTO arroyo_output_sink\n-- SELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window\n-- FROM arroyo_input_source\n-- GROUP BY hostname, window\n\nINSERT INTO arroyo_output_sink\nSELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window, my_hashmap(value, string_to_hash(labels.location)) as hashmap\nFROM arroyo_input_source\nGROUP BY hostname, window", + "udfs": [ + { + "definition": "use arroyo_udf_plugin::udf;\n\n// #[udf]\n// fn my_median(mut args: Vec) -> Option {\n// if args.is_empty() {\n// return None;\n// }\n\n// args.sort();\n\n// let mid = 
args.len() / 2;\n// if args.len() % 2 == 0 {\n// Some((args[mid] + args[mid - 1]) as f64 / 2.0)\n// } else {\n// Some(args[mid] as f64)\n// }\n// }\n\n#[udf]\nfn my_median(mut args: Vec) -> Option {\n // Filter out NaN values\n args.retain(|x| !x.is_nan());\n \n if args.is_empty() {\n return None;\n }\n \n args.sort_by(|a, b| a.partial_cmp(b).unwrap()); // Safe now, no NaNs\n\n let mid = args.len() / 2;\n if args.len() % 2 == 0 {\n Some((args[mid] + args[mid - 1]) / 2.0)\n } else {\n Some(args[mid])\n }\n}", + "language": "rust" + }, + { + "definition": "\n/*\n[dependencies]\nbincode = \"1.3\"\n*/\n\nuse arroyo_udf_plugin::udf;\nuse std::collections::HashMap;\n\n#[udf]\nfn my_hashmap(mut values: Vec, keys: Vec) -> Option> {\n // Create a new hashmap to store the count of each name\n let mut name_counts: HashMap = HashMap::new();\n \n // Iterate through the keys and update the count for each name\n for key in keys {\n *name_counts.entry(key).or_insert(0) += 1;\n }\n \n // Serialize the hashmap to bytes using bincode\n bincode::serialize(&name_counts).ok()\n}", + "language": "rust" + }, + { + "definition": "\n/*\n[dependencies]\nahash = \"0.8.6\"\n*/\n\nuse arroyo_udf_plugin::udf;\nuse ahash::AHasher;\nuse std::hash::{Hash, Hasher};\n\n#[udf]\nfn string_to_hash(input: &str) -> u64 {\n let mut hasher = AHasher::default();\n input.hash(&mut hasher);\n hasher.finish()\n}", + "language": "rust" + } + ], + "parallelism": 1 +} diff --git a/ArroyoSketch/examples/sql/create_fake_metric.sql b/ArroyoSketch/examples/sql/create_fake_metric.sql new file mode 100644 index 0000000..e0430b6 --- /dev/null +++ b/ArroyoSketch/examples/sql/create_fake_metric.sql @@ -0,0 +1,47 @@ +CREATE TABLE your_table ( + timestamp DOUBLE, + value DOUBLE, + metric_name TEXT, + __name__ TEXT, + instance TEXT, + job TEXT, + label_0 TEXT, + label_1 TEXT, + label_2 TEXT +) WITH ( + connector = 'filesystem', + type = 'source', + path = 
'/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/inputs/', + format = 'json', + 'source.regex-pattern' = 'fake_metric_total_10\.json' +); +CREATE TABLE output_table ( + sums DOUBLE, + instance TEXT, + job TEXT, + label_0 TEXT, + label_1 TEXT, + label_2 TEXT +) WITH ( + connector = 'filesystem', + type = 'sink', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/outputs/', + format = 'json' +); +INSERT INTO output_table +SELECT + SUM(value) as sums, + instance, + job, + label_0, + label_1, + label_2 +FROM your_table +WHERE __name__ = 'fake_metric_total' +GROUP BY + TUMBLE(INTERVAL '5 seconds'), + instance, + job, + label_0, + label_1, + label_2; diff --git a/ArroyoSketch/examples/sql/create_value.sql b/ArroyoSketch/examples/sql/create_value.sql new file mode 100644 index 0000000..a9b2505 --- /dev/null +++ b/ArroyoSketch/examples/sql/create_value.sql @@ -0,0 +1,19 @@ +CREATE TABLE your_table ( + value TEXT +) WITH ( + connector = 'filesystem', + type = 'source', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/inputs/', + format = 'json', + 'source.regex-pattern' = 'value\.json' +); +CREATE TABLE output_table ( + value TEXT +) WITH ( + connector = 'filesystem', + type = 'sink', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/outputs/', + format = 'json' +); +INSERT INTO output_table +SELECT value FROM your_table; diff --git a/ArroyoSketch/examples/sql/tumbling_window.sql b/ArroyoSketch/examples/sql/tumbling_window.sql new file mode 100644 index 0000000..1603672 --- /dev/null +++ b/ArroyoSketch/examples/sql/tumbling_window.sql @@ -0,0 +1,4 @@ +INSERT INTO arroyo_output_sink +SELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window +FROM arroyo_input_source +GROUP BY hostname, window diff --git a/ArroyoSketch/installation/setup_dependencies.sh 
b/ArroyoSketch/installation/setup_dependencies.sh new file mode 100755 index 0000000..9842e04 --- /dev/null +++ b/ArroyoSketch/installation/setup_dependencies.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +sudo apt-get install -y python3-pip + +pip3 install --user jinja2 diff --git a/ArroyoSketch/run_arroyosketch.py b/ArroyoSketch/run_arroyosketch.py new file mode 100644 index 0000000..2ee7d61 --- /dev/null +++ b/ArroyoSketch/run_arroyosketch.py @@ -0,0 +1,1081 @@ +import os +import json +import yaml +import argparse +from loguru import logger +from jinja2 import Template +from typing import Tuple, List + +from utils import arroyo_utils, http_utils, jinja_utils +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig, TableSchema +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) + + +def check_args(args): + if args.output_file_path: + raise NotImplementedError("Output file path is not implemented yet") + + # Validate source type specific parameters + if args.source_type == "kafka": + if args.input_kafka_topic is None: + raise ValueError("Input Kafka topic is required when using Kafka source") + if args.kafka_input_format != "json": + raise NotImplementedError( + "Kafka input format {} is not implemented yet".format( + args.kafka_input_format + ) + ) + elif args.source_type == "prometheus_remote_write": + if args.prometheus_base_port is None: + raise ValueError( + "Prometheus base port is required when using prometheus_remote_write source" + ) + if args.prometheus_path is None: + raise ValueError( + "Prometheus path is required when using prometheus_remote_write source" + ) + if args.prometheus_bind_ip is None: + raise ValueError( + "Prometheus bind IP is required when using prometheus_remote_write source" + ) + elif args.source_type == "file": + if args.input_file_path is None: + raise ValueError("Input file path is required 
when using file source") + if args.file_format is None: + raise ValueError("--file_format is required when using file source") + if args.ts_format is None: + raise ValueError("--ts_format is required when using file source") + if args.query_language != "sql": + raise ValueError( + "File source only supports --query_language sql, got: {}".format( + args.query_language + ) + ) + + if args.output_kafka_topic is None: + raise ValueError("Output Kafka topic is required") + + if args.output_format != "json": + raise NotImplementedError( + "Output format {} is not implemented yet".format(args.output_format) + ) + + +def create_connection_profile(args, template_dir) -> str: + """Create a connection profile JSON based on template""" + template = jinja_utils.load_template(template_dir, "connection_profile.j2") + + rendered = template.render( + profile_name=args.profile_name, bootstrap_servers=args.bootstrap_servers + ) + + # Save to file + output_path = os.path.join(args.output_dir, "connection_profile.json") + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created connection profile at: {output_path}") + + if args.dry_run: + # Generate a dummy profile ID for dry run + profile_id = "dry_run_profile_id" + print(f"[DRY RUN] Would create connection profile with ID: {profile_id}") + return profile_id + + # If API URL provided, create connection profile via API + response = http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_profiles", + data=rendered, + resource_type="connection profile", + ) + profile_id = json.loads(response).get("id") + + return profile_id + + +def delete_connection_profile(args): + if args.dry_run: + print( + f"[DRY RUN] Would delete connection profiles with name: {args.profile_name}" + ) + return + + # list all connection profiles + response = http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_profiles", + method="get", + ) + response = json.loads(response) + + # get the ID of the 
connection profile with the name args.profile_name + profiles = [ + profile for profile in response["data"] if profile["name"] == args.profile_name + ] + if len(profiles) == 0: + print(f"No connection profile found with name {args.profile_name}") + return + + # delete the connection profile with the ID + for profile in profiles: + http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_profiles/{profile['id']}", + method="delete", + ) + + +def create_source_connection_table( + args, + topic_name, + table_name, + profile_id, + metric_labels: List[str], + template_dir, + query_language: str, + metrics_dict=None, + table_schema: TableSchema = None, +): + """Create a connection table JSON (source) based on template + + Args: + metrics_dict: For optimized source only. Dictionary mapping metric names to their label lists. + e.g., {"cpu_usage": ["instance", "job"], "memory_usage": ["instance", "node"]} + query_language: "promql" or "sql" - determines schema structure + table_schema: For SQL mode, the TableSchema for this table + """ + + # Select template based on source type and query language + if args.source_type == "kafka": + if query_language == "sql": + template_name = "connection_table_kafka_sql.j2" + else: + template_name = "connection_table_kafka.j2" + elif args.source_type == "prometheus_remote_write": + if args.prometheus_remote_write_source == "optimized": + template_name = "connection_table_prometheus_remote_write_optimized.j2" + else: + template_name = "connection_table_prometheus_remote_write.j2" + elif args.source_type == "file": + template_name = "connection_table_file.j2" + else: + raise ValueError(f"Unsupported source type: {args.source_type}") + + template = jinja_utils.load_template(template_dir, template_name) + + # Create JSON schema definition for label fields + label_properties = {} + label_fields_json = [] + + for field in metric_labels: + # Add field to JSON schema properties + label_properties[field] = {"type": "string", "description": 
f"{field} label"} + + # Add field to fields array for schema + label_fields_json.append( + { + "fieldName": field, + "fieldType": {"type": {"primitive": "String"}, "sqlName": "TEXT"}, + "nullable": False, + "metadataKey": None, + } + ) + + # Generate the complete JSON schema definition + json_schema = { + "type": "object", + "required": ["labels", "value", "name", "timestamp"], + "properties": { + "labels": { + "type": "object", + "required": metric_labels, + "properties": label_properties, + "additionalProperties": False, + }, + "value": {"type": "number", "description": "Metric value"}, + "name": {"type": "string", "description": "Metric name"}, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Time when the metric was recorded, in RFC 3339 format", + }, + }, + "additionalProperties": False, + } + + if args.source_type == "kafka": + json_schema["properties"]["timestamp"] = { + "type": "string", + "format": "date-time", + "description": "Time when the metric was recorded, in RFC 3339 format", + } + elif args.source_type == "prometheus_remote_write": + json_schema["properties"]["timestamp"] = { + "type": "integer", + "description": "Unix timestamp in milliseconds when the metric was recorded", + } + + template_vars = { + "table_name": table_name, + "label_fields": label_fields_json, + "json_schema": json.dumps(json_schema, indent=2) + .replace("\n", "\\n") + .replace('"', '\\"'), + } + + if args.source_type == "kafka": + template_vars["topic_name"] = topic_name + template_vars["profile_id"] = profile_id + + # For SQL mode, override template_vars with flat schema + if query_language == "sql" and table_schema is not None: + sql_json_schema = build_sql_json_schema(table_schema) + template_vars = { + "table_name": table_name, + "topic_name": topic_name, + "profile_id": profile_id, + "time_column": table_schema.time_column, + "value_columns": table_schema.value_columns, + "metadata_columns": table_schema.metadata_columns, + "json_schema": 
json.dumps(sql_json_schema, indent=2) + .replace("\n", "\\n") + .replace('"', '\\"'), + } + elif args.source_type == "prometheus_remote_write": + template_vars["base_port"] = args.prometheus_base_port + template_vars["parallelism"] = args.parallelism + template_vars["path"] = args.prometheus_path + template_vars["bind_ip"] = args.prometheus_bind_ip + + # For optimized source, build metrics array from metrics_dict + if args.prometheus_remote_write_source == "optimized": + if metrics_dict is None: + raise ValueError("metrics_dict is required for optimized source") + + # Build metrics array: [{"name": "cpu_usage", "labels": ["instance", "job"]}, ...] + metrics_array = [ + {"name": metric_name, "labels": labels} + for metric_name, labels in metrics_dict.items() + ] + template_vars["metrics_json"] = json.dumps(metrics_array) + del template_vars["label_fields"] + # # Create a minimal JSON schema (won't be used by connector but required by API) + # minimal_schema = { + # "type": "object", + # "properties": { + # "metric_name": {"type": "string"}, + # "timestamp": {"type": "integer"}, + # "value": {"type": "number"}, + # }, + # } + # template_vars["json_schema"] = ( + # json.dumps(minimal_schema, indent=2) + # .replace("\n", "\\n") + # .replace('"', '\\"') + # ) + elif args.source_type == "file": + # NOTE: Currently assumes value_columns are F64/DOUBLE and metadata_columns are String/TEXT. + # If more precise type mappings are needed, extend SQLTableConfig with per-column type info. 
+ ts_format_to_primitive = { + "unix_millis": "UnixMillis", + "unix_seconds": "UnixMillis", + "rfc3339": "UnixNanos", + } + template_vars = { + "table_name": table_name, + "file_path": args.input_file_path, + "file_format": args.file_format, + "timestamp_field": table_schema.time_column, + "ts_format": args.ts_format, + "time_column": table_schema.time_column, + "timestamp_primitive": ts_format_to_primitive[args.ts_format], + "value_columns": table_schema.value_columns, + "metadata_columns": table_schema.metadata_columns, + } + + rendered = template.render(**template_vars) + + # Save to file + filename = "connection_table_source.json" + output_path = os.path.join(args.output_dir, filename) + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created source table at: {output_path}") + + if args.dry_run: + print(f"[DRY RUN] Would create source connection table: {table_name}") + return + + # If API URL provided, create connection table via API + http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_tables", + data=rendered, + resource_type="source table", + ) + + +def create_sink_connection_table( + args, + topic_name, + table_name, + profile_id, + template_dir, +): + """Create a connection table JSON (sink) based on template""" + + template = jinja_utils.load_template(template_dir, "connection_table_sink.j2") + + rendered = template.render( + table_name=table_name, topic_name=topic_name, profile_id=profile_id + ) + + # Save to file + filename = "connection_table_sink.json" + output_path = os.path.join(args.output_dir, filename) + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created sink table at: {output_path}") + + if args.dry_run: + print(f"[DRY RUN] Would create sink connection table: {table_name}") + return + + # If API URL provided, create connection table via API + http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_tables", + data=rendered, + 
resource_type="sink table", + ) + + +def delete_connection_table(args, table_name): + if args.dry_run: + print(f"[DRY RUN] Would delete connection table: {table_name}") + return + + # list all connection tables + response = http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_tables", + method="get", + ) + response = json.loads(response) + + # get the ID of the connection table with table_name + tables = [table for table in response["data"] if table["name"] == table_name] + if len(tables) == 0: + print(f"No connection table found with name {table_name}") + return + + # delete the connection table with the ID + for table in tables: + http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_tables/{table['id']}", + method="delete", + ) + + +def create_pipeline( + args: argparse.Namespace, + sql_queries: List[str], + agg_functions_with_params: List[Tuple[str, dict]], + streaming_aggregation_configs: List, + json_template_dir: str, + udf_dir: str, +): + """Create a pipeline JSON based on template""" + + # Escape newlines in SQL query for JSON compatibility + sql_queries = [sql_query.replace("\n", "\\n") for sql_query in sql_queries] + sql_query = "\\n\\n".join(sql_queries) + + # UDFs handling + udfs = [] + # NOTE: if we're using Arroyo built from source (v0.15.0-dev), we can directly support &str arguments in UDAFs, and thus don't need string_to_hash + # udf_names = list(set(agg_functions)) + ["string_to_hash"] + unique_agg_functions = list( + set([agg_func for agg_func, _ in agg_functions_with_params]) + ) + udf_names = unique_agg_functions + ["gzip_compress"] + # udf_names = list(set(agg_functions)) + + # Create a mapping of agg_function to parameters for UDF rendering + agg_function_params = {} + for agg_func, params in agg_functions_with_params: + if agg_func not in agg_function_params: + agg_function_params[agg_func] = params + + # Special handling for deltasetaggregator - need separate UDF instances per aggregation_id + 
deltasetaggregator_instances = [] + for config in streaming_aggregation_configs: + if config.aggregationType.lower() == "deltasetaggregator": + deltasetaggregator_instances.append(config.aggregationId) + + for udf_name in udf_names: + # Special case for deltasetaggregator - generate separate UDF for each aggregation_id + if udf_name == "deltasetaggregator_": + for aggregation_id in deltasetaggregator_instances: + template_path = os.path.join(udf_dir, f"{udf_name}.rs.j2") + + if os.path.exists(template_path): + # Render the Jinja template with aggregation_id + udf_template = jinja_utils.load_template( + udf_dir, f"{udf_name}.rs.j2" + ) + udf_body = udf_template.render(aggregation_id=aggregation_id) + udfs.append({"definition": udf_body, "language": "rust"}) + else: + raise FileNotFoundError( + f"Template {template_path} not found for deltasetaggregator" + ) + else: + # Regular UDF processing for non-deltasetaggregator UDFs + template_path = os.path.join(udf_dir, f"{udf_name}.rs.j2") + regular_path = os.path.join(udf_dir, f"{udf_name}.rs") + + # Get parameters for this UDF + params = agg_function_params.get(udf_name, {}) + + if len(params) > 0 and not os.path.exists(template_path): + raise ValueError( + f"UDF {udf_name} requires parameters {params} but no template found at {template_path}" + ) + + if os.path.exists(template_path): + # Read template source and get required parameters + with open(template_path, "r") as file: + template_source = file.read() + + # Render the Jinja template with parameters + udf_template = jinja_utils.load_template(udf_dir, f"{udf_name}.rs.j2") + + # Get all required template variables + required_params = jinja_utils.get_template_variables( + template_source, udf_template.environment + ) + + # Handle config key mapping (K -> k for KLL) + if "K" in params and "k" in required_params: + params["k"] = params["K"] + + # Check that all required parameters are provided + missing_params = required_params - set(params.keys()) + if missing_params: 
+ raise ValueError( + f"UDF {udf_name} requires parameters {missing_params} but they were not in the configuration" + ) + + udf_body = udf_template.render(**params) + elif os.path.exists(regular_path): + # Use regular file if no template exists + with open(regular_path, "r") as f: + udf_body = f.read() + else: + raise FileNotFoundError( + f"Neither {template_path} nor {regular_path} exists" + ) + + udfs.append({"definition": udf_body, "language": "rust"}) + + # Load pipeline template + pipeline_template = jinja_utils.load_template(json_template_dir, "pipeline.j2") + + rendered = pipeline_template.render( + pipeline_name=args.pipeline_name, + sql_query=sql_query, + udfs=udfs, + parallelism=args.parallelism, + ) + + # Save to file + output_path = os.path.join(args.output_dir, "pipeline.json") + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Creating pipeline at: {output_path}") + + if args.dry_run: + pipeline_id = "dry_run_pipeline_id" + print(f"[DRY RUN] Would create pipeline with ID: {pipeline_id}") + return + + # If API URL provided, create pipeline via API + response = http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="pipelines", + data=rendered, + resource_type="pipeline", + ) + + response = json.loads(response) + pipeline_id = response["id"] + print(f"Pipeline created with ID: {pipeline_id}") + + # Write pipeline ID to file for retrieval when running with avoid_long_ssh + pipeline_id_file = os.path.join(args.output_dir, "pipeline_id.txt") + with open(pipeline_id_file, "w") as f: + f.write(pipeline_id) + f.flush() + os.fsync(f.fileno()) # Ensure it's written to disk + print(f"Pipeline ID written to: {pipeline_id_file}") + + +def delete_pipelines(args): + if args.dry_run: + print("[DRY RUN] Would delete all existing pipelines") + return + + # # list all pipelines + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines", + # method="get", + # ) + # response = json.loads(response) + # if 
response["data"] is None: + # print("No pipelines found") + # return + + # pipeline_ids = [pipeline["id"] for pipeline in response["data"]] + pipeline_ids = arroyo_utils.get_all_pipelines(arroyo_url=args.arroyo_url) + + arroyo_utils.stop_and_delete_pipelines( + arroyo_url=args.arroyo_url, pipeline_ids=pipeline_ids + ) + + # # stop and delete all pipelines + # for pipeline_id in pipeline_ids: + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{pipeline_id}", + # method="patch", + # data=json.dumps({"stop": "immediate"}), + # ) + + # time.sleep(5) + # for pipeline_id in pipeline_ids: + # success = False + # for _ in range(num_retries): + # try: + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{pipeline_id}", + # method="delete", + # ) + # success = True + # except Exception as e: + # print(f"Failed to delete pipeline {pipeline_id}: {e}") + # time.sleep(5) + + # if not success: + # raise Exception( + # f"Failed to delete pipeline {pipeline_id} after {num_retries} retries" + # ) + + +def get_sql_query( + streaming_aggregation_config: StreamingAggregationConfig, + schema_config, # MetricConfig or SQLTableConfig + query_language: str, + sql_template: Template, + source_table: str, + sink_table: str, + source_type: str, + use_nested_labels: bool, + filter_metric_name: str = None, +) -> Tuple[str, str, dict]: + + # NEW: Support both tumbling and sliding windows (Issue #236) + window_type = streaming_aggregation_config.windowType + window_interval = "{} seconds".format( + streaming_aggregation_config.tumblingWindowSize + ) + window_size = "{} seconds".format(streaming_aggregation_config.windowSize) + slide_interval = "{} seconds".format(streaming_aggregation_config.slideInterval) + + logger.info( + f"Preparing SQL query for aggregation {streaming_aggregation_config.aggregationId}: " + f"windowType={window_type}, windowSize={window_size}, slideInterval={slide_interval}" + ) + + agg_function = "{}_{}".format( + 
streaming_aggregation_config.aggregationType, + streaming_aggregation_config.aggregationSubType, + ) + + # Get column names based on query language + if query_language == "sql": + time_column = schema_config.get_time_column( + streaming_aggregation_config.table_name + ) + value_column = streaming_aggregation_config.value_column + label_prefix = "" # SQL mode: no nesting + else: + time_column = "timestamp" + value_column = "value" + label_prefix = "labels." if use_nested_labels else "" + + fully_qualified_group_by_columns = [ + "{}{}".format(label_prefix, label) + for label in streaming_aggregation_config.labels["grouping"].keys + ] + fully_qualified_agg_columns = [ + "{}{}".format(label_prefix, label) + for label in streaming_aggregation_config.labels["aggregated"].keys + ] + + # Get all labels for this aggregation + if query_language == "sql": + source_identifier = streaming_aggregation_config.table_name + all_labels = schema_config.get_metadata_columns(source_identifier) + else: + source_identifier = streaming_aggregation_config.metric + all_labels = schema_config.config[source_identifier].keys + + all_labels_agg_columns = [ + "{}{}".format(label_prefix, label) for label in all_labels + ] + + # Determine if timestamps should be included as argument + include_timestamps_as_argument = ( + streaming_aggregation_config.aggregationType == "multipleincrease" + ) + + # This is just a patch for topk query. 
+ if streaming_aggregation_config.aggregationSubType == "topk": + key_list = all_labels_agg_columns + else: + key_list = fully_qualified_agg_columns + agg_columns = ", ".join(key_list) + + sql_query = sql_template.render( + aggregation_id=streaming_aggregation_config.aggregationId, + sink_table=sink_table, + agg_function=agg_function, + agg_columns=agg_columns, + source_table=source_table, + group_by_columns=", ".join(fully_qualified_group_by_columns), + window_interval=window_interval, + window_type=window_type, # NEW: for sliding/tumbling selection + window_size=window_size, # NEW: for HOP window size + slide_interval=slide_interval, # NEW: for HOP slide interval + include_timestamps_as_argument=include_timestamps_as_argument, + source_type=source_type, + filter_metric_name=filter_metric_name, # NEW: for multi-metric filtering + time_column=time_column, # NEW: for SQL mode + value_column=value_column, # NEW: for SQL mode + ) + + return sql_query, agg_function, streaming_aggregation_config.parameters + + +def build_sql_json_schema(table_schema: TableSchema) -> dict: + """Build JSON schema for SQL-style Kafka data.""" + properties = { + table_schema.time_column: { + "type": "string", + "format": "date-time", + "description": "Timestamp column", + } + } + required = [table_schema.time_column] + + for value_col in table_schema.value_columns: + properties[value_col] = { + "type": "number", + "description": f"Value column: {value_col}", + } + required.append(value_col) + + for meta_col in table_schema.metadata_columns: + properties[meta_col] = { + "type": "string", + "description": f"Metadata column: {meta_col}", + } + required.append(meta_col) + + return { + "type": "object", + "required": required, + "properties": properties, + "additionalProperties": False, + } + + +def get_source_table_name_sql(args, table_name: str) -> str: + """Get the source table name for SQL mode.""" + if args.source_type == "kafka": + return f"{args.input_kafka_topic}_{table_name.replace(' ', 
'_')}" + elif args.source_type == "file": + filename = os.path.basename(args.input_file_path) + filename_no_ext = os.path.splitext(filename)[0] + return f"{filename_no_ext}_{table_name.replace(' ', '_')}" + else: + raise ValueError(f"Unsupported source type for SQL mode: {args.source_type}") + + +def get_source_table_name(args, metric_name): + """Get the source table name based on the metric name and source type""" + if args.source_type == "kafka": + return "{}_{}".format(args.input_kafka_topic, metric_name.replace(" ", "_")) + elif args.source_type == "prometheus_remote_write": + return "prometheus_{}_{}".format( + args.prometheus_base_port, metric_name.replace(" ", "_") + ) + elif args.source_type == "file": + # Use filename without extension for table name + filename = os.path.basename(args.input_file_path) + filename_no_ext = os.path.splitext(filename)[0] + return "{}_{}".format(filename_no_ext, metric_name.replace(" ", "_")) + else: + raise ValueError(f"Unsupported source type: {args.source_type}") + + +def main(args): + os.makedirs(args.output_dir, exist_ok=True) + + # source_table = args.input_kafka_topic + "_table" + sink_table = args.output_kafka_topic + "_table" + + with open(args.config_file_path, "r") as fin: + config = yaml.safe_load(fin) + + # Query language from command line argument (defaults to promql) + query_language = args.query_language + + # Create appropriate schema config based on query language + if query_language == "promql": + schema_config = MetricConfig(config["metrics"]) + elif query_language == "sql": + schema_config = SQLTableConfig(config) + else: + raise ValueError(f"Unsupported query_language: {query_language}") + + streaming_aggregation_configs = [ + StreamingAggregationConfig.from_dict(aggregation_config) + for aggregation_config in config["aggregations"] + ] + + for streaming_aggregation_config in streaming_aggregation_configs: + streaming_aggregation_config.aggregationType = ( + 
streaming_aggregation_config.aggregationType.lower() + ) + streaming_aggregation_config.aggregationSubType = ( + streaming_aggregation_config.aggregationSubType.lower() + ) + streaming_aggregation_config.validate(schema_config, query_language) + + json_template_dir = os.path.join(args.template_dir, "json") + sql_template_dir = os.path.join(args.template_dir, "sql") + udf_dir = os.path.join(args.template_dir, "udfs") + + # Create connection profile for Kafka, since we definitely need it for sink + delete_connection_profile(args) + profile_id = create_connection_profile(args, json_template_dir) + + # For prometheus_remote_write optimized source, create ONE source for ALL metrics + if ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ): + # Create single source table for all metrics + source_table = f"prometheus_{args.prometheus_base_port}_all_metrics" + delete_connection_table(args, source_table) + + # Build metrics dict: {metric_name: [label1, label2, ...]} + metrics_dict = { + metric_name: list(metric_labels.keys) + for metric_name, metric_labels in schema_config.config.items() + } + + create_source_connection_table( + args, + None, # topic_name not needed + source_table, + profile_id, + [], # metric_labels not used for multi-metric + json_template_dir, + query_language=query_language, + metrics_dict=metrics_dict, + ) + elif query_language == "sql": + # SQL mode: create one source per table + for table_name, table_schema in schema_config.config.items(): + source_table = get_source_table_name_sql(args, table_name) + delete_connection_table(args, source_table) + + create_source_connection_table( + args, + args.input_kafka_topic, + source_table, + profile_id, + [], # metric_labels not used for SQL mode + json_template_dir, + query_language=query_language, + table_schema=table_schema, + ) + else: + # For other sources (Kafka, non-optimized prometheus, file), create one source per metric + for metric_name, 
metric_labels in schema_config.config.items(): + source_table = get_source_table_name(args, metric_name) + delete_connection_table(args, source_table) + + # Set topic_name based on source type (only needed for Kafka) + topic_name = args.input_kafka_topic if args.source_type == "kafka" else None + + create_source_connection_table( + args, + topic_name, + source_table, + profile_id, + metric_labels.keys, + json_template_dir, + query_language=query_language, + ) + + delete_connection_table(args, sink_table) + create_sink_connection_table( + args, args.output_kafka_topic, sink_table, profile_id, json_template_dir + ) + + aggregation_sql_template = jinja_utils.load_template( + sql_template_dir, "single_windowed_aggregation.j2" + ) + labels_sql_template = jinja_utils.load_template( + sql_template_dir, "distinct_windowed_labels.j2" + ) + deltasetaggregator_sql_template = jinja_utils.load_template( + sql_template_dir, "distinct_windowed_labels_deltasetaggregator.j2" + ) + value_only_sql_template = jinja_utils.load_template( + sql_template_dir, "single_arg_value_aggregation.j2" + ) + + sql_queries = [] + agg_functions_with_params = [] + + # Determine if using single unified source table + use_unified_source_table = ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ) + + for streaming_aggregation_config in streaming_aggregation_configs: + if use_unified_source_table: + # Use the unified table for all metrics + source_table = f"prometheus_{args.prometheus_base_port}_all_metrics" + elif query_language == "sql": + source_table = get_source_table_name_sql( + args, streaming_aggregation_config.table_name + ) + else: + source_table = get_source_table_name( + args, streaming_aggregation_config.metric + ) + + is_labels_accumulator: bool = ( + streaming_aggregation_config.aggregationType == "setaggregator" + or streaming_aggregation_config.aggregationType == "deltasetaggregator" + ) + + # Value-only aggregations that only 
take Vec as a single argument + is_value_only_aggregation: bool = ( + streaming_aggregation_config.aggregationType == "datasketcheskll" + ) + + # Choose appropriate SQL template + if streaming_aggregation_config.aggregationType == "deltasetaggregator": + sql_template = deltasetaggregator_sql_template + elif is_labels_accumulator: + sql_template = labels_sql_template + elif is_value_only_aggregation: + sql_template = value_only_sql_template + else: + sql_template = aggregation_sql_template + + # Determine if we should use nested labels based on source configuration + # SQL mode uses flat schema (no nesting), prometheus optimized also uses flat + use_nested_labels = not ( + query_language == "sql" + or ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ) + ) + + # When using unified source table, pass metric name for WHERE clause filtering + filter_metric_name = ( + streaming_aggregation_config.metric if use_unified_source_table else None + ) + + sql_query, agg_function, parameters = get_sql_query( + streaming_aggregation_config, + schema_config, + query_language, + sql_template, + source_table, + sink_table, + args.source_type, + use_nested_labels, + filter_metric_name, + ) + + sql_queries.append(sql_query) + # if not is_labels_accumulator: + agg_functions_with_params.append((agg_function, parameters)) + + print( + "Generated SQL query for aggregation ID {}: \n{}".format( + streaming_aggregation_config.aggregationId, sql_query + ) + ) + delete_pipelines(args) + create_pipeline( + args, + sql_queries, + agg_functions_with_params, + streaming_aggregation_configs, + json_template_dir, + udf_dir, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + # Dry run option + parser.add_argument( + "--dry_run", + action="store_true", + help="Test the logic without making API calls", + ) + + # StreamingConfig + parser.add_argument( + "--config_file_path", + type=str, + required=True, + help="Path to 
the configuration file", + ) + + # Connection profile parameters + parser.add_argument( + "--profile_name", + default="default-kafka-profile", + help="Name for the connection profile", + ) + parser.add_argument( + "--bootstrap_servers", default="localhost:9092", help="Kafka bootstrap servers" + ) + + # Source type selection + parser.add_argument( + "--source_type", + type=str, + choices=["kafka", "prometheus_remote_write", "file"], + required=True, + help="Type of source to use", + ) + + # Connection table parameters + parser.add_argument( + "--input_kafka_topic", type=str, required=False, help="Input Kafka topic" + ) + parser.add_argument( + "--input_file_path", type=str, required=False, help="Path to the input file" + ) + parser.add_argument( + "--file_format", + type=str, + required=False, + choices=["json", "parquet"], + help="Format of the input file (required for file source)", + ) + parser.add_argument( + "--ts_format", + type=str, + required=False, + choices=["unix_millis", "unix_seconds", "rfc3339"], + help="Timestamp format in the input file (required for file source)", + ) + + # Prometheus remote write source parameters + parser.add_argument( + "--prometheus_base_port", + type=int, + required=False, + help="Base port for Prometheus remote write endpoint", + ) + parser.add_argument( + "--prometheus_path", + type=str, + required=False, + help="Path for Prometheus remote write endpoint", + ) + parser.add_argument( + "--prometheus_bind_ip", + type=str, + required=False, + help="IP address to bind Prometheus remote write endpoint to", + ) + parser.add_argument( + "--parallelism", + type=int, + required=True, + help="Pipeline parallelism (number of parallel tasks)", + ) + parser.add_argument( + "--prometheus_remote_write_source", + type=str, + choices=["v1", "optimized"], + default="v1", + help="Version of Prometheus remote_write source (v1=nested labels, optimized=flattened labels)", + ) + + parser.add_argument( + "--output_kafka_topic", type=str, 
required=False, help="Output Kafka topic"
+    )
+    parser.add_argument(
+        "--output_file_path", type=str, required=False, help="Path to the output file"
+    )
+
+    parser.add_argument(
+        "--kafka_input_format",
+        required=False,
+        choices=["json", "avro-json", "avro-binary"],
+    )
+    parser.add_argument("--output_format", required=True, choices=["json", "byte"])
+
+    parser.add_argument("--pipeline_name", required=True, help="Pipeline name")
+
+    parser.add_argument(
+        "--template_dir",
+        default="./templates",
+        help="Directory containing template files",
+    )
+
+    parser.add_argument(
+        "--output_dir",
+        default="./outputs",
+        help="Directory to save the generated files",
+    )
+
+    parser.add_argument(
+        "--arroyo_url",
+        default="http://localhost:5115/api/v1",
+        help="URL of the Arroyo API server",
+    )
+
+    parser.add_argument(
+        "--query_language",
+        type=str,
+        choices=["promql", "sql"],
+        default="promql",
+        help="Query language for schema interpretation (default: promql)",
+    )
+
+    args = parser.parse_args()
+    check_args(args)
+    main(args)
diff --git a/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs
new file mode 100644
index 0000000..2d214b2
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs
@@ -0,0 +1,58 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+twox-hash = "2.1.0"
+*/
+use arroyo_udf_plugin::udf;
+use rmp_serde::Serializer;
+use serde::{Serialize, Deserialize};
+// FIX: the use-statement was missing its terminating semicolon (compile error);
+// the sibling file old/countminsketch_sum.rs shows the correct `use ...;` form.
+use twox_hash::XxHash32;
+
+// Count-Min Sketch parameters
+const WIDTH: usize = 1024; // Number of buckets per hash function
+const DEPTH: usize = 4; // Number of hash functions
+
+#[derive(Serialize, Deserialize, Clone)]
+struct CountMinSketch {
+    // FIX: generic parameters were garbled ("Vec>") in the residue; the
+    // DEPTH x WIDTH counter matrix is initialised with f64 zeros in new(),
+    // so the element type is Vec<Vec<f64>>.
+    table: Vec<Vec<f64>>,
+    width: usize,
+    depth: usize,
+}
+
+impl CountMinSketch {
+    fn new() -> Self {
+        CountMinSketch {
+            table: vec![vec![0.0; WIDTH]; DEPTH],
+            width: WIDTH,
+            depth: DEPTH,
+        }
+    }
+
+    // Update the
sketch with a key-value pair + fn update(&mut self, key: u32, value: f64) { + for i in 0..self.depth { + //let hash_val = xxh32(&key.to_le_bytes(), i as u32); + let hash = XxHash32::oneshot(i as u32, &key.to_le_bytes()); + let bucket = (hash_val as usize) % self.width; + self.table[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_sum(keys: Vec, values: Vec) -> Option> { + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + countminsketch.update(key, values[i]); + } + } + + let mut buf = Vec::new(); + countminsketch.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs new file mode 100644 index 0000000..9f1225c --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs @@ -0,0 +1,53 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[derive(Serialize)] +struct MeasurementData { + starting_measurement: f64, + starting_timestamp: i64, + last_seen_measurement: f64, + last_seen_timestamp: i64, +} + +#[udf] +fn multipleincrease_(keys: Vec, values: Vec, timestamps: Vec) -> Option> { + // Create a new hashmap to store measurement data with timestamps + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys, values, and timestamps + for (i, &key) in keys.iter().enumerate() { + if i < values.len() && i < timestamps.len() { + let value = values[i]; + let timestamp = timestamps[i]; + + let entry = per_key_storage.entry(key).or_insert(MeasurementData { + starting_measurement: value, + starting_timestamp: timestamp, + 
last_seen_measurement: value,
+                last_seen_timestamp: timestamp,
+            });
+
+            // Update last seen measurement and timestamp
+            entry.last_seen_measurement = value;
+            entry.last_seen_timestamp = timestamp;
+
+            // If this timestamp is earlier than our current starting timestamp, update starting values
+            //if timestamp < entry.starting_timestamp {
+            //    entry.starting_measurement = value;
+            //    entry.starting_timestamp = timestamp;
+            //}
+        }
+    }
+
+    let mut buf = Vec::new();
+    per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs
new file mode 100644
index 0000000..bd35507
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs
@@ -0,0 +1,28 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+*/
+
+use arroyo_udf_plugin::udf;
+use std::collections::HashMap;
+use rmp_serde::Serializer;
+use serde::Serialize;
+
+#[udf]
+// NOTE(review): garbled generics restored; u32 keys match string_to_hash's output.
+fn multipleminmax_max(keys: Vec<u32>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new hashmap
+    let mut per_key_storage: HashMap<u32, f64> = HashMap::new();
+
+    // Iterate through the keys and values
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            // FIX(comment): this variant keeps the per-key MAXIMUM — update when the
+            // new value is GREATER than the stored one. The previous comment said
+            // "less than", copied verbatim from the min variant, contradicting .max().
+            per_key_storage.entry(key).and_modify(|v| *v = (*v).max(values[i])).or_insert(values[i]);
+        }
+    }
+
+    let mut buf = Vec::new();
+    per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs
new file mode 100644
index 0000000..750acaa
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs
@@ -0,0 +1,28 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+*/
+
+use
arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[udf] +fn multipleminmax_min(keys: Vec, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage.entry(key).and_modify(|v| *v = (*v).min(values[i])).or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs new file mode 100644 index 0000000..d8441c3 --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[udf] +fn multiplesum_count(keys: Vec, values: Vec) -> Option> { + // Create a new hashmap to store the count for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key).or_insert(0.0) += 1.0; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs new file mode 100644 index 0000000..0a3cc80 --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; 
+use std::collections::HashMap;
+use rmp_serde::Serializer;
+use serde::Serialize;
+
+#[udf]
+// NOTE(review): garbled generics restored; u32 keys match string_to_hash's output.
+fn multiplesum_sum(keys: Vec<u32>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new hashmap to store the sum of values for each key
+    let mut key_sums: HashMap<u32, f64> = HashMap::new();
+
+    // Iterate through the keys and values and update the sum for each key
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            *key_sums.entry(key).or_insert(0.0) += values[i];
+        }
+    }
+
+    let mut buf = Vec::new();
+    key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs
new file mode 100644
index 0000000..a29bee9
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs
@@ -0,0 +1,57 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+xxhash-rust = { version = "0.8", features = ["xxh32"] }
+*/
+use arroyo_udf_plugin::udf;
+use rmp_serde::Serializer;
+use serde::{Serialize, Deserialize};
+use xxhash_rust::xxh32::xxh32;
+
+// Count-Min Sketch parameters
+const WIDTH: usize = 1024; // Number of buckets per hash function
+const DEPTH: usize = 4; // Number of hash functions
+
+#[derive(Serialize, Deserialize, Clone)]
+struct CountMinSketch {
+    // FIX: garbled generics restored — a DEPTH x WIDTH matrix of f64 counters,
+    // as initialised in new().
+    table: Vec<Vec<f64>>,
+    width: usize,
+    depth: usize,
+}
+
+impl CountMinSketch {
+    fn new() -> Self {
+        CountMinSketch {
+            table: vec![vec![0.0; WIDTH]; DEPTH],
+            width: WIDTH,
+            depth: DEPTH,
+        }
+    }
+
+    // Update the sketch with a key-value pair
+    fn update(&mut self, key: u64, value: f64) {
+        for i in 0..self.depth {
+            let hash_val = xxh32(&key.to_le_bytes(), i as u32);
+            let bucket = (hash_val as usize) % self.width;
+            self.table[i][bucket] += value;
+        }
+    }
+}
+
+#[udf]
+// FIX: garbled generics restored; this legacy variant uses u64 keys (see update()).
+fn countminsketch_sum(keys: Vec<u64>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new Count-Min Sketch
+    let mut countminsketch = CountMinSketch::new();
+
+    // Iterate
through the keys and values and update the sketch for each entry
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            countminsketch.update(key, values[i]);
+        }
+    }
+
+    let mut buf = Vec::new();
+    countminsketch.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs b/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs
new file mode 100644
index 0000000..c08e244
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs
@@ -0,0 +1,16 @@
+/*
+[dependencies]
+ahash = "0.8.6"
+*/
+
+use arroyo_udf_plugin::udf;
+use ahash::AHasher;
+use std::hash::{Hash, Hasher};
+// FIX: removed stray `use xxhash_rust::xxh32::xxh32;` — it was never used, and
+// `xxhash-rust` is not in this UDF's [dependencies] header, so the import
+// could not resolve when the UDF is compiled.
+
+#[udf]
+fn string_to_hash(input: &str) -> u64 {
+    let mut hasher = AHasher::default();
+    input.hash(&mut hasher);
+    hasher.finish()
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs b/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs
new file mode 100644
index 0000000..7014a62
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs
@@ -0,0 +1,15 @@
+/*
+[dependencies]
+twox-hash = "2.1.0"
+*/
+
+use arroyo_udf_plugin::udf;
+use twox_hash::XxHash32;
+
+#[udf]
+fn string_to_hash(input: &str, seed: u32) -> u32 {
+    //let mut hasher = XxHash32::with_seed(seed);
+    //hasher.write(input.as_bytes());
+    //hasher.finish() as u32
+    XxHash32::oneshot(seed, input.as_bytes())
+}
diff --git a/ArroyoSketch/templates/json/connection_profile.j2 b/ArroyoSketch/templates/json/connection_profile.j2
new file mode 100644
index 0000000..2cf0827
--- /dev/null
+++ b/ArroyoSketch/templates/json/connection_profile.j2
@@ -0,0 +1,14 @@
+{
+  "name": "{{ profile_name }}",
+  "connector": "kafka",
+  "config": {
+    "authentication": {},
+    "bootstrapServers": "{{ bootstrap_servers }}",
+    "name": "{{ profile_name }}",
+    "schemaRegistryEnum": {},
+    "connectionProperties": {
+      "message.max.bytes": "20971520",
+      "batch.size": "20971520"
+ } + } +} diff --git a/ArroyoSketch/templates/json/connection_table_file.j2 b/ArroyoSketch/templates/json/connection_table_file.j2 new file mode 100644 index 0000000..100f971 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_file.j2 @@ -0,0 +1,62 @@ +{ + "name": "{{ table_name }}", + "connector": "single_file_custom", + "tableType": "source", + "config": { + "path": "{{ file_path }}", + "file_format": "{{ file_format }}", + "timestamp_field": "{{ timestamp_field }}", + "ts_format": "{{ ts_format }}" + }, + "schema": { + "format": { + "{{ file_format }}": {} + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "{{ time_column }}", + "fieldType": { + "type": { + "primitive": "{{ timestamp_primitive }}" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + {% for value_col in value_columns %} + { + "fieldName": "{{ value_col }}", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + }, + {% endfor %} + {% for col in metadata_columns %} + { + "fieldName": "{{ col }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ], + "inferred": true, + "primaryKeys": [] + } +} diff --git a/ArroyoSketch/templates/json/connection_table_kafka.j2 b/ArroyoSketch/templates/json/connection_table_kafka.j2 new file mode 100644 index 0000000..519a2c9 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_kafka.j2 @@ -0,0 +1,98 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "{{ topic_name }}", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + 
"unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + {% for field in label_fields %} + { + "fieldName": "{{ field.fieldName }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 b/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 new file mode 100644 index 0000000..8926f6a --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 @@ -0,0 +1,74 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "{{ topic_name }}", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + 
"fields": [ + { + "fieldName": "{{ time_column }}", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + {% for value_col in value_columns %} + { + "fieldName": "{{ value_col }}", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + }, + {% endfor %} + {% for col in metadata_columns %} + { + "fieldName": "{{ col }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 new file mode 100644 index 0000000..606f45e --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 @@ -0,0 +1,96 @@ +{ + "name": "{{ table_name }}", + "connector": "prometheus_remote_write_with_schema", + "tableType": "source", + "config": { + "bind_address": "{{ bind_ip }}", + "base_port": {{ base_port }}, + "parallelism": {{ parallelism }}, + "path": "{{ path }}" + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + {% for field in label_fields %} + { + "fieldName": "{{ field.fieldName }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if 
not loop.last %},{% endif %} + {% endfor %} + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixMillis" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + } +} diff --git a/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 new file mode 100644 index 0000000..d46cd8f --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 @@ -0,0 +1,12 @@ +{ + "name": "{{ table_name }}", + "connector": "prometheus_remote_write_optimized", + "tableType": "source", + "config": { + "bind_address": "{{ bind_ip }}", + "base_port": {{ base_port }}, + "parallelism": {{ parallelism }}, + "path": "{{ path }}", + "metrics": {{ metrics_json }} + } +} diff --git a/ArroyoSketch/templates/json/connection_table_sink.j2 b/ArroyoSketch/templates/json/connection_table_sink.j2 new file mode 100644 index 0000000..5baaeb1 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_sink.j2 @@ -0,0 +1,32 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "sink", + "config": { + "topic": "{{ topic_name }}", + "type": { + "commit_mode": "at_least_once" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + 
}, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [], + "inferred": true, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/pipeline.j2 b/ArroyoSketch/templates/json/pipeline.j2 new file mode 100644 index 0000000..1d7731b --- /dev/null +++ b/ArroyoSketch/templates/json/pipeline.j2 @@ -0,0 +1,6 @@ +{ + "name": "{{ pipeline_name }}", + "query": "{{ sql_query }}", + "udfs": {{ udfs|tojson(indent=2) }}, + "parallelism": {{ parallelism }} +} diff --git a/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 b/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 new file mode 100644 index 0000000..70da6d1 --- /dev/null +++ b/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({% if agg_columns %}concat_ws(';', {{ agg_columns }}){% else %}''{% endif %})) as precompute, + {{ aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 b/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 new file mode 100644 index 0000000..49c41f4 --- /dev/null +++ b/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}{{ aggregation_id }}_({% if agg_columns %}concat_ws(';', {{ agg_columns }}){% else %}''{% endif %})) as precompute, + {{ aggregation_id }} as 
aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 b/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 new file mode 100644 index 0000000..2a68caf --- /dev/null +++ b/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({{ value_column }})) as precompute, + {{ aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 b/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 new file mode 100644 index 0000000..0f55fe1 --- /dev/null +++ b/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({% if agg_columns %}concat_ws(';', {{ agg_columns }}), {% else %}'', {% endif %}{{ value_column }}{% if include_timestamps_as_argument %}{% if source_type == "kafka" %}, cast(extract(epoch from {{ time_column }}) * 1000 as bigint){% else %}, cast({{ time_column }} as bigint){% endif %}{% endif %})) as precompute, + {{ 
aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 b/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 new file mode 100644 index 0000000..16b532c --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 @@ -0,0 +1,58 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use twox_hash::XxHash32; + +// Count-Min Sketch parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_count(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in 
keys.iter().enumerate() { + countminsketch.update(key, 1.0); + } + + let mut buf = Vec::new(); + countminsketch + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 b/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 new file mode 100644 index 0000000..8bf0530 --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 @@ -0,0 +1,63 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use twox_hash::XxHash32; + +// Count-Min Sketch parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_sum(keys: Vec<&str>, values: Vec) -> Option> { + // Check that keys and values have equal length + if keys.len() != values.len() { + return None; + } + + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in keys.iter().enumerate() { + countminsketch.update(key, values[i]); + } + + let mut buf = Vec::new(); + countminsketch + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git 
a/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 b/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 new file mode 100644 index 0000000..988d780 --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 @@ -0,0 +1,194 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::BinaryHeap; +use std::cmp::Ordering; +use twox_hash::XxHash32; + +// Count-Min Sketch with Heap parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function +const HEAP_SIZE: usize = {{ heapsize }}; // Maximum number of top-k items to track + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } + + // Update the sketch and return the estimated frequency in one pass + fn update_with_query(&mut self, key: &str, value: f64) -> f64 { + let mut min_estimate = f64::MAX; + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + // Track minimum while updating + let estimate = self.sketch[i][bucket]; + if estimate < min_estimate { + min_estimate = estimate; + } + } + min_estimate + } +} + +// HeapItem: equality and ordering based on value only 
+#[derive(Serialize, Deserialize, Clone)] +struct HeapItem { + key: String, + value: f64, +} + +// Implement PartialEq based on value only (consistent with Ord) +impl PartialEq for HeapItem { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +// Implement Eq for HeapItem (required for BinaryHeap) +impl Eq for HeapItem {} + +// Implement Ord for HeapItem to create a min-heap (reverse ordering) +impl Ord for HeapItem { + fn cmp(&self, other: &Self) -> Ordering { + // Reverse ordering for min-heap (smallest values at top) + // Suitable for topk, but if you want to implement bottomk, you should use a max-heap + other.value.partial_cmp(&self.value).unwrap_or(Ordering::Equal) + } +} + +impl PartialOrd for HeapItem { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +struct CountMinSketchWithHeap { + sketch: CountMinSketch, + topk_heap: BinaryHeap, // Maintain as heap during processing + heap_size: usize, +} + +/// TODO: Make this serializable version more expanded as count_min_sketch_with_heap_accumulator.rs in QueryEngineRust +// Serializable version for output +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketchWithHeapSerialized { + sketch: CountMinSketch, + topk_heap: Vec, // Convert to Vec for serialization + heap_size: usize, +} + +impl CountMinSketchWithHeap { + fn new() -> Self { + CountMinSketchWithHeap { + sketch: CountMinSketch::new(), + topk_heap: BinaryHeap::new(), + heap_size: HEAP_SIZE, + } + } + + // Update the sketch and maintain the top-k heap + fn update_with_topk(&mut self, key: &str, value: f64) { + // Update the Count-Min Sketch and get the estimated frequency in one pass + let estimated_freq = self.sketch.update_with_query(key, value); + + // Check if the key already exists in the heap + // TODO: This takes O(k) time, can we do better? + // Or is different keys guaranteed and we can just omit this check? + // Or can we optimize this with a HashMap? 
+ let existing_item = self.topk_heap.iter().find(|item| item.key == key).cloned(); + + if let Some(existing) = existing_item { + // Remove the old entry and add updated one + self.topk_heap.retain(|item| item.key != key); // retain others = remove this one + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } else { + // New key: add to heap + if self.topk_heap.len() < self.heap_size { + // Heap not full, just add + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } else { + // Heap is full, check if this item should replace the minimum + // Peeking is cheap. No worries. + if let Some(min_item) = self.topk_heap.peek() { + if estimated_freq > min_item.value { + self.topk_heap.pop(); // Remove minimum + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } + } + } + } + } + + // Convert to serializable format + fn to_serializable(self) -> CountMinSketchWithHeapSerialized { + CountMinSketchWithHeapSerialized { + sketch: self.sketch, + topk_heap: self.topk_heap.into_iter().collect(), + heap_size: self.heap_size, + } + } +} + +#[udf] +fn countminsketchwithheap_topk(keys: Vec<&str>, values: Vec) -> Option> { + // Check that keys and values have equal length + if keys.len() != values.len() { + return None; + } + + // Create a new Count-Min Sketch with Heap + let mut cms_with_heap = CountMinSketchWithHeap::new(); + + // Iterate through the keys and values and update the sketch and heap + for (i, &key) in keys.iter().enumerate() { + cms_with_heap.update_with_topk(key, values[i]); + } + + // Convert to serializable format (heap to vec conversion happens only once here) + let serializable = cms_with_heap.to_serializable(); + + let mut buf = Vec::new(); + serializable + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 b/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 new file mode 
100644 index 0000000..ca34027 --- /dev/null +++ b/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 @@ -0,0 +1,59 @@ +/* +[dependencies] +dsrs = { git = "https://github.com/SketchDB/datasketches-rs" } +arroyo-udf-plugin = "0.1" +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use dsrs::KllDoubleSketch; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; + +const DEFAULT_K: u16 = {{ k }}; + +#[derive(Serialize, Deserialize)] +struct KllSketchData { + k: u16, + sketch_bytes: Vec, +} + +struct KllSketchWrapper { + k: u16, + sketch: KllDoubleSketch, +} + +impl KllSketchWrapper { + fn new(k: u16) -> Self { + KllSketchWrapper { + k, + sketch: KllDoubleSketch::with_k(k), + } + } + + fn update(&mut self, values: &[f64]) { + for &value in values { + self.sketch.update(value); + } + } + + fn serialize_bytes(&self) -> Vec { + let sketch_data = self.sketch.serialize(); + let serialized = KllSketchData { + k: self.k, + sketch_bytes: sketch_data.as_ref().to_vec(), + }; + let mut buf = Vec::new(); + rmp_serde::encode::write(&mut buf, &serialized).unwrap(); + buf + } +} + +#[udf] +fn datasketcheskll_(values: Vec) -> Option> { + let mut kll_wrapper = KllSketchWrapper::new(DEFAULT_K); + kll_wrapper.update(&values); + + Some(kll_wrapper.serialize_bytes()) +} diff --git a/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 b/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 new file mode 100644 index 0000000..7a70a86 --- /dev/null +++ b/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 @@ -0,0 +1,62 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +lazy_static = "1.4" +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::sync::Mutex; +use lazy_static::lazy_static; + +#[derive(Serialize, Deserialize, Clone)] +struct DeltaResult { + added: HashSet, + removed: HashSet, +} + 
+// Global state to store the previous window's labels for aggregation {{aggregation_id}} +lazy_static! { + static ref PREVIOUS_LABELS_{{aggregation_id}}: Mutex> = Mutex::new(HashSet::new()); +} + +// Stateful UDAF that tracks label changes between windows +// This takes all concatenated label strings for a window and computes delta vs previous window +#[udf] +fn deltasetaggregator_{{aggregation_id}}_(concatenated_labels: Vec<&str>) -> Option> { + // Return None if input is empty + if concatenated_labels.is_empty() { + return None; + } + + // Collect current window's unique label combinations + let mut current_labels = HashSet::new(); + for label_string in concatenated_labels { + current_labels.insert(label_string.to_string()); + } + + // Get previous window's labels and update state + let mut previous_labels_guard = PREVIOUS_LABELS_{{aggregation_id}}.lock().unwrap(); + let previous_labels = previous_labels_guard.clone(); + + // Calculate differences + let added: HashSet = current_labels.difference(&previous_labels).cloned().collect(); + let removed: HashSet = previous_labels.difference(¤t_labels).cloned().collect(); + + // Update state for next window + *previous_labels_guard = current_labels; + + // Create delta result + let delta_result = DeltaResult { + added, + removed, + }; + + // Serialize result + let mut buf = Vec::new(); + delta_result.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/gzip_compress.rs b/ArroyoSketch/templates/udfs/gzip_compress.rs new file mode 100644 index 0000000..1977723 --- /dev/null +++ b/ArroyoSketch/templates/udfs/gzip_compress.rs @@ -0,0 +1,16 @@ +/* +[dependencies] +flate2 = "1.1.1" +*/ + +use arroyo_udf_plugin::udf; +use std::io::Write; +use flate2::{Compression, write::GzEncoder}; + +#[udf] +fn gzip_compress(data: &[u8]) -> Option> { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + + encoder.write_all(&data).ok()?; + encoder.finish().ok() +} diff 
--git a/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 b/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 new file mode 100644 index 0000000..b9be3cb --- /dev/null +++ b/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 @@ -0,0 +1,83 @@ +/* +[dependencies] +dsrs = { git = "https://github.com/SketchDB/datasketches-rs" } +arroyo-udf-plugin = "0.1" +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +xxhash-rust = { version = "0.8", features = ["xxh32"] } +*/ + +use arroyo_udf_plugin::udf; +use dsrs::KllDoubleSketch; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use xxhash_rust::xxh32::xxh32; + +const ROW_NUM: usize = {{ row_num }}; +const COL_NUM: usize = {{ col_num }}; +const DEFAULT_K: u16 = {{ k }}; + +// Match QueryEngineRust format exactly +#[derive(Deserialize, Serialize)] +struct KllSketchData { + k: u16, + sketch_bytes: Vec, +} + +#[derive(Serialize, Deserialize)] +struct HydraKllSketchData { + row_num: usize, + col_num: usize, + sketches: Vec>, +} + +#[udf] +fn hydrakll_(keys: Vec<&str>, values: Vec) -> Option> { + // Initialize 2D matrix of KLL sketches + let mut sketches: Vec> = vec![ + vec![KllDoubleSketch::with_k(DEFAULT_K); COL_NUM]; + ROW_NUM + ]; + + // Process each key-value pair + for (i, &key) in keys.iter().enumerate() { + if i >= values.len() { + break; + } + + let key_bytes = key.as_bytes(); + + // Update each row using different hash functions + for row in 0..ROW_NUM { + let hash_value = xxh32(key_bytes, row as u32); + let col_index = (hash_value as usize) % COL_NUM; + sketches[row][col_index].update(values[i]); + } + } + + // Serialize to match QueryEngineRust format + let sketch_data: Vec> = sketches + .iter() + .map(|row| { + row.iter() + .map(|sketch| { + let sketch_bytes = sketch.serialize(); + KllSketchData { + k: DEFAULT_K, + sketch_bytes: sketch_bytes.as_ref().to_vec(), + } + }) + .collect() + }) + .collect(); + + let hydra_data = HydraKllSketchData { + row_num: ROW_NUM, + col_num: COL_NUM, + sketches: 
sketch_data, + }; + + let mut buf = Vec::new(); + hydra_data.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleincrease_.rs b/ArroyoSketch/templates/udfs/multipleincrease_.rs new file mode 100644 index 0000000..3ba2b42 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleincrease_.rs @@ -0,0 +1,55 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[derive(Serialize)] +struct MeasurementData { + starting_measurement: f64, + starting_timestamp: i64, + last_seen_measurement: f64, + last_seen_timestamp: i64, +} + +#[udf] +fn multipleincrease_(keys: Vec<&str>, values: Vec, timestamps: Vec) -> Option> { + // Create a new hashmap to store measurement data with timestamps + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys, values, and timestamps + for (i, &key) in keys.iter().enumerate() { + if i < values.len() && i < timestamps.len() { + let value = values[i]; + let timestamp = timestamps[i]; + + let entry = per_key_storage.entry(key.to_string()).or_insert(MeasurementData { + starting_measurement: value, + starting_timestamp: timestamp, + last_seen_measurement: value, + last_seen_timestamp: timestamp, + }); + + // Update last seen measurement and timestamp + entry.last_seen_measurement = value; + entry.last_seen_timestamp = timestamp; + + // If this timestamp is earlier than our current starting timestamp, update starting values + //if timestamp < entry.starting_timestamp { + // entry.starting_measurement = value; + // entry.starting_timestamp = timestamp; + //} + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleminmax_max.rs b/ArroyoSketch/templates/udfs/multipleminmax_max.rs new file mode 
100644 index 0000000..a636208 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleminmax_max.rs @@ -0,0 +1,33 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multipleminmax_max(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage + .entry(key.to_string()) + .and_modify(|v| *v = (*v).max(values[i])) + .or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleminmax_min.rs b/ArroyoSketch/templates/udfs/multipleminmax_min.rs new file mode 100644 index 0000000..a44268c --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleminmax_min.rs @@ -0,0 +1,33 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multipleminmax_min(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage + .entry(key.to_string()) + .and_modify(|v| *v = (*v).min(values[i])) + .or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git 
a/ArroyoSketch/templates/udfs/multiplesum_count.rs b/ArroyoSketch/templates/udfs/multiplesum_count.rs new file mode 100644 index 0000000..67a8691 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multiplesum_count.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multiplesum_count(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap to store the count for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key.to_string()).or_insert(0.0) += 1.0; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multiplesum_sum.rs b/ArroyoSketch/templates/udfs/multiplesum_sum.rs new file mode 100644 index 0000000..3615105 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multiplesum_sum.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multiplesum_sum(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap to store the sum of values for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values and update the sum for each key + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key.to_string()).or_insert(0.0) += values[i]; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/setaggregator_.rs b/ArroyoSketch/templates/udfs/setaggregator_.rs new file mode 100644 index 
0000000..9ab897a --- /dev/null +++ b/ArroyoSketch/templates/udfs/setaggregator_.rs @@ -0,0 +1,38 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; + +#[derive(Serialize, Deserialize)] +struct StringSet { + values: HashSet, +} + +#[udf] +fn setaggregator_(strings: Vec<&str>) -> Option> { + // Return None if input is empty + if strings.is_empty() { + return None; + } + + // Create a HashSet and collect all unique strings + let mut unique_strings = HashSet::new(); + for s in strings { + unique_strings.insert(s.to_string()); + } + + // Wrap in a serializable struct + let string_set = StringSet { + values: unique_strings, + }; + + let mut buf = Vec::new(); + string_set.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/tests/__init__.py b/ArroyoSketch/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ArroyoSketch/tests/test_integration.py b/ArroyoSketch/tests/test_integration.py new file mode 100644 index 0000000..335ddeb --- /dev/null +++ b/ArroyoSketch/tests/test_integration.py @@ -0,0 +1,435 @@ +""" +Integration tests for SQL schema support in ArroyoSketch. + +Tests cover: +1. Helper functions (build_sql_json_schema, get_source_table_name_sql) +2. get_sql_query with SQL mode +3. 
End-to-end config parsing +""" + +import pytest +import sys +import os +import yaml + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from promql_utilities.streaming_config.SQLTableConfig import ( # noqa: E402 + SQLTableConfig, + TableSchema, +) +from promql_utilities.streaming_config.StreamingAggregationConfig import ( # noqa: E402 + StreamingAggregationConfig, +) +from promql_utilities.streaming_config.MetricConfig import MetricConfig # noqa: E402 +from run_arroyosketch import ( # noqa: E402 + build_sql_json_schema, + get_source_table_name_sql, + get_sql_query, +) +from utils import jinja_utils # noqa: E402 + + +class TestBuildSqlJsonSchema: + """Tests for build_sql_json_schema helper function.""" + + def test_basic_schema(self): + """Test building JSON schema for a simple table.""" + table_schema = TableSchema( + time_column="timestamp", + value_columns=["cpu_usage"], + metadata_columns=["host"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert json_schema["type"] == "object" + assert "timestamp" in json_schema["required"] + assert "cpu_usage" in json_schema["required"] + assert "host" in json_schema["required"] + assert json_schema["properties"]["timestamp"]["type"] == "string" + assert json_schema["properties"]["timestamp"]["format"] == "date-time" + assert json_schema["properties"]["cpu_usage"]["type"] == "number" + assert json_schema["properties"]["host"]["type"] == "string" + + def test_multiple_value_columns(self): + """Test building JSON schema with multiple value columns.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val1", "val2", "val3"], + metadata_columns=["label"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert "val1" in json_schema["required"] + assert "val2" in json_schema["required"] + assert "val3" in json_schema["required"] + assert json_schema["properties"]["val1"]["type"] == "number" + assert 
json_schema["properties"]["val2"]["type"] == "number" + assert json_schema["properties"]["val3"]["type"] == "number" + + def test_multiple_metadata_columns(self): + """Test building JSON schema with multiple metadata columns.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["host", "region", "datacenter"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert "host" in json_schema["required"] + assert "region" in json_schema["required"] + assert "datacenter" in json_schema["required"] + for col in ["host", "region", "datacenter"]: + assert json_schema["properties"][col]["type"] == "string" + + def test_additional_properties_false(self): + """Test that additionalProperties is set to False.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + + json_schema = build_sql_json_schema(table_schema) + assert json_schema["additionalProperties"] is False + + +class TestGetSourceTableNameSql: + """Tests for get_source_table_name_sql helper function.""" + + def test_kafka_source(self): + """Test source table name generation for Kafka.""" + + class MockArgs: + source_type = "kafka" + input_kafka_topic = "test_topic" + + args = MockArgs() + result = get_source_table_name_sql(args, "my_table") + assert result == "test_topic_my_table" + + def test_kafka_source_with_spaces(self): + """Test source table name generation with spaces in table name.""" + + class MockArgs: + source_type = "kafka" + input_kafka_topic = "test_topic" + + args = MockArgs() + result = get_source_table_name_sql(args, "my table name") + assert result == "test_topic_my_table_name" + + def test_file_source(self): + """Test source table name generation for file source.""" + + class MockArgs: + source_type = "file" + input_file_path = "/data/metrics.parquet" + + args = MockArgs() + result = get_source_table_name_sql(args, "my_table") + assert result == "metrics_my_table" + + def 
test_file_source_with_spaces(self): + """Test source table name generation for file source with spaces in table name.""" + + class MockArgs: + source_type = "file" + input_file_path = "/data/metrics.parquet" + + args = MockArgs() + result = get_source_table_name_sql(args, "my table name") + assert result == "metrics_my_table_name" + + def test_unsupported_source_type(self): + """Test that unsupported source types raise ValueError.""" + + class MockArgs: + source_type = "prometheus_remote_write" + input_kafka_topic = "test_topic" + + args = MockArgs() + with pytest.raises(ValueError, match="Unsupported source type for SQL mode"): + get_source_table_name_sql(args, "my_table") + + +class TestGetSqlQuerySQL: + """Tests for get_sql_query with SQL mode.""" + + @pytest.fixture + def sql_schema_config(self): + """Create a sample SQL schema config.""" + return SQLTableConfig( + { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "event_time", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region", "service"], + } + ] + } + ) + + @pytest.fixture + def sql_agg_config(self): + """Create a sample SQL aggregation config.""" + return StreamingAggregationConfig.from_dict( + { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host", "region"], + "aggregated": ["service"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + ) + + @pytest.fixture + def sql_template(self): + """Load the SQL template.""" + template_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "templates", + "sql", + ) + return jinja_utils.load_template(template_dir, "single_windowed_aggregation.j2") + + def test_sql_query_uses_value_column( + self, sql_schema_config, sql_agg_config, sql_template + ): + """Test that SQL query uses the correct value_column.""" + 
sql_agg_config.aggregationType = "multiplesum" + sql_agg_config.aggregationSubType = "sum" + + sql_query, agg_function, params = get_sql_query( + streaming_aggregation_config=sql_agg_config, + schema_config=sql_schema_config, + query_language="sql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=False, + ) + + assert "cpu_usage" in sql_query + assert "value" not in sql_query or "cpu_usage" in sql_query + + def test_sql_query_no_label_prefix( + self, sql_schema_config, sql_agg_config, sql_template + ): + """Test that SQL mode doesn't use labels. prefix.""" + sql_agg_config.aggregationType = "multiplesum" + sql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=sql_agg_config, + schema_config=sql_schema_config, + query_language="sql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=False, + ) + + # Should have flat column names, not labels.host + assert "labels.host" not in sql_query + assert "labels.region" not in sql_query + # Should have host and region directly + assert "host" in sql_query + assert "region" in sql_query + + +class TestGetSqlQueryPromQL: + """Tests for get_sql_query with PromQL mode (backward compatibility).""" + + @pytest.fixture + def promql_metric_config(self): + """Create a sample PromQL metric config.""" + return MetricConfig({"fake_metric": ["instance", "job", "label_0"]}) + + @pytest.fixture + def promql_agg_config(self): + """Create a sample PromQL aggregation config.""" + return StreamingAggregationConfig.from_dict( + { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric", + "labels": { + "grouping": ["instance", "job"], + "aggregated": ["label_0"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + ) + + 
@pytest.fixture + def sql_template(self): + """Load the SQL template.""" + template_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "templates", + "sql", + ) + return jinja_utils.load_template(template_dir, "single_windowed_aggregation.j2") + + def test_promql_query_uses_value( + self, promql_metric_config, promql_agg_config, sql_template + ): + """Test that PromQL query uses 'value' column.""" + promql_agg_config.aggregationType = "multiplesum" + promql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=promql_agg_config, + schema_config=promql_metric_config, + query_language="promql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=True, + ) + + assert "value" in sql_query + + def test_promql_query_uses_label_prefix( + self, promql_metric_config, promql_agg_config, sql_template + ): + """Test that PromQL query uses labels. 
prefix when nested.""" + promql_agg_config.aggregationType = "multiplesum" + promql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=promql_agg_config, + schema_config=promql_metric_config, + query_language="promql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=True, + ) + + assert "labels.instance" in sql_query + assert "labels.job" in sql_query + + +class TestEndToEndConfigParsing: + """End-to-end tests for config file parsing.""" + + def test_parse_sql_config_file(self): + """Test parsing a complete SQL config file.""" + config_content = """ +query_language: sql + +tables: + - name: system_metrics + time_column: event_time + value_columns: + - cpu_percent + - memory_mb + metadata_columns: + - hostname + - datacenter + - service + +aggregations: + - aggregationId: 1 + table_name: system_metrics + value_column: cpu_percent + aggregationType: MultipleSum + aggregationSubType: sum + labels: + grouping: + - hostname + - datacenter + aggregated: + - service + rollup: [] + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +""" + config = yaml.safe_load(config_content) + + assert config["query_language"] == "sql" + + schema_config = SQLTableConfig(config) + assert "system_metrics" in schema_config.config + + agg_configs = [ + StreamingAggregationConfig.from_dict(agg) for agg in config["aggregations"] + ] + assert len(agg_configs) == 1 + assert agg_configs[0].table_name == "system_metrics" + assert agg_configs[0].value_column == "cpu_percent" + + # Validate should pass + agg_configs[0].validate(schema_config, query_language="sql") + + def test_parse_promql_config_file(self): + """Test parsing a complete PromQL config file (backward compatibility).""" + config_content = """ +aggregations: + - aggregationId: 1 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + - label_0 + grouping: + - instance 
+ - job + rollup: [] + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 + +metrics: + fake_metric_total: + - instance + - job + - label_0 +""" + config = yaml.safe_load(config_content) + + # No query_language means default to promql + query_language = config.get("query_language", "promql") + assert query_language == "promql" + + metric_config = MetricConfig(config["metrics"]) + assert "fake_metric_total" in metric_config.config + + agg_configs = [ + StreamingAggregationConfig.from_dict(agg) for agg in config["aggregations"] + ] + assert len(agg_configs) == 1 + assert agg_configs[0].metric == "fake_metric_total" + + # Validate should pass + agg_configs[0].validate(metric_config, query_language="promql") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ArroyoSketch/tests/test_sql_schema.py b/ArroyoSketch/tests/test_sql_schema.py new file mode 100644 index 0000000..5924ddf --- /dev/null +++ b/ArroyoSketch/tests/test_sql_schema.py @@ -0,0 +1,463 @@ +""" +Unit tests for SQL schema support in ArroyoSketch. + +Tests cover: +1. SQLTableConfig parsing and validation +2. StreamingAggregationConfig SQL mode support +3. 
Backward compatibility with PromQL configs +""" + +import pytest +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from promql_utilities.streaming_config.SQLTableConfig import ( # noqa: E402 + SQLTableConfig, + TableSchema, +) +from promql_utilities.streaming_config.StreamingAggregationConfig import ( # noqa: E402 + StreamingAggregationConfig, +) +from promql_utilities.streaming_config.MetricConfig import MetricConfig # noqa: E402 + + +class TestSQLTableConfig: + """Tests for SQLTableConfig class.""" + + def test_parse_single_table(self): + """Test parsing a single table from sql_schema.""" + yaml_dict = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "timestamp", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + + assert "cpu_metrics" in config.config + table = config.get_table("cpu_metrics") + assert table.time_column == "timestamp" + assert table.value_columns == ["cpu_usage", "cpu_system"] + assert table.metadata_columns == ["host", "region"] + + def test_parse_multiple_tables(self): + """Test parsing multiple tables from sql_schema.""" + yaml_dict = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host"], + }, + { + "name": "memory_metrics", + "time_column": "event_time", + "value_columns": ["memory_used", "memory_free"], + "metadata_columns": ["host", "datacenter"], + }, + ] + } + + config = SQLTableConfig(yaml_dict) + + assert len(config.config) == 2 + assert "cpu_metrics" in config.config + assert "memory_metrics" in config.config + + cpu_table = config.get_table("cpu_metrics") + assert cpu_table.time_column == "ts" + + mem_table = config.get_table("memory_metrics") + assert mem_table.time_column == "event_time" + assert mem_table.value_columns == ["memory_used", 
"memory_free"] + + def test_get_time_column(self): + """Test get_time_column helper method.""" + yaml_dict = { + "tables": [ + { + "name": "test_table", + "time_column": "custom_timestamp", + "value_columns": ["val"], + "metadata_columns": ["label"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + assert config.get_time_column("test_table") == "custom_timestamp" + + def test_get_metadata_columns(self): + """Test get_metadata_columns helper method.""" + yaml_dict = { + "tables": [ + { + "name": "test_table", + "time_column": "ts", + "value_columns": ["val"], + "metadata_columns": ["host", "region", "cluster"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + assert config.get_metadata_columns("test_table") == [ + "host", + "region", + "cluster", + ] + + def test_get_nonexistent_table(self): + """Test that get_table returns None for nonexistent table.""" + yaml_dict = {"tables": []} + config = SQLTableConfig(yaml_dict) + assert config.get_table("nonexistent") is None + + def test_empty_tables_list(self): + """Test parsing with empty tables list.""" + yaml_dict = {"tables": []} + config = SQLTableConfig(yaml_dict) + assert len(config.config) == 0 + + def test_missing_tables_key(self): + """Test parsing with missing tables key.""" + yaml_dict = {} + config = SQLTableConfig(yaml_dict) + assert len(config.config) == 0 + + +class TestStreamingAggregationConfigSQL: + """Tests for StreamingAggregationConfig SQL mode support.""" + + def test_parse_sql_aggregation_config(self): + """Test parsing aggregation config with SQL fields.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host", "region"], + "aggregated": ["cluster"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + assert agg_config.table_name 
== "cpu_metrics" + assert agg_config.value_column == "cpu_usage" + assert agg_config.metric is None + + def test_default_value_column(self): + """Test that value_column defaults to 'value' if not specified.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.value_column == "value" + + def test_get_source_identifier_sql(self): + """Test get_source_identifier returns table_name for SQL mode.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "my_table", + "value_column": "val", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.get_source_identifier() == "my_table" + + def test_validate_sql_mode_success(self): + """Test successful validation in SQL mode.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host"], + "aggregated": ["region"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise + agg_config.validate(schema_config, query_language="sql") + + def 
test_validate_sql_mode_missing_table(self): + """Test validation fails when table doesn't exist.""" + config = {"tables": []} + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "nonexistent_table", + "value_column": "val", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="not found in sql_schema"): + agg_config.validate(schema_config, query_language="sql") + + def test_validate_sql_mode_invalid_value_column(self): + """Test validation fails when value_column doesn't exist in table.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "nonexistent_column", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="value_column.*not in table"): + agg_config.validate(schema_config, query_language="sql") + + def test_validate_sql_mode_mismatched_labels(self): + """Test validation fails when labels don't match metadata_columns.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host", "region"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + 
"table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host"], + "aggregated": ["wrong_label"], # doesn't match metadata_columns + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="Labels do not match metadata_columns"): + agg_config.validate(schema_config, query_language="sql") + + +class TestStreamingAggregationConfigPromQL: + """Tests for backward compatibility with PromQL configs.""" + + def test_parse_promql_aggregation_config(self): + """Test parsing aggregation config with PromQL fields.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric_total", + "labels": { + "grouping": ["instance", "job"], + "aggregated": ["label_0"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + assert agg_config.metric == "fake_metric_total" + assert agg_config.table_name is None + + def test_get_source_identifier_promql(self): + """Test get_source_identifier returns metric for PromQL mode.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "my_metric", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.get_source_identifier() == "my_metric" + + def test_validate_promql_mode_success(self): + """Test successful validation in PromQL mode.""" + metrics = {"fake_metric": ["host", "region"]} + metric_config = MetricConfig(metrics) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + 
"metric": "fake_metric", + "labels": { + "grouping": ["host"], + "aggregated": ["region"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise + agg_config.validate(metric_config, query_language="promql") + + def test_validate_promql_mode_default(self): + """Test that query_language defaults to promql.""" + metrics = {"fake_metric": ["host"]} + metric_config = MetricConfig(metrics) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise - defaults to promql + agg_config.validate(metric_config, query_language="promql") + + +class TestTableSchema: + """Tests for TableSchema dataclass.""" + + def test_table_schema_creation(self): + """Test creating a TableSchema.""" + schema = TableSchema( + time_column="ts", + value_columns=["val1", "val2"], + metadata_columns=["label1", "label2"], + ) + + assert schema.time_column == "ts" + assert schema.value_columns == ["val1", "val2"] + assert schema.metadata_columns == ["label1", "label2"] + + def test_table_schema_equality(self): + """Test TableSchema equality.""" + schema1 = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + schema2 = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + + assert schema1 == schema2 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ArroyoSketch/utils/arroyo_utils.py b/ArroyoSketch/utils/arroyo_utils.py new file mode 100644 index 0000000..2f33870 --- /dev/null +++ b/ArroyoSketch/utils/arroyo_utils.py @@ -0,0 +1,72 @@ +import json +import time 
+from typing import List + +from utils import http_utils + + +def get_all_pipelines(arroyo_url: str) -> List[str]: + # list all pipelines + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines", + method="get", + ) + response = json.loads(response) + if response["data"] is None: + print("No pipelines found") + return [] + + pipeline_ids = [pipeline["id"] for pipeline in response["data"]] + return pipeline_ids + + +def stop_and_delete_pipelines( + arroyo_url: str, pipeline_ids: List[str], num_retries: int = 30 +): + # stop each pipeline + for pipeline_id in pipeline_ids: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="patch", + data=json.dumps({"stop": "immediate"}), + ) + print("Sent stop request for pipeline:", pipeline_id) + + # for each pipeline, get status and verify that stop==immediate and actionInProgress==False + # for pipelines not satisfying this, retry N times with a delay, before raising an error + for pipeline_id in pipeline_ids: + for attempt in range(num_retries): + try: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="get", + ) + print("Got status for pipeline:", pipeline_id) + + try: + data = json.loads(response) + print("data['stop']:", data["stop"], type(data["stop"])) + print( + "data['actionInProgress']:", + data["actionInProgress"], + type(data["actionInProgress"]), + ) + if data["stop"] == "immediate" and not data["actionInProgress"]: + break + except json.JSONDecodeError as e: + print("Failed to decode JSON response:", e) + pass + time.sleep(10) + except Exception as e: + if attempt < num_retries - 1: + continue + else: + raise e + + # delete each pipeline + for pipeline_id in pipeline_ids: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="delete", + ) + print("Sent delete request for pipeline:", pipeline_id) diff --git a/ArroyoSketch/utils/http_utils.py 
b/ArroyoSketch/utils/http_utils.py new file mode 100644 index 0000000..08e08ce --- /dev/null +++ b/ArroyoSketch/utils/http_utils.py @@ -0,0 +1,74 @@ +import requests + + +def make_api_request(url, method, data=None): + """ + Make an API request to the Arroyo API. + + Args: + url (str): The URL to make the request to + method (str): The HTTP method (get or post) + data (str): The data to send with the request (for POST) + + Returns: + dict: The response JSON data + + Raises: + Exception: If the request fails + """ + headers = {"Content-Type": "application/json"} + + try: + if method.lower() == "post": + response = requests.post(url, headers=headers, data=data) + elif method.lower() == "get": + response = requests.get(url, headers=headers) + elif method.lower() == "delete": + response = requests.delete(url, headers=headers) + elif method.lower() == "patch": + response = requests.patch(url, headers=headers, data=data) + else: + raise ValueError(f"Unsupported HTTP method: {method}") + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as e: + print( + f"HTTP Error {response.status_code}: {response.content.decode('utf-8')}" + ) + raise e + + response_data = response.content.decode("utf-8") + + return response_data + except Exception as e: + error_msg = f"Failed {method} request to URL: {url}" + print("Error details:", e) + print(error_msg) + raise Exception(error_msg) + + +def create_arroyo_resource(arroyo_url, endpoint, data, resource_type): + """ + Create a resource using the Arroyo API. 
+ + Args: + arroyo_url (str): Base URL of the Arroyo API + endpoint (str): API endpoint (e.g., 'connection_profiles') + data (str): JSON data for the resource + + Returns: + dict: The response JSON data + """ + url = f"{arroyo_url.rstrip('/')}/{endpoint}" + try: + # print(f"Creating {resource_type} resource at {url}...\n") + # print(f"Data: {data}\n") + # input("Press Enter to continue...") + response_data = make_api_request(url=url, method="post", data=data) + except Exception as e: + error_msg = f"Failed to create {resource_type} resource: {e}" + print(error_msg) + raise Exception(error_msg) + + return response_data diff --git a/ArroyoSketch/utils/jinja_utils.py b/ArroyoSketch/utils/jinja_utils.py new file mode 100644 index 0000000..08f6aa8 --- /dev/null +++ b/ArroyoSketch/utils/jinja_utils.py @@ -0,0 +1,27 @@ +from jinja2 import Environment, FileSystemLoader, nodes + + +def load_template(template_dir, template_name): + """Load a template from the specified directory""" + env = Environment(loader=FileSystemLoader(template_dir)) + template = env.get_template(template_name) + return template + + +def get_template_variables(template_source, environment=None): + """ + Extract all template variables from a Jinja2 template source. + + Args: + template_source (str): The raw template source code + environment (Environment, optional): Jinja2 environment. If None, creates a default one. 
+ + Returns: + set: Set of variable names found in the template + """ + if environment is None: + environment = Environment() + + ast = environment.parse(template_source) + template_vars = ast.find_all(nodes.Name) + return {var.name for var in template_vars if var.ctx == "load"} diff --git a/ArroyoSketch/validate_udfs.py b/ArroyoSketch/validate_udfs.py new file mode 100644 index 0000000..0e7b1bc --- /dev/null +++ b/ArroyoSketch/validate_udfs.py @@ -0,0 +1,115 @@ +import os +import json +import argparse +from typing import List + +import utils.http_utils as http_utils +import utils.jinja_utils as jinja_utils + + +def main(args): + if args.all_udfs and args.udfs: + raise ValueError( + "Cannot specify both --all_udfs and --udfs. Use one or the other." + ) + if not args.all_udfs and not args.udfs: + raise ValueError("You must specify either --all_udfs or --udfs.") + + udfs: List[str] = [] + if args.udfs: + udfs = args.udfs.strip().split(",") + udfs = [udf.strip() for udf in udfs if udf.strip()] + else: + udf_templates = os.listdir(os.path.join(args.template_dir, "udfs")) + udfs = [ + udf.split(".rs")[0] + for udf in udf_templates + if udf.endswith(".rs") or udf.endswith(".rs.j2") + ] + + if not udfs: + raise ValueError("No UDFs found to validate.") + udfs = sorted(udfs) + + print(f"Validating UDFs: {', '.join(udfs)}") + + for udf in udfs: + udf_body = None + udf_dir = os.path.join(args.template_dir, "udfs") + + # Check if we have a Jinja template version first + template_path = os.path.join(udf_dir, f"{udf}.rs.j2") + regular_path = os.path.join(udf_dir, f"{udf}.rs") + + if os.path.exists(template_path): + # Read template source and parse for variables + with open(template_path, "r") as file: + template_source = file.read() + + # Load the template for rendering + udf_template = jinja_utils.load_template(udf_dir, f"{udf}.rs.j2") + + # Get all template variables and set them to 100 + template_vars = jinja_utils.get_template_variables( + template_source, 
udf_template.environment + ) + params = {var_name: 100 for var_name in template_vars} + + udf_body = udf_template.render(**params) + elif os.path.exists(regular_path): + # Use regular file if no template exists + with open(regular_path, "r") as file: + udf_body = file.read() + else: + raise ValueError( + f"UDF {udf} not found. Neither {template_path} nor {regular_path} exists." + ) + + if not udf_body: + raise ValueError(f"UDF {udf} is empty or could not be rendered.") + + data = {"definition": udf_body, "language": "rust"} + + response = http_utils.create_arroyo_resource( + args.arroyo_url, + endpoint="udfs/validate", + data=json.dumps(data), + resource_type="UDF", + ) + response = json.loads(response) + + print(f"Validating UDF: {udf}") + print(response) + print("-" * 80) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Validate UDFs in a given directory against a template directory." + ) + parser.add_argument( + "--template_dir", + default="./templates", + help="Directory containing template files", + ) + + parser.add_argument( + "--arroyo_url", + default="http://localhost:5115/api/v1", + help="URL of the Arroyo API server", + ) + + parser.add_argument( + "--all_udfs", + action="store_true", + help="Validate all UDFs in the template directory", + ) + parser.add_argument( + "--udfs", + type=str, + required=False, + help="Comma-separated list of UDFs to validate", + ) + + args = parser.parse_args() + main(args) diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..ed0c3c7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,5115 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + 
"half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" +dependencies = [ + 
"ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" + +[[package]] +name = "arrow-select" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" 
+version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets 0.52.6", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim 0.8.0", + "textwrap", + "unicode-width 0.1.14", + "vec_map", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", 
+] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "codespan-reporting" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" +dependencies = [ + "serde", + "termcolor", + "unicode-width 0.2.2", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "comfy-table" +version = "7.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +dependencies = [ + "unicode-segmentation", + "unicode-width 0.2.2", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ 
+ "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "cxx" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747d8437319e3a2f43d93b341c137927ca70c0f5dabeea7a005a73665e247c7e" +dependencies = [ + "cc", + "cxx-build", + "cxxbridge-cmd", + "cxxbridge-flags", + "cxxbridge-macro", + "foldhash 0.2.0", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f4697d190a142477b16aef7da8a99bfdc41e7e8b1687583c0d23a79c7afc1e" +dependencies = [ + "cc", + "codespan-reporting", + "indexmap", + "proc-macro2", + "quote", + "scratch", + "syn 2.0.117", +] + +[[package]] +name = "cxxbridge-cmd" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0956799fa8678d4c50eed028f2de1c0552ae183c76e976cf7ca8c4e36a7c328" +dependencies = [ + "clap 4.5.60", + "codespan-reporting", + "indexmap", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.194" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "23384a836ab4f0ad98ace7e3955ad2de39de42378ab487dc28d3990392cb283a" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6acc6b5822b9526adfb4fc377b67128fdd60aac757cc4a741a6278603f763cf" +dependencies = [ + "indexmap", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.4.4", + "chrono", + "dashmap 6.1.0", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "num_cpus", + 
"object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser 0.51.0", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser 0.51.0", + "tokio", +] + +[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap 6.1.0", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", 
+ "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser 0.51.0", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools 0.13.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools 0.13.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + 
+[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools 0.13.0", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", 
+ "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools 0.13.0", +] + +[[package]] +name = "datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser 0.51.0", + "strum", +] + +[[package]] +name = "datafusion_summary_library" +version = "0.1.0" +dependencies = [ + "arrow", + "async-trait", + "datafusion", + "futures", + "hyperloglogplus", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dsrs" +version = "0.6.1" +source = "git+https://github.com/ProjectASAP/datasketches-rs#d748ec75c80fff21f7b24897244dd1c895df2e9a" +dependencies = [ + "base64 0.13.1", + "bstr", + "cxx", + "cxx-build", + "memchr", + "rmp-serde", + "serde", + "structopt", + "thin-dst", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width 0.2.2", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + 
+[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + 
+[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "hyper" +version = "0.14.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.32", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + 
"iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jiff" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + 
"jiff-tzdb", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + 
+[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags 2.11.0", + "libc", + "redox_syscall 0.7.3", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4735e9cbde5aac84a5ce588f6b23a90b9b0b528f6c5a8db8a4aff300463a0839" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" +dependencies = [ + "cc", +] + +[[package]] +name = "linux-raw-sys" +version = 
"0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash 2.1.2", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata 0.4.14", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "native-tls" +version = 
"0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi 0.5.2", + "libc", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.11.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.18", + "smallvec", + "windows-link", +] + +[[package]] +name = "parquet" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash 1.6.3", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parse_datetime" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acea383beda9652270f3c9678d83aa58cbfc16880343cae0c0c8c7d6c0974132" +dependencies = [ + "jiff", + "num-traits", + "winnow", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + 
+[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +dependencies = [ + "portable-atomic", +] + 
+[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 1.0.69", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio-test", + "tracing", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + +[[package]] +name = "psm" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "query_engine_rust" +version = "0.1.0" +dependencies = [ + "anyhow", + "arrow", + "async-trait", + "axum", + "base64 0.21.7", + "bincode", + 
"chrono", + "clap 4.5.60", + "dashmap 5.5.3", + "datafusion", + "datafusion_summary_library", + "dsrs", + "flate2", + "form_urlencoded", + "futures", + "hex", + "lazy_static", + "prometheus", + "promql-parser", + "promql_utilities", + "prost", + "rdkafka", + "regex", + "reqwest", + "rmp-serde", + "rusqlite", + "serde", + "serde_json", + "serde_yaml", + "sketch-core", + "sketch_db_common", + "snap", + "sql_utilities", + "sqlparser 0.59.0", + "structopt", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", + "urlencoding", + "uuid", + "xxhash-rust", + "zstd", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rdkafka" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" 
+dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.10.0+2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e234cf318915c1059d4921ef7f75616b5219b10b46e9f3a511a15eb4b56a3f77" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.14", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = 
"regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.11.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + 
"libsqlite3-sys", + "smallvec", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scratch" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2" + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", 
+] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "sketch-core" +version = "0.1.0" +dependencies = [ + "dsrs", + "rmp-serde", + "serde", + "xxhash-rust", +] + +[[package]] +name = "sketch_db_common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 4.5.60", + "promql_utilities", + "serde", + "serde_json", + "serde_yaml", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "sql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "parse_datetime", + "sqlparser 0.59.0", + "tokio-test", +] + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "stacker" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +dependencies = [ + "cc", + 
"cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap 2.34.0", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = 
"syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +dependencies = [ + "fastrand", + "getrandom 
0.4.1", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width 0.1.14", +] + +[[package]] +name = "thin-dst" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = 
[ + "cfg-if", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545" +dependencies = [ + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 1.0.2", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + 
+[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata 0.4.14", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +dependencies = [ + "getrandom 0.4.1", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name 
= "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 
+dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 
0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f6da817 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,4 @@ +[workspace] +members = ["sketch-core", "QueryEngineRust"] +exclude = ["sketchlib-rust"] +resolver = "2" diff --git a/CommonDependencies/.gitignore b/CommonDependencies/.gitignore new file mode 100644 index 0000000..9e7080c 
--- /dev/null +++ b/CommonDependencies/.gitignore @@ -0,0 +1,11 @@ +**/__pycache__ +**/*.pyc +**/*.swp +.DS_Store +.vscode/ + +dependencies/py/promql_utilities/promql_utilities.egg-info/ +dependencies/rs/**/target/ + +tests/**/*.json +tests/**/target/ diff --git a/CommonDependencies/LICENSE b/CommonDependencies/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/CommonDependencies/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py new file mode 100644 index 0000000..8336949 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py @@ -0,0 +1,261 @@ +from dataclasses import dataclass +from typing import Dict, Any +from promql_parser import ( + VectorSelector, + MatrixSelector, + Call, + BinaryExpr, + AggregateExpr, + SubqueryExpr, + NumberLiteral, + TokenType, +) + + +@dataclass +class MatchResult: + matches: bool + tokens: Dict[str, Any] + + def __bool__(self): + return self.matches + + +class PromQLPattern: + """Pattern matching implementation (same as before)""" + + def __init__(self, ast_pattern: dict): + self.pattern = ast_pattern + + def matches(self, node, debug=False) -> MatchResult: + tokens = {} + result = self._matches_recursive(node, self.pattern, tokens, debug) + return MatchResult(matches=result, tokens=tokens) + + def _node_to_dict(self, node: Any) -> dict: + """Convert a promql_parser node to a dictionary representation""" + if isinstance(node, VectorSelector): + return { + "type": "VectorSelector", + "name": node.name, + "matchers": node.matchers, + "at": node.at, # Include the "@" modifier + "ast": node, + } + elif isinstance(node, MatrixSelector): + return { + "type": "MatrixSelector", + "vector_selector": node.vector_selector, + "range": node.range, + "ast": node, + } + elif isinstance(node, Call): + return { + "type": "Call", + "func": {"type": "Function", "name": node.func.name}, + "args": node.args, + "ast": node, + } + elif isinstance(node, BinaryExpr): + return { + "type": 
"BinaryExpr", + "op": node.op, + "left": node.lhs, + "right": node.rhs, + "ast": node, + } + elif isinstance(node, AggregateExpr): + return { + "type": "AggregateExpr", + "op": str(node.op), + "expr": node.expr, + "param": node.param, + "modifier": node.modifier, + "ast": node, + } + elif isinstance(node, SubqueryExpr): + return { + "type": "SubqueryExpr", + "expr": node.expr, + "range": node.range, + "step": node.step, + "offset": node.offset, + "ast": node, + } + elif isinstance(node, NumberLiteral): + return {"type": "NumberLiteral", "value": node.val, "ast": node} + elif isinstance(node, dict): + return node + else: + raise ValueError(f"Unsupported node type: {type(node)}") + + def _matches_recursive( + self, node, pattern: dict, tokens: dict, debug: bool + ) -> bool: + if pattern is None: + return True + + # if not isinstance(node, dict) and not isinstance(node, VectorSelector): + # return False + + node_dict = self._node_to_dict(node) + + if debug: + print("After return point 2") + print(node_dict) + print(pattern) + print(tokens) + + if "type" in pattern and pattern["type"] != node_dict["type"]: + return False + + if debug: + print("After return point 3") + print(node_dict) + print(pattern) + print(tokens) + + # Collect tokens if requested + collect_as = pattern.get("_collect_as") + if collect_as: + if node_dict["type"] == "VectorSelector": + tokens[collect_as] = { + "name": node_dict["name"], + "labels": node_dict["matchers"], + "at": node_dict["at"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "Call": + tokens[collect_as] = { + "name": node_dict["func"]["name"], + "args": node_dict["args"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "MatrixSelector": + tokens[collect_as] = { + "range": node_dict["range"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "SubqueryExpr": + tokens[collect_as] = { + "range": node_dict["range"], + "offset": node_dict["offset"], + "step": node_dict["step"], + "ast": node_dict["ast"], + } 
+ elif node_dict["type"] == "AggregateExpr": + tokens[collect_as] = { + "op": node_dict["op"], + "modifier": node_dict["modifier"], + "param": node_dict["param"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "NumberLiteral": + tokens[collect_as] = node_dict["value"] + elif node_dict["type"] == "BinaryExpr": + tokens[collect_as] = { + "op": node_dict["op"], + "left": node_dict["left"], + "right": node_dict["right"], + "ast": node_dict["ast"], + } + + # Special handling for function arguments collection + collect_args_as = pattern.get("_collect_args_as") + if collect_args_as: + tokens[collect_args_as] = node_dict["args"] + + for key, pattern_value in pattern.items(): + if key.startswith("_"): # Skip our special collection directives + continue + + if key not in node_dict: + if debug: + print(f"Key {key} not found in node_dict") + return False + + node_value = node_dict[key] + + if key in ["name", "op"] and isinstance(pattern_value, list): + if node_value not in pattern_value: + if debug: + print(f"Failed to match {node_value} with {pattern_value}") + return False + continue + + if pattern_value is None: + continue + + if isinstance(pattern_value, dict): + if not self._matches_recursive( + node_value, pattern_value, tokens, debug + ): + if debug: + print(f"(a) Failed to match {node_value} with {pattern_value}") + return False + elif isinstance(pattern_value, list): + if not isinstance(node_value, list) or len(pattern_value) != len( + node_value + ): + if debug: + print( + f"(b) Failed to match list {node_value} with {pattern_value}" + ) + return False + for p_item, n_item in zip(pattern_value, node_value): + if isinstance(p_item, dict): + if not self._matches_recursive(n_item, p_item, tokens, debug): + if debug: + print(f"(c) Failed to match {n_item} with {p_item}") + return False + elif p_item != n_item: + if debug: + print(f"(d) Failed to match {n_item} with {p_item}") + return False + elif isinstance(node_value, TokenType): + if pattern_value != 
str(node_value): + if debug: + print( + f"(e) Failed to match token {node_value} with {pattern_value}" + ) + return False + elif pattern_value != node_value: + if debug: + print(f"(f) Failed to match {node_value} with {pattern_value}") + return False + + return True + + # def matches(self, node) -> bool: + # if self.pattern is None: + # return True + # + # if not isinstance(node, dict) and not isinstance(node, VectorSelector): + # return False + # + # if isinstance(node, VectorSelector): + # node = { + # 'type': 'VectorSelector', + # 'name': node.name, + # 'label_matchers': node.label_matchers + # } + # + # if 'type' in self.pattern and self.pattern['type'] != node.get('type'): + # return False + # + # for key, pattern_value in self.pattern.items(): + # if key not in node: + # return False + # + # node_value = node[key] + # + # if pattern_value is None: + # continue + # + # if isinstance(pattern_value, dict): + # if not self.matches(node_value): + # return False + # elif pattern_value != node_value: + # return False + # + # return True diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py new file mode 100644 index 0000000..cddbbfc --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py @@ -0,0 +1,105 @@ +from dataclasses import dataclass +from typing import List, Optional, Union, Dict + + +@dataclass +class PromQLPatternBuilder: + @staticmethod + def any(): + return None + + @staticmethod + def binary_op(op: str, left, right, collect_as: Optional[str] = None): + return { + "type": "BinaryExpr", + "op": op, + "left": left, + "right": right, + "_collect_as": collect_as, # If set, store the binary operation details + } + + @staticmethod + def metric( + name: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + at: 
Optional[str] = None, + collect_as: Optional[str] = None, + ): + return { + "type": "VectorSelector", + "name": name, + "matchers": labels, + "at": at, # Add the "@" modifier + "_collect_as": collect_as, # If set, store the matched metric details + } + + @staticmethod + def function( + name: Union[str, List[str]], + *args, + collect_args_as: Optional[str] = None, + collect_as: Optional[str] = None, + ): + if isinstance(name, str): + name = [name] + return { + "type": "Call", + "func": {"type": "Function", "name": name}, + "args": list(args), + "_collect_args_as": collect_args_as, # If set, store the function arguments + "_collect_as": collect_as, # If set, store the function details + } + + @staticmethod + def subquery( + expr, duration: Optional[str] = None, collect_as: Optional[str] = None + ): + return { + "type": "SubqueryExpr", + "expr": expr, + "range": duration, + "step": None, + "offset": None, + "_collect_as": collect_as, # If set, store the range details + } + + @staticmethod + def matrix_selector( + vector_selector, range: Optional[str] = None, collect_as: Optional[str] = None + ): + """Match a matrix selector (range vector selector)""" + return { + "type": "MatrixSelector", + "vector_selector": vector_selector, + "range": range, # e.g., '5m', '1h' + "_collect_as": collect_as, + } + + @staticmethod + def aggregation( + op: Union[str, List[str]], + expr, + param=None, + by: Optional[List[str]] = None, + without: Optional[List[str]] = None, + collect_as: Optional[str] = None, + ): + if isinstance(op, str): + op = [op] + + return { + "type": "AggregateExpr", + "op": op, + "expr": expr, + "param": param, + "modifier": by or without or None, + "_collect_as": collect_as, # If set, store the aggregation details + } + + @staticmethod + def number(value: Optional[float] = None, collect_as: Optional[str] = None): + return { + "type": "NumberLiteral", + "value": value, + "_collect_as": collect_as, # If set, store the number value + } diff --git 
a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py new file mode 100644 index 0000000..e869a6d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py @@ -0,0 +1,34 @@ +from typing import List + + +class KeyByLabelNames: + def __init__(self, keys: List[str]): + self.keys = sorted(keys) + + def __repr__(self) -> str: + return f"KeyByLabelNames({self.keys})" + + def __hash__(self) -> int: + return hash(tuple(self.keys)) + + def __eq__(self, other) -> bool: + if not isinstance(other, KeyByLabelNames): + return False + return self.keys == other.keys + + def __add__(self, other: "KeyByLabelNames") -> "KeyByLabelNames": + if not isinstance(other, KeyByLabelNames): + raise ValueError("Addition is only supported for KeyByLabelNames") + return KeyByLabelNames(list(set(self.keys) | set(other.keys))) + + def __sub__(self, other: "KeyByLabelNames") -> "KeyByLabelNames": + if not isinstance(other, KeyByLabelNames): + raise ValueError("Subtraction is only supported for KeyByLabelNames") + return KeyByLabelNames(list(set(self.keys) - set(other.keys))) + + def serialize_to_json(self) -> List[str]: + return self.keys + + @staticmethod + def deserialize_from_json(data: List[str]) -> "KeyByLabelNames": + return KeyByLabelNames(data) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py new file mode 100644 index 0000000..bd2ac97 --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py @@ -0,0 +1 @@ +# data_model module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py new file mode 100644 index 0000000..3158bea --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py @@ -0,0 +1 @@ +# query_logics module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py new file mode 100644 index 0000000..4e750ac --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py @@ -0,0 +1,36 @@ +from enum import Enum, auto + + +class QueryPatternType(Enum): + ONLY_TEMPORAL = auto() + ONLY_SPATIAL = auto() + ONE_TEMPORAL_ONE_SPATIAL = auto() + + +class QueryTreatmentType(Enum): + EXACT = auto() + APPROXIMATE = auto() + + +class Statistic(Enum): + COUNT = auto() + SUM = auto() + CARDINALITY = auto() + INCREASE = auto() + RATE = auto() + MIN = auto() + MAX = auto() + QUANTILE = auto() + TOPK = auto() + + +class QueryResultType(Enum): + INSTANT_VECTOR = auto() + + +class CleanupPolicy(Enum): + """Policy for cleaning up old aggregates from the store.""" + + CIRCULAR_BUFFER = "circular_buffer" + READ_BASED = "read_based" + NO_CLEANUP = "no_cleanup" diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py new file mode 100644 index 0000000..469d8ea --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py @@ -0,0 +1,123 @@ +from typing import Tuple + 
+from promql_utilities.query_logics.enums import QueryTreatmentType, Statistic + +# def map_statistic_to_precompute_operators( +# statistic: str, treatment_type: QueryTreatmentType +# ) -> List[Tuple[str, str]]: +# # if statistic in ["quantile", "stddev", "stdvar"]: +# if statistic == "quantile": +# if treatment_type == QueryTreatmentType.EXACT: +# raise ValueError(f"Statistic {statistic} cannot be computed exactly") +# else: +# return [("KLL", "")] +# # else: +# # return [("UnivMon", "")] +# elif statistic in ["min", "max"]: +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("KLL", "")] +# else: +# return [("MinMax", statistic)] +# elif statistic in ["sum", "count"]: +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("CountMinSketch", statistic)] +# else: +# return [("Sum", statistic)] +# elif statistic == "avg": +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("CountMinSketch", "sum"), ("CountMinSketch", "count")] +# else: +# return [("Sum", "sum"), ("Sum", "count")] +# elif statistic in ["rate", "increase"]: +# return [("Increase", "")] +# else: +# raise NotImplementedError(f"Statistic {statistic} not supported") + + +def map_statistic_to_precompute_operator( + statistic: Statistic, treatment_type: QueryTreatmentType +) -> Tuple[str, str]: + # if statistic in ["quantile", "stddev", "stdvar"]: + if statistic == Statistic.QUANTILE: + if treatment_type == QueryTreatmentType.EXACT: + raise ValueError(f"Statistic {statistic} cannot be computed exactly") + else: + return ("DatasketchesKLL", "") + # return ("HydraKLL", "") + # else: + # return [("UnivMon", "")] + elif statistic == Statistic.TOPK: + if treatment_type == QueryTreatmentType.EXACT: + raise ValueError(f"Statistic {statistic} cannot be computed exactly") + else: + return ("CountMinSketchWithHeap", statistic.name.lower()) + elif statistic in [Statistic.MIN, Statistic.MAX]: + if treatment_type == QueryTreatmentType.APPROXIMATE: + return ("DatasketchesKLL", 
"") + # return ("HydraKLL", "") + else: + # NOTE: Change to Multiple<>Accumulator + # return ("MinMax", statistic.name.lower()) + return ("MultipleMinMax", statistic.name.lower()) + elif statistic in [Statistic.SUM, Statistic.COUNT]: + if treatment_type == QueryTreatmentType.APPROXIMATE: + return ("CountMinSketch", statistic.name.lower()) + else: + # NOTE: Change to Multiple<>Accumulator + # return ("Sum", statistic.name.lower()) + return ("MultipleSum", statistic.name.lower()) + # elif statistic == "avg": + # if treatment_type == QueryTreatmentType.APPROXIMATE: + # return [("CountMinSketch", "sum"), ("CountMinSketch", "count")] + # else: + # return [("Sum", "sum"), ("Sum", "count")] + elif statistic in [Statistic.RATE, Statistic.INCREASE]: + # NOTE: Change to Multiple<>Accumulator + # return ("Increase", "") + return ("MultipleIncrease", "") + else: + raise NotImplementedError(f"Statistic {statistic} not supported") + + +def does_precompute_operator_support_subpopulations( + statistic: Statistic, precompute_operator: str +) -> bool: + if precompute_operator in ["Increase", "MinMax", "Sum", "DatasketchesKLL"]: + return False + elif precompute_operator in [ + "MultipleIncrease", + "MultipleMinMax", + "MultipleSum", + "HydraKLL", + ]: + # TODO: do we need to check for statistic here? If not, remove the check from CountMinSketch + return True + elif precompute_operator == "CountMinSketch": + return statistic in [Statistic.SUM, Statistic.COUNT] + elif ( + precompute_operator == "CountMinSketchWithHeap" and statistic == Statistic.TOPK + ): + # topk and bottomk do not support subpopulations! + # other usages of CountMinSketchWithHeap will fall through. 
+ return False + # elif precompute_operator == "UnivMon": + # return statistic in ["sum", "count", "avg"] + else: + raise NotImplementedError( + f"Precompute operator {precompute_operator} not supported" + ) + + +def get_is_collapsable(temporal_aggregation: str, spatial_aggregation: str) -> bool: + if spatial_aggregation == "sum": + return temporal_aggregation in [ + "sum_over_time", + "count_over_time", + # "increase", + # "rate", + ] + elif spatial_aggregation == "min": + return temporal_aggregation == "min_over_time" + elif spatial_aggregation == "max": + return temporal_aggregation == "max_over_time" + return False diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py new file mode 100644 index 0000000..81b710d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py @@ -0,0 +1,68 @@ +from typing import Tuple, List + +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames +from promql_utilities.query_logics.enums import QueryPatternType, Statistic + + +def get_metric_and_spatial_filter(query_pattern_match) -> Tuple[str, str]: + metric = query_pattern_match.tokens["metric"]["name"] + spatial_filter = "" + + if query_pattern_match.tokens["metric"]["labels"].matchers: + spatial_filter = ( + query_pattern_match.tokens["metric"]["ast"] + .prettify() + .split("{")[1] + .split("}")[0] + ) + metric = metric.split("{")[0] + + return metric, spatial_filter + + +def get_statistics_to_compute( + query_pattern_type, query_pattern_match +) -> List[Statistic]: + statistic_to_compute = None + + if ( + query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + statistic_to_compute = query_pattern_match.tokens["function"]["name"].split( + "_" + )[0] + # template_config.tumblingWindowSize = 
self.t_repeat + elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: + statistic_to_compute = query_pattern_match.tokens["aggregation"]["op"] + # template_config.tumblingWindowSize = self.prometheus_scrape_interval + else: + raise ValueError("Invalid query pattern type") + + if statistic_to_compute == "avg": + return [Statistic.SUM, Statistic.COUNT] + else: + # get enum value from string + return [Statistic[statistic_to_compute.upper()]] + + +def get_spatial_aggregation_output_labels( + query_pattern_match, all_labels: KeyByLabelNames +) -> KeyByLabelNames: + aggregation_modifier = query_pattern_match.tokens["aggregation"]["modifier"] + aggregation_modifier_labels = None + + # Fixing issue https://github.com/ProjectASAP/asap-internal/issues/24 + if aggregation_modifier is None: + return KeyByLabelNames([]) + + if aggregation_modifier.type == aggregation_modifier.type.By: + aggregation_modifier_labels = KeyByLabelNames(aggregation_modifier.labels) + elif aggregation_modifier.type == aggregation_modifier.type.Without: + aggregation_modifier_labels = all_labels - KeyByLabelNames( + aggregation_modifier.labels + ) + else: + raise ValueError("Invalid aggregation modifier") + + return aggregation_modifier_labels diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py new file mode 100644 index 0000000..97c74ca --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py @@ -0,0 +1 @@ +# query_results module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py new file mode 100644 index 0000000..7f43d74 --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py @@ -0,0 +1,143 @@ +import numpy as np +from typing import List, Dict, Optional, Set + + +class TimeSeries: + def __init__(self, key: frozenset, values: List[Optional[float]]): + self.key = key + self.values = np.array(values) + + +class QueryResult: + def __init__( + self, + server_name: str, + query: str, + query_idx: int, + repetition_idx: int, + result: Optional[List[Dict]], + latency: Optional[float], + cumulative_latency: Optional[float], + query_group_idx: int = 0, + raw_text_result: Optional[str] = None, + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.repetition_idx = repetition_idx + self.query_group_idx = query_group_idx + self.latency = latency + self.cumulative_latency = cumulative_latency + self.raw_text_result = raw_text_result + + self.result: Optional[Dict[frozenset, float]] = None + if result: + self.result = { + frozenset(result_per_key["metric"].items()): float( + result_per_key["value"][1] + ) + for result_per_key in result + } + + +class QueryResultAcrossTime: + def __init__(self, server_name, query, query_idx, num_repetitions): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.num_repetitions = num_repetitions + self.query_results: List[QueryResult] = [] + + def add_result(self, query_result: QueryResult): + self.query_results.append(query_result) + + def get_all_timeseries(self) -> Dict[frozenset, TimeSeries]: + keys: Set[frozenset] = set() + for query_result in self.query_results: + if query_result.result: + keys.update(query_result.result.keys()) + + assert len(self.query_results) == self.num_repetitions + ret: Dict[frozenset, TimeSeries] = {} + intermediate_ret: Dict[frozenset, List[Optional[float]]] = { + k: [None for _ in range(self.num_repetitions)] for k in keys + } + + for k in keys: + for repetition_idx, result in enumerate(self.query_results): + if 
result.result: + intermediate_ret[k][repetition_idx] = result.result[k] + + ret[k] = TimeSeries(k, intermediate_ret[k]) + + return ret + + +class LatencyResult: + """Represents latency data for a single query execution.""" + + def __init__( + self, + server_name: str, + query: str, + query_idx: int, + repetition_idx: int, + latency: Optional[float], + cumulative_latency: Optional[float], + query_group_idx: int = 0, + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.repetition_idx = repetition_idx + self.query_group_idx = query_group_idx + self.latency = latency + self.cumulative_latency = cumulative_latency + + +class LatencyResultAcrossTime: + """Represents latency data for a query across multiple repetitions.""" + + def __init__( + self, server_name: str, query: str, query_idx: int, num_repetitions: int + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.num_repetitions = num_repetitions + self.latency_results: List[LatencyResult] = [] + + def add_latency_result(self, latency_result: LatencyResult): + """Add a latency result for a specific repetition.""" + self.latency_results.append(latency_result) + + def get_latencies(self) -> List[Optional[float]]: + """Get list of latencies across all repetitions.""" + return [lr.latency for lr in self.latency_results] + + def get_cumulative_latencies(self) -> List[Optional[float]]: + """Get list of cumulative latencies across all repetitions.""" + return [lr.cumulative_latency for lr in self.latency_results] + + @classmethod + def from_query_result_across_time( + cls, qrat: "QueryResultAcrossTime" + ) -> "LatencyResultAcrossTime": + """Create LatencyResultAcrossTime from existing QueryResultAcrossTime.""" + latency_result_across_time = cls( + qrat.server_name, qrat.query, qrat.query_idx, qrat.num_repetitions + ) + + for query_result in qrat.query_results: + latency_result = LatencyResult( + server_name=query_result.server_name, + 
query=query_result.query, + query_idx=query_result.query_idx, + repetition_idx=query_result.repetition_idx, + latency=query_result.latency, + cumulative_latency=query_result.cumulative_latency, + query_group_idx=query_result.query_group_idx, + ) + latency_result_across_time.add_latency_result(latency_result) + + return latency_result_across_time diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py new file mode 100644 index 0000000..ad96e65 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py @@ -0,0 +1,13 @@ +""" +Streaming serialization interfaces for query results. + +This module provides multiple serialization formats for query results: +- JSONL + gzip: Compressed streaming format, human-readable +- Parquet: Columnar format for analytics, high compression +- Backward compatibility with pickle format +""" + +from .base import ResultsSerializer +from .factory import SerializerFactory, get_available_formats + +__all__ = ["ResultsSerializer", "SerializerFactory", "get_available_formats"] diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py new file mode 100644 index 0000000..55ef371 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py @@ -0,0 +1,84 @@ +""" +Abstract base class for results serializers. 
+""" + +from abc import ABC, abstractmethod +from typing import Any, Dict +from ..classes import QueryResultAcrossTime, LatencyResultAcrossTime, QueryResult + + +class ResultsSerializer(ABC): + """Abstract interface for query results serialization.""" + + def __init__(self, output_dir: str): + """Initialize serializer with output directory. + + Args: + output_dir: Directory where results will be written + """ + self.output_dir = output_dir + + @abstractmethod + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to storage. + + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + pass + + @abstractmethod + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from storage. + + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + pass + + @abstractmethod + def exists(self) -> bool: + """Check if serialized results exist. + + Returns: + True if results exist and can be read + """ + pass + + @abstractmethod + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. + """ + pass + + @abstractmethod + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + pass + + @abstractmethod + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + pass + + def cleanup(self) -> None: + """Clean up any resources. Override if needed.""" + pass + + @abstractmethod + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. 
+ + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + pass diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py new file mode 100644 index 0000000..f9b22d6 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py @@ -0,0 +1,121 @@ +""" +Factory for creating serializers with automatic format detection. +""" + +import os +import logging +from typing import List, Optional +from .base import ResultsSerializer + + +logger = logging.getLogger(__name__) + + +def get_available_formats() -> List[str]: + """Get list of available serialization formats. + + Returns: + List of format names that can be used + """ + return ["jsonl", "parquet"] + + +class SerializerFactory: + """Factory for creating results serializers.""" + + @staticmethod + def create(format_name: str, output_dir: str, **kwargs) -> ResultsSerializer: + """Create a serializer for the specified format. + + Args: + format_name: Format name ('jsonl', 'parquet', or 'auto') + output_dir: Directory for output files + **kwargs: Additional arguments passed to serializer + + Returns: + ResultsSerializer instance + + Raises: + ValueError: If format is not supported + ImportError: If required dependencies are missing + """ + if format_name == "auto": + format_name = SerializerFactory._detect_format(output_dir) + + if format_name == "jsonl": + from .jsonl_serializer import JSONLResultsSerializer + + return JSONLResultsSerializer(output_dir, **kwargs) + + elif format_name == "parquet": + from .parquet_serializer import ParquetResultsSerializer + + return ParquetResultsSerializer(output_dir, **kwargs) + + else: + available = get_available_formats() + raise ValueError( + f"Unsupported format '{format_name}'. 
Available formats: {available}" + ) + + @staticmethod + def _detect_format(output_dir: str) -> str: + """Auto-detect format based on existing files. + + Args: + output_dir: Directory to check for existing files + + Returns: + Detected format name, defaults to 'jsonl' if none found + """ + if not os.path.exists(output_dir): + return "jsonl" # Default for new directories + + # Check for Parquet files first (they indicate intent for analytics) + parquet_files = ["query_results.parquet", "query_latencies.parquet"] + + if any(os.path.exists(os.path.join(output_dir, f)) for f in parquet_files): + return "parquet" + + # Check for JSONL files + jsonl_files = [ + "query_results.jsonl.gz", + "query_results.jsonl", + "query_latencies.jsonl.gz", + "query_latencies.jsonl", + ] + + if any(os.path.exists(os.path.join(output_dir, f)) for f in jsonl_files): + return "jsonl" + + # Default to JSONL for new directories + logger.debug( + f"No existing format detected in {output_dir}, defaulting to JSONL" + ) + return "jsonl" + + @staticmethod + def create_from_existing(output_dir: str) -> Optional[ResultsSerializer]: + """Create serializer by detecting format from existing files. 
+ + Args: + output_dir: Directory containing existing results + + Returns: + ResultsSerializer instance, or None if no results found + """ + if not os.path.exists(output_dir): + return None + + detected_format = SerializerFactory._detect_format(output_dir) + + try: + serializer = SerializerFactory.create(detected_format, output_dir) + if serializer.exists(): + return serializer + except (ValueError, ImportError) as e: + logger.warning( + f"Could not create serializer for detected format {detected_format}: {e}" + ) + + return None diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py new file mode 100644 index 0000000..1a07595 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py @@ -0,0 +1,499 @@ +""" +JSONL+gzip streaming serializer for query results. +""" + +import json +import gzip +import os +import threading +from typing import Any, Dict, Iterator +from .base import ResultsSerializer +from ..classes import ( + QueryResult, + QueryResultAcrossTime, + LatencyResult, + LatencyResultAcrossTime, +) + + +class JSONLResultsSerializer(ResultsSerializer): + """JSONL+gzip streaming serializer for query results.""" + + def __init__(self, output_dir: str, use_compression: bool = True): + """Initialize JSONL serializer. 
+ + Args: + output_dir: Directory for output files + use_compression: Whether to use gzip compression + """ + super().__init__(output_dir) + self.use_compression = use_compression + self.results_file = os.path.join(output_dir, "query_results.jsonl") + self.latency_file = os.path.join(output_dir, "query_latencies.jsonl") + self.metadata_file = os.path.join(output_dir, "experiment_metadata.json") + + if use_compression: + self.results_file += ".gz" + self.latency_file += ".gz" + + os.makedirs(output_dir, exist_ok=True) + + # Streaming write state + self._streaming_results_file = None + self._streaming_latency_file = None + self._streaming_metadata = None + + # Thread safety for streaming writes + self._write_lock = threading.Lock() + + def _open_for_write(self, filepath: str): + """Open file for writing with optional compression.""" + if self.use_compression: + return gzip.open(filepath, "wt", encoding="utf-8") + return open(filepath, "w", encoding="utf-8") + + def _open_for_read(self, filepath: str): + """Open file for reading with optional compression.""" + if self.use_compression: + return gzip.open(filepath, "rt", encoding="utf-8") + return open(filepath, "r", encoding="utf-8") + + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to JSONL files. 
+ + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + # Write metadata + self._write_metadata(results_across_servers) + + # Write results and latencies + with self._open_for_write(self.results_file) as results_f, self._open_for_write( + self.latency_file + ) as latency_f: + for server_name, server_results in results_across_servers.items(): + for query_idx, query_result_across_time in server_results.items(): + for query_result in query_result_across_time.query_results: + # Write result record + if query_result.result: + for frozenset_key, value in query_result.result.items(): + result_record = { + "server_name": server_name, + "query": query_result.query, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": self._serialize_frozenset_key( + frozenset_key + ), + "result_value": value, + } + results_f.write(json.dumps(result_record) + "\n") + + # Write latency record + latency_record = { + "server_name": server_name, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + latency_f.write(json.dumps(latency_record) + "\n") + + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from JSONL files. 
+ + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + results = {} + for server in servers: + results[server] = {} + for query_idx, query in enumerate(all_queries): + results[server][query_idx] = QueryResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read latencies into lookup table + latencies = {} + if os.path.exists(self.latency_file): + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + latency_record = json.loads(line) + key = ( + latency_record["server_name"], + latency_record["query_idx"], + latency_record["repetition_idx"], + ) + latencies[key] = ( + latency_record["latency"], + latency_record["cumulative_latency"], + ) + + # Read results and reconstruct QueryResult objects + query_results = {} # (server, query_idx, repetition_idx) -> partial QueryResult + + if os.path.exists(self.results_file): + with self._open_for_read(self.results_file) as f: + for line in f: + line = line.strip() + if line: + result_record = json.loads(line) + + key = ( + result_record["server_name"], + result_record["query_idx"], + result_record["repetition_idx"], + ) + + # 
Check if this is a raw_text_result (SQL/ClickHouse) record + is_raw_text = "raw_text_result" in result_record + + # Initialize QueryResult if not exists + if key not in query_results: + latency, cumulative_latency = latencies.get( + key, (None, None) + ) + query_results[key] = QueryResult( + server_name=result_record["server_name"], + query=result_record["query"], + query_idx=result_record["query_idx"], + repetition_idx=result_record["repetition_idx"], + result=None, # Will be populated below for Prometheus + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=result_record.get("query_group_idx", 0), + raw_text_result=None, # Will be populated for SQL + ) + if not is_raw_text: + query_results[key].result = {} + + if is_raw_text: + # SQL/ClickHouse raw text result + query_results[key].raw_text_result = result_record[ + "raw_text_result" + ] + else: + # Prometheus-style result + frozenset_key = self._deserialize_frozenset_key( + result_record["result_labels"] + ) + query_results[key].result[frozenset_key] = result_record[ + "result_value" + ] + + # Add QueryResult objects to the nested structure + for ( + server_name, + query_idx, + repetition_idx, + ), query_result in query_results.items(): + results[server_name][query_idx].add_result(query_result) + + # Handle cases where we have latencies but no results + for (server_name, query_idx, repetition_idx), ( + latency, + cumulative_latency, + ) in latencies.items(): + if (server_name, query_idx, repetition_idx) not in query_results: + # Create empty QueryResult with just latency data + empty_result = QueryResult( + server_name=server_name, + query=all_queries[query_idx], + query_idx=query_idx, + repetition_idx=repetition_idx, + result=None, + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=0, # Default for backward compatibility + ) + results[server_name][query_idx].add_result(empty_result) + + return results + + def exists(self) -> bool: + """Check if serialized 
results exist. + + Returns: + True if results exist and can be read + """ + return os.path.exists(self.metadata_file) and ( + os.path.exists(self.results_file) or os.path.exists(self.latency_file) + ) + + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. + """ + if ( + self._streaming_results_file is not None + or self._streaming_latency_file is not None + ): + raise RuntimeError("Streaming write session already active") + + self._streaming_metadata = metadata + self._streaming_results_file = self._open_for_write(self.results_file) + self._streaming_latency_file = self._open_for_write(self.latency_file) + + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + if self._streaming_results_file is None or self._streaming_latency_file is None: + raise RuntimeError("Streaming write session not started") + + with self._write_lock: + # Write result records - handle both Prometheus (result) and SQL (raw_text_result) + if query_result.result: + # Prometheus-style normalized results + for frozenset_key, value in query_result.result.items(): + result_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": self._serialize_frozenset_key(frozenset_key), + "result_value": value, + } + self._streaming_results_file.write(json.dumps(result_record) + "\n") + elif query_result.raw_text_result is not None: + # SQL/ClickHouse raw text result + result_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": 
query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "raw_text_result": query_result.raw_text_result, + } + self._streaming_results_file.write(json.dumps(result_record) + "\n") + + # Write latency record + latency_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + self._streaming_latency_file.write(json.dumps(latency_record) + "\n") + + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + if self._streaming_results_file is not None: + self._streaming_results_file.close() + self._streaming_results_file = None + + if self._streaming_latency_file is not None: + self._streaming_latency_file.close() + self._streaming_latency_file = None + + # Write metadata at the end + if self._streaming_metadata is not None: + with open(self.metadata_file, "w") as f: + json.dump(self._streaming_metadata, f, indent=2) + self._streaming_metadata = None + + def stream_results(self) -> Iterator[Dict]: + """Stream read query results one record at a time. + + Yields: + Dict containing result record data + """ + if not os.path.exists(self.results_file): + return + + with self._open_for_read(self.results_file) as f: + for line in f: + line = line.strip() + if line: + yield json.loads(line) + + def stream_latencies(self) -> Iterator[Dict]: + """Stream read latency data one record at a time. 
+ + Yields: + Dict containing latency record data + """ + if not os.path.exists(self.latency_file): + return + + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + yield json.loads(line) + + def _write_metadata( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ): + """Write experiment metadata.""" + if not results_across_servers: + return + + servers = list(results_across_servers.keys()) + queries = [] + repetitions = 0 + + if servers: + first_server = servers[0] + if results_across_servers[first_server]: + query_indices = sorted(results_across_servers[first_server].keys()) + queries = [ + results_across_servers[first_server][i].query for i in query_indices + ] + if query_indices: + repetitions = results_across_servers[first_server][ + query_indices[0] + ].num_repetitions + + metadata = { + "queries": queries, + "servers": servers, + "repetitions": repetitions, + "total_queries": len(queries), + } + + with open(self.metadata_file, "w") as f: + json.dump(metadata, f, indent=2) + + def _read_metadata(self) -> Dict: + """Read experiment metadata.""" + with open(self.metadata_file, "r") as f: + return json.load(f) + + def _serialize_frozenset_key(self, frozenset_key: frozenset) -> str: + """Convert frozenset key to JSON string. + + Args: + frozenset_key: frozenset of (key, value) tuples + + Returns: + JSON string representation + """ + # Convert to dict and serialize as JSON with sorted keys for consistency + labels_dict = dict(frozenset_key) + return json.dumps(labels_dict, sort_keys=True) + + def _deserialize_frozenset_key(self, json_str: str) -> frozenset: + """Convert JSON string back to frozenset key. 
+ + Args: + json_str: JSON string representation + + Returns: + frozenset of (key, value) tuples + """ + labels_dict = json.loads(json_str) + return frozenset(labels_dict.items()) + + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. + + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + latencies = {} + for server in servers: + latencies[server] = {} + for query_idx, query in enumerate(all_queries): + latencies[server][query_idx] = LatencyResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read only latency data + if os.path.exists(self.latency_file): + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + latency_record = json.loads(line) + + latency_result = LatencyResult( + server_name=latency_record["server_name"], + query=all_queries[latency_record["query_idx"]], + query_idx=latency_record["query_idx"], + repetition_idx=latency_record["repetition_idx"], + latency=latency_record["latency"], + cumulative_latency=latency_record["cumulative_latency"], + 
query_group_idx=latency_record.get("query_group_idx", 0), + ) + + latencies[latency_record["server_name"]][ + latency_record["query_idx"] + ].add_latency_result(latency_result) + + return latencies diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py new file mode 100644 index 0000000..faadc3d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py @@ -0,0 +1,505 @@ +""" +Parquet serializer for query results using JSON columns for labels. +""" + +import json +import os +import threading +from typing import Any, Dict, List, Optional +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +from .base import ResultsSerializer +from ..classes import ( + QueryResult, + QueryResultAcrossTime, + LatencyResult, + LatencyResultAcrossTime, +) + + +class ParquetResultsSerializer(ResultsSerializer): + """Parquet serializer for query results with JSON column for labels.""" + + def __init__( + self, output_dir: str, compression: str = "snappy", batch_size: int = 1000 + ): + """Initialize Parquet serializer. + + Args: + output_dir: Directory for output files + compression: Compression algorithm ('snappy', 'gzip', 'lz4', etc.) 
+ batch_size: Number of records to batch before writing to parquet + """ + super().__init__(output_dir) + self.compression = compression + self.batch_size = batch_size + self.results_file = os.path.join(output_dir, "query_results.parquet") + self.latency_file = os.path.join(output_dir, "query_latencies.parquet") + self.metadata_file = os.path.join(output_dir, "experiment_metadata.json") + + os.makedirs(output_dir, exist_ok=True) + + # Streaming write state + self._streaming_results_writer: Optional[pq.ParquetWriter] = None + self._streaming_latency_writer: Optional[pq.ParquetWriter] = None + self._results_batch: List[Dict] = [] + self._latency_batch: List[Dict] = [] + self._streaming_metadata = None + + # Define schemas for streaming + self._results_schema = pa.schema( + [ + ("query_group_idx", pa.int64()), + ("server_name", pa.string()), + ("query", pa.string()), + ("query_idx", pa.int64()), + ("repetition_idx", pa.int64()), + ("result_labels", pa.string()), + ("result_value", pa.float64()), + ] + ) + + self._latency_schema = pa.schema( + [ + ("query_group_idx", pa.int64()), + ("server_name", pa.string()), + ("query_idx", pa.int64()), + ("repetition_idx", pa.int64()), + ("latency", pa.float64()), + ("cumulative_latency", pa.float64()), + ] + ) + + # Thread safety for streaming writes + self._write_lock = threading.Lock() + + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to Parquet files. 
+ + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + # Write metadata + self._write_metadata(results_across_servers) + + results_rows = [] + latency_rows = [] + + for server_name, server_results in results_across_servers.items(): + for query_idx, query_result_across_time in server_results.items(): + query = query_result_across_time.query + + for query_result in query_result_across_time.query_results: + # Process query results + if query_result.result: + for frozenset_key, value in query_result.result.items(): + # Convert frozenset to JSON string + labels_dict = dict(frozenset_key) + labels_json = json.dumps(labels_dict, sort_keys=True) + + results_rows.append( + { + "server_name": server_name, + "query": query, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": labels_json, + "result_value": value, + } + ) + + # Process latency data separately + latency_rows.append( + { + "server_name": server_name, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + ) + + # Write results DataFrame + if results_rows: + results_df = pd.DataFrame(results_rows) + results_df.to_parquet( + self.results_file, compression=self.compression, index=False + ) + + # Write latencies DataFrame + if latency_rows: + latency_df = pd.DataFrame(latency_rows) + latency_df.to_parquet( + self.latency_file, compression=self.compression, index=False + ) + + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from Parquet files. 
+ + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + results = {} + for server in servers: + results[server] = {} + for query_idx, query in enumerate(all_queries): + results[server][query_idx] = QueryResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read latencies + latencies = {} + if os.path.exists(self.latency_file): + latency_df = pd.read_parquet(self.latency_file) + for _, row in latency_df.iterrows(): + key = (row["server_name"], row["query_idx"], row["repetition_idx"]) + latencies[key] = (row["latency"], row["cumulative_latency"]) + + # Read results and reconstruct QueryResult objects + query_results = {} # (server, query_idx, repetition_idx) -> QueryResult + + if os.path.exists(self.results_file): + results_df = pd.read_parquet(self.results_file) + + for _, row in results_df.iterrows(): + key = (row["server_name"], row["query_idx"], row["repetition_idx"]) + + # Initialize QueryResult if not exists + if key not in query_results: + latency, cumulative_latency = latencies.get(key, (None, None)) + query_results[key] = QueryResult( + server_name=row["server_name"], + query=row["query"], + 
query_idx=row["query_idx"], + repetition_idx=row["repetition_idx"], + result=None, # Will be populated below + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=row.get("query_group_idx", 0), + ) + query_results[key].result = {} + + # Parse labels back to frozenset + labels_dict = json.loads(row["result_labels"]) + frozenset_key = frozenset(labels_dict.items()) + query_results[key].result[frozenset_key] = row["result_value"] + + # Add QueryResult objects to the nested structure + for ( + server_name, + query_idx, + repetition_idx, + ), query_result in query_results.items(): + results[server_name][query_idx].add_result(query_result) + + # Handle cases where we have latencies but no results + for (server_name, query_idx, repetition_idx), ( + latency, + cumulative_latency, + ) in latencies.items(): + if (server_name, query_idx, repetition_idx) not in query_results: + # Create empty QueryResult with just latency data + empty_result = QueryResult( + server_name=server_name, + query=all_queries[query_idx], + query_idx=query_idx, + repetition_idx=repetition_idx, + result=None, + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=0, # Default for backward compatibility + ) + results[server_name][query_idx].add_result(empty_result) + + return results + + def exists(self) -> bool: + """Check if serialized results exist. + + Returns: + True if results exist and can be read + """ + return os.path.exists(self.metadata_file) and ( + os.path.exists(self.results_file) or os.path.exists(self.latency_file) + ) + + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. 
+ """ + if ( + self._streaming_results_writer is not None + or self._streaming_latency_writer is not None + ): + raise RuntimeError("Streaming write session already active") + + self._streaming_metadata = metadata + self._results_batch = [] + self._latency_batch = [] + + # Initialize ParquetWriter instances with schemas + self._streaming_results_writer = pq.ParquetWriter( + self.results_file, schema=self._results_schema, compression=self.compression + ) + self._streaming_latency_writer = pq.ParquetWriter( + self.latency_file, schema=self._latency_schema, compression=self.compression + ) + + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + if ( + self._streaming_results_writer is None + or self._streaming_latency_writer is None + ): + raise RuntimeError("Streaming write session not started") + + with self._write_lock: + # Add result records to batch + if query_result.result: + for frozenset_key, value in query_result.result.items(): + labels_dict = dict(frozenset_key) + labels_json = json.dumps(labels_dict, sort_keys=True) + + self._results_batch.append( + { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": labels_json, + "result_value": value, + } + ) + + # Add latency record to batch + self._latency_batch.append( + { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + ) + + # Flush batches if they reach batch_size + if len(self._results_batch) >= self.batch_size: + self._flush_results_batch() + if len(self._latency_batch) >= 
self.batch_size: + self._flush_latency_batch() + + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + # Flush any remaining batches + if self._results_batch: + self._flush_results_batch() + if self._latency_batch: + self._flush_latency_batch() + + # Close writers + if self._streaming_results_writer is not None: + self._streaming_results_writer.close() + self._streaming_results_writer = None + + if self._streaming_latency_writer is not None: + self._streaming_latency_writer.close() + self._streaming_latency_writer = None + + # Write metadata at the end + if self._streaming_metadata is not None: + with open(self.metadata_file, "w") as f: + json.dump(self._streaming_metadata, f, indent=2) + self._streaming_metadata = None + + def _flush_results_batch(self) -> None: + """Write current results batch to parquet.""" + if self._results_batch and self._streaming_results_writer is not None: + results_df = pd.DataFrame(self._results_batch) + table = pa.Table.from_pandas(results_df, schema=self._results_schema) + self._streaming_results_writer.write_table(table) + self._results_batch = [] + + def _flush_latency_batch(self) -> None: + """Write current latency batch to parquet.""" + if self._latency_batch and self._streaming_latency_writer is not None: + latency_df = pd.DataFrame(self._latency_batch) + table = pa.Table.from_pandas(latency_df, schema=self._latency_schema) + self._streaming_latency_writer.write_table(table) + self._latency_batch = [] + + def query_results(self, filters=None, columns=None) -> pd.DataFrame: + """Query results with optional filtering and column selection. 
+ + Args: + filters: PyArrow filters for row selection + columns: List of column names to read + + Returns: + Pandas DataFrame with query results + """ + if not os.path.exists(self.results_file): + return pd.DataFrame() + + return pd.read_parquet(self.results_file, filters=filters, columns=columns) + + def query_latencies(self, filters=None, columns=None) -> pd.DataFrame: + """Query latencies with optional filtering and column selection. + + Args: + filters: PyArrow filters for row selection + columns: List of column names to read + + Returns: + Pandas DataFrame with latency data + """ + if not os.path.exists(self.latency_file): + return pd.DataFrame() + + return pd.read_parquet(self.latency_file, filters=filters, columns=columns) + + def _write_metadata( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ): + """Write experiment metadata.""" + if not results_across_servers: + return + + servers = list(results_across_servers.keys()) + queries = [] + repetitions = 0 + + if servers: + first_server = servers[0] + if results_across_servers[first_server]: + query_indices = sorted(results_across_servers[first_server].keys()) + queries = [ + results_across_servers[first_server][i].query for i in query_indices + ] + if query_indices: + repetitions = results_across_servers[first_server][ + query_indices[0] + ].num_repetitions + + metadata = { + "queries": queries, + "servers": servers, + "repetitions": repetitions, + "total_queries": len(queries), + } + + with open(self.metadata_file, "w") as f: + json.dump(metadata, f, indent=2) + + def _read_metadata(self) -> Dict: + """Read experiment metadata.""" + with open(self.metadata_file, "r") as f: + return json.load(f) + + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. 
+ + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + latencies = {} + for server in servers: + latencies[server] = {} + for query_idx, query in enumerate(all_queries): + latencies[server][query_idx] = LatencyResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read only latency data + if os.path.exists(self.latency_file): + latency_df = pd.read_parquet(self.latency_file) + for _, row in latency_df.iterrows(): + latency_result = LatencyResult( + server_name=row["server_name"], + query=all_queries[row["query_idx"]], + query_idx=row["query_idx"], + repetition_idx=row["repetition_idx"], + latency=row["latency"], + cumulative_latency=row["cumulative_latency"], + query_group_idx=row.get("query_group_idx", 0), + ) + + latencies[row["server_name"]][row["query_idx"]].add_latency_result( + latency_result + ) + + return latencies diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py new file mode 100644 index 0000000..49b176e --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py @@ -0,0 +1,17 @@ +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + + +class MetricConfig: + def __init__(self, yaml_str): + self.config = {} + for metric, labels in yaml_str.items(): + self.config[metric] = KeyByLabelNames(labels) + + @classmethod + def from_list(cls, yaml_list): + """Create MetricConfig from a list-of-dicts format used by Controller. + + Format: [{"metric": "name", "labels": ["l1", "l2"]}, ...] + """ + as_dict = {item["metric"]: item["labels"] for item in yaml_list} + return cls(as_dict) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py new file mode 100644 index 0000000..ac903f3 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass +from typing import Dict, List + + +@dataclass +class TableSchema: + """Schema for a single SQL table.""" + + time_column: str + value_columns: List[str] + metadata_columns: List[str] + + +class SQLTableConfig: + """ + SQL schema configuration, equivalent to MetricConfig for SQL mode. 
+ + Mirrors the Rust SQLSchema/Table structure in: + CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs + """ + + def __init__(self, yaml_dict: dict): + self.config: Dict[str, TableSchema] = {} + for table in yaml_dict.get("tables", []): + self.config[table["name"]] = TableSchema( + time_column=table["time_column"], + value_columns=table["value_columns"], + metadata_columns=table["metadata_columns"], + ) + + def get_table(self, table_name: str) -> TableSchema: + return self.config.get(table_name) + + def get_time_column(self, table_name: str) -> str: + return self.config[table_name].time_column + + def get_metadata_columns(self, table_name: str) -> List[str]: + return self.config[table_name].metadata_columns diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py new file mode 100644 index 0000000..540d411 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py @@ -0,0 +1,167 @@ +import yaml + +# from ruamel.yaml import YAML + +from typing import Dict, Tuple, Union +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + +yaml.add_representer( + KeyByLabelNames, + lambda dumper, data: dumper.represent_list(data.serialize_to_json()), +) + +# yaml_writer = YAML() +# yaml_writer.representer.add_representer( +# KeyByLabelNames, +# lambda dumper, data: dumper.represent_sequence( +# "tag:yaml.org,2002:seq", data.serialize_to_json(), flow_style=False +# ), +# ) + + +class StreamingAggregationConfig: + aggregationId: int + aggregationType: str + aggregationSubType: str + + # NEW fields for sliding window support 
(Issue #236) + windowSize: int # Window size in seconds (e.g., 900s for 15m) + slideInterval: int # Slide/hop interval in seconds (e.g., 30s) + windowType: str # "tumbling" or "sliding" + + # DEPRECATED but kept for backward compatibility + tumblingWindowSize: int # For reading old configs + + spatialFilter: str + metric: str # PromQL mode: metric name + parameters: dict + + labels: Dict[str, KeyByLabelNames] + + # SQL-specific fields (optional, used when query_language=sql) + table_name: str # SQL mode: table name + value_column: str # SQL mode: which value column to aggregate + + def __init__(self): + self.labels = { + "rollup": KeyByLabelNames([]), + "grouping": KeyByLabelNames([]), + "aggregated": KeyByLabelNames([]), + } + # Default to tumbling windows for backward compatibility + self.windowType = "tumbling" + # SQL fields default to None + self.table_name = None + self.value_column = None + self.metric = None + + @staticmethod + def from_dict(aggregation_config: dict) -> "StreamingAggregationConfig": + aggregation = StreamingAggregationConfig() + aggregation.aggregationId = aggregation_config["aggregationId"] + aggregation.aggregationType = aggregation_config["aggregationType"] + aggregation.aggregationSubType = aggregation_config["aggregationSubType"] + + # NEW: Handle new window fields with backward compatibility + aggregation.windowType = aggregation_config.get("windowType", "tumbling") + aggregation.windowSize = aggregation_config.get( + "windowSize", aggregation_config.get("tumblingWindowSize") + ) + aggregation.slideInterval = aggregation_config.get( + "slideInterval", aggregation_config.get("tumblingWindowSize") + ) + + # Keep deprecated field for backward compatibility + aggregation.tumblingWindowSize = aggregation_config.get( + "tumblingWindowSize", aggregation.windowSize + ) + + aggregation.spatialFilter = aggregation_config["spatialFilter"] + aggregation.parameters = aggregation_config["parameters"] + + # Handle both PromQL (metric) and SQL 
(table_name/value_column) formats + aggregation.metric = aggregation_config.get("metric") + aggregation.table_name = aggregation_config.get("table_name") + aggregation.value_column = aggregation_config.get("value_column", "value") + + for k, v in aggregation_config["labels"].items(): + if k not in aggregation.labels: + raise ValueError(f"Invalid label name: {k}") + if v is not None: + aggregation.labels[k] = KeyByLabelNames(v) + + return aggregation + + def validate( + self, + schema_config: Union[MetricConfig, SQLTableConfig], + query_language: str, + ): + """Validate against MetricConfig (promql) or SQLTableConfig (sql).""" + configured_labels = KeyByLabelNames([]) + for k, v in self.labels.items(): + assert v is not None + configured_labels += v + + if query_language == "promql": + # Existing validation logic for PromQL + if schema_config.config[self.metric] != configured_labels: + raise ValueError( + "Labels do not match: {} vs {}".format( + schema_config.config[self.metric], + configured_labels, + ) + ) + elif query_language == "sql": + # SQL validation: check labels match metadata_columns + table_schema = schema_config.get_table(self.table_name) + if table_schema is None: + raise ValueError(f"Table '{self.table_name}' not found in sql_schema") + + expected_columns = set(table_schema.metadata_columns) + actual_columns = set(configured_labels.keys) + if expected_columns != actual_columns: + raise ValueError( + f"Labels do not match metadata_columns for table {self.table_name}: " + f"expected {expected_columns}, got {actual_columns}" + ) + # Validate value_column exists + if self.value_column not in table_schema.value_columns: + raise ValueError( + f"value_column '{self.value_column}' not in table {self.table_name} " + f"value_columns: {table_schema.value_columns}" + ) + + def to_dict( + self, + schema_config: Union[MetricConfig, SQLTableConfig], + query_language: str, + ) -> dict: + self.validate(schema_config, query_language) + return self.__dict__ + + def 
get_source_identifier(self) -> str: + """Get the metric name (promql) or table name (sql).""" + return self.metric if self.metric else self.table_name + + def get_identifying_key(self) -> Tuple: + keys = [ + self.aggregationType, + self.aggregationSubType, + self.windowType, # NEW: Include window type + self.windowSize, # NEW: Include window size + self.slideInterval, # NEW: Include slide interval + self.tumblingWindowSize, # Keep for backward compatibility + self.spatialFilter, + self.metric, + self.table_name, # SQL mode: table name + self.value_column, # SQL mode: value column + tuple(self.parameters.items()), + ] + for k in sorted(self.labels.keys()): + keys.append(k) + keys.append(tuple(self.labels[k].serialize_to_json())) + + return tuple(keys) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py new file mode 100644 index 0000000..e822aff --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py @@ -0,0 +1,12 @@ +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig, TableSchema +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) + +__all__ = [ + "MetricConfig", + "SQLTableConfig", + "TableSchema", + "StreamingAggregationConfig", +] diff --git a/CommonDependencies/dependencies/py/promql_utilities/setup.py b/CommonDependencies/dependencies/py/promql_utilities/setup.py new file mode 100644 index 0000000..0b1378c --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name="promql_utilities", + version="0.1", + packages=find_packages(), + install_requires=["promql-parser>=0.4.1", "pandas", "pyarrow"], 
+) diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock new file mode 100644 index 0000000..0bad1fe --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock @@ -0,0 +1,2814 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" + +[[package]] +name = "arrow-select" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" 
+dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "comfy-table" +version = "7.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + 
"parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.4.4", + "chrono", + "dashmap", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", +] + 
+[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools", + "log", + "md-5", + "rand", + "regex", + "sha2", + 
"unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + +[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools", +] + +[[package]] +name = 
"datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", +] + +[[package]] +name = "datafusion_summary_library" +version = "0.1.0" +dependencies = [ + "arrow", + "async-trait", + "datafusion", + "futures", + "hyperloglogplus", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "hashbrown" +version = 
"0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies 
= [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies 
= [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + 
+[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + 
+[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash 2.1.2", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" 
+version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 
+dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object_store" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "parquet" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + 
"arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash 1.6.3", + "zstd", + "zstd-sys", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = 
"redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "same-file" +version = "1.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = 
[ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + 
+[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = 
"wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name 
= "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = 
"zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml new file mode 100644 index 0000000..503f62f --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "datafusion_summary_library" +version = "0.1.0" +edition = "2021" +authors = ["ProjectASAP Team"] + +[dependencies] +datafusion = "43" +arrow = "53" +hyperloglogplus = "0.4" +async-trait = "0.1" +futures = "0.3" diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs new file mode 100644 index 0000000..4183785 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs 
@@ -0,0 +1,15 @@ +// DataFusion Summary Library +// +// This library provides logical and physical operators for sketch-based +// query optimization in DataFusion. It supports approximate query processing +// using data structures like HyperLogLog for COUNT(DISTINCT) operations. + +pub mod physical; +pub mod sketch_operators; + +pub use physical::{HllSketch, SketchExtensionPlanner, SummaryInferExec, SummaryInsertExec}; +pub use sketch_operators::{ + GroupingStrategy, InferOperation, PrecomputedSummaryRead, SketchMetadata, SketchSpec, + SketchType, SummaryInfer, SummaryInsert, SummaryMerge, SummaryMergeMultiple, SummaryRead, + SummarySubtract, SummaryType, TypedExpr, +}; diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs new file mode 100644 index 0000000..6fa0dbd --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs @@ -0,0 +1,169 @@ +// HyperLogLog wrapper for cardinality estimation. +// +// This provides a simple wrapper around the hyperloglogplus crate for use +// in sketch-based COUNT(DISTINCT) queries. + +use std::collections::hash_map::{DefaultHasher, RandomState}; +use std::hash::{Hash, Hasher}; + +use hyperloglogplus::{HyperLogLog, HyperLogLogPlus}; + +/// Wrapper around HyperLogLog++ for cardinality estimation. +/// +/// Uses precision 14 by default which gives ~0.8% standard error. +#[derive(Clone)] +pub struct HllSketch { + hll: HyperLogLogPlus, +} + +impl HllSketch { + /// Create a new HLL sketch with default precision (14). + pub fn new() -> Self { + Self::with_precision(14) + } + + /// Create a new HLL sketch with specified precision. + /// + /// Precision must be between 4 and 18. Higher precision means + /// more accuracy but more memory usage. 
+ pub fn with_precision(precision: u8) -> Self { + let hll = HyperLogLogPlus::new(precision, RandomState::new()) + .expect("Valid precision range is 4-18"); + Self { hll } + } + + /// Insert a value into the sketch. + /// + /// The value is hashed to u64 before insertion. + pub fn insert(&mut self, value: &T) { + // Hash the value to u64 first, then insert + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + let hash = hasher.finish(); + self.hll.insert(&hash); + } + + /// Insert a byte slice into the sketch. + pub fn insert_bytes(&mut self, value: &[u8]) { + self.insert(&value); + } + + /// Get the estimated cardinality. + pub fn count(&mut self) -> u64 { + self.hll.count().round() as u64 + } + + /// Merge another HLL sketch into this one. + #[allow(dead_code)] + pub fn merge(&mut self, other: &mut Self) { + self.hll + .merge(&other.hll) + .expect("HLL merge should succeed for same precision"); + } + + /// Serialize the sketch to bytes. + /// + /// Format: [precision: u8][count as f64 bytes(8)] + /// This is a hacky serialization that just stores the current count. + pub fn to_bytes(&mut self) -> Vec { + let precision = 14u8; // We always use 14 for now + let count = self.hll.count(); + + // Simple format: [precision(1)][count as f64 bytes(8)] + let mut bytes = Vec::with_capacity(9); + bytes.push(precision); + bytes.extend_from_slice(&count.to_le_bytes()); + bytes + } + + /// Deserialize a sketch from bytes. + /// Note: This only recovers the count, not the full HLL state. + #[allow(dead_code)] + pub fn from_bytes(bytes: &[u8]) -> Option { + if bytes.len() < 9 { + return None; + } + + let _precision = bytes[0]; + let count_bytes: [u8; 8] = bytes[1..9].try_into().ok()?; + let _count = f64::from_le_bytes(count_bytes); + + // Since we can't truly deserialize the HLL state from just the count, + // we create an empty HLL. This is a limitation of the simple format. 
+ Some(Self::new()) + } +} + +impl Default for HllSketch { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hll_basic() { + let mut hll = HllSketch::new(); + + // Insert 1000 unique values + for i in 0..1000 { + hll.insert(&i); + } + + let count = hll.count(); + // HLL has ~0.8% error at precision 14, so allow 5% tolerance + assert!(count > 900, "Count {} should be > 900", count); + assert!(count < 1100, "Count {} should be < 1100", count); + } + + #[test] + fn test_hll_duplicates() { + let mut hll = HllSketch::new(); + + // Insert same value many times + for _ in 0..1000 { + hll.insert(&42); + } + + let count = hll.count(); + assert_eq!(count, 1, "Duplicates should not increase count"); + } + + #[test] + fn test_hll_merge() { + let mut hll1 = HllSketch::new(); + let mut hll2 = HllSketch::new(); + + // Insert different values into each + for i in 0..500 { + hll1.insert(&i); + } + for i in 500..1000 { + hll2.insert(&i); + } + + hll1.merge(&mut hll2); + let count = hll1.count(); + + // Should have ~1000 unique values + assert!(count > 900, "Merged count {} should be > 900", count); + assert!(count < 1100, "Merged count {} should be < 1100", count); + } + + #[test] + fn test_hll_strings() { + let mut hll = HllSketch::new(); + + for i in 0..1000 { + let s = format!("user_{}", i); + hll.insert(&s); + } + + let count = hll.count(); + assert!(count > 900, "String count {} should be > 900", count); + assert!(count < 1100, "String count {} should be < 1100", count); + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs new file mode 100644 index 0000000..ae432b9 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs @@ -0,0 +1,17 @@ +// Physical execution module for sketch-based query plans. 
+// +// This module provides physical execution plan nodes for sketch operators: +// - SummaryInsertExec: Computes sketches from raw data +// - SummaryInferExec: Extracts results from sketches +// +// Currently only HLL (HyperLogLog) sketches are supported for COUNT(DISTINCT). + +mod hll; +mod planner; +mod summary_infer_exec; +mod summary_insert_exec; + +pub use hll::HllSketch; +pub use planner::SketchExtensionPlanner; +pub use summary_infer_exec::SummaryInferExec; +pub use summary_insert_exec::SummaryInsertExec; diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs new file mode 100644 index 0000000..5823923 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs @@ -0,0 +1,205 @@ +// ExtensionPlanner for sketch-based logical plan nodes. +// +// This planner converts SummaryInsert and SummaryInfer logical nodes +// into their physical execution plan counterparts. + +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::common::{DataFusionError, Result as DFResult}; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; + +use crate::sketch_operators::{InferOperation, SketchType, SummaryInfer, SummaryInsert}; + +use super::{SummaryInferExec, SummaryInsertExec}; + +/// ExtensionPlanner that handles SummaryInsert and SummaryInfer logical nodes. 
+#[derive(Debug, Default)] +pub struct SketchExtensionPlanner; + +impl SketchExtensionPlanner { + pub fn new() -> Self { + Self + } +} + +#[async_trait] +impl ExtensionPlanner for SketchExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DFResult>> { + // Try to downcast to SummaryInsert + if let Some(summary_insert) = node.as_any().downcast_ref::() { + return self.plan_summary_insert(summary_insert, physical_inputs); + } + + // Try to downcast to SummaryInfer + if let Some(summary_infer) = node.as_any().downcast_ref::() { + return self.plan_summary_infer(summary_infer, physical_inputs); + } + + // Unknown node type, let other planners handle it + Ok(None) + } +} + +impl SketchExtensionPlanner { + fn plan_summary_insert( + &self, + node: &SummaryInsert, + physical_inputs: &[Arc], + ) -> DFResult>> { + if physical_inputs.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInsert expects exactly one input".to_string(), + )); + } + + let input = physical_inputs[0].clone(); + let input_schema = input.schema(); + + // Only support HLL for now + if node.sketches.len() != 1 { + return Err(DataFusionError::NotImplemented( + "SummaryInsert with multiple sketches not yet supported".to_string(), + )); + } + + let sketch_spec = &node.sketches[0]; + if sketch_spec.sketch_type != SketchType::HLL { + return Err(DataFusionError::NotImplemented(format!( + "Sketch type {:?} not yet supported, only HLL is implemented", + sketch_spec.sketch_type + ))); + } + + // Find value column index + let value_col_idx = match &sketch_spec.value_column { + Some(col_name) => input_schema + .fields() + .iter() + .position(|f| f.name() == col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Value column '{}' not found in input schema", + col_name + )) + })?, + None => { + return 
Err(DataFusionError::Plan( + "SummaryInsert requires a value column for HLL".to_string(), + )); + } + }; + + // Find group-by column indices + let group_by_indices: Vec = if !node.group_by_exprs.is_empty() { + // Use group_by_exprs: find columns by expression name + node.group_by_exprs + .iter() + .map(|typed_expr| { + // For simple column expressions, extract the column name + let col_name = + if let datafusion::logical_expr::Expr::Column(col) = &typed_expr.expr { + col.name.clone() + } else { + typed_expr.expr.schema_name().to_string() + }; + + input_schema + .fields() + .iter() + .position(|f| f.name() == &col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Group-by column '{}' not found in input schema", + col_name + )) + }) + }) + .collect::>>()? + } else { + // Use legacy group_by strings + node.group_by + .iter() + .map(|col_name| { + input_schema + .fields() + .iter() + .position(|f| f.name() == col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Group-by column '{}' not found in input schema", + col_name + )) + }) + }) + .collect::>>()? 
+ }; + + let exec = SummaryInsertExec::new( + input, + value_col_idx, + group_by_indices, + sketch_spec.output_column_name.clone(), + ); + + Ok(Some(Arc::new(exec))) + } + + fn plan_summary_infer( + &self, + node: &SummaryInfer, + physical_inputs: &[Arc], + ) -> DFResult>> { + if physical_inputs.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInfer expects exactly one input".to_string(), + )); + } + + let input = physical_inputs[0].clone(); + let input_schema = input.schema(); + + // Only support single operation for now + if node.operations.len() != 1 { + return Err(DataFusionError::NotImplemented( + "SummaryInfer with multiple operations not yet supported".to_string(), + )); + } + + let operation = node.operations[0].clone(); + let output_name = node.output_names[0].clone(); + + // Only support CountDistinct for now + if !matches!(operation, InferOperation::CountDistinct) { + return Err(DataFusionError::NotImplemented(format!( + "Infer operation {:?} not yet supported, only CountDistinct is implemented", + operation + ))); + } + + // Find sketch column index (last column with "sketch" in name, or Binary type) + let sketch_col_idx = input_schema + .fields() + .iter() + .rposition(|f| { + f.name().contains("sketch") || f.data_type() == &arrow::datatypes::DataType::Binary + }) + .ok_or_else(|| { + DataFusionError::Plan("No sketch column found in input schema".to_string()) + })?; + + let exec = SummaryInferExec::new(input, sketch_col_idx, operation, output_name); + + Ok(Some(Arc::new(exec))) + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs new file mode 100644 index 0000000..8549452 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs @@ -0,0 +1,282 @@ +// Physical execution plan for SummaryInfer (sketch querying). 
+// +// This ExecutionPlan reads sketch data and extracts results. +// Currently only supports CountDistinct operation on HLL sketches. + +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use arrow::array::{Array, ArrayRef, BinaryArray, RecordBatch, UInt64Builder}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::{DataFusionError, Result as DFResult}; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; +use futures::Stream; + +use crate::sketch_operators::InferOperation; + +/// Physical execution plan for extracting results from HLL sketches. +/// +/// Takes input batches with sketch columns and produces one row per input row with: +/// - Group key columns (passed through) +/// - Result column (e.g., UInt64 for CountDistinct) +#[derive(Debug)] +pub struct SummaryInferExec { + /// Input execution plan (typically SummaryInsertExec) + input: Arc, + + /// Index of the sketch column in input schema + sketch_col_idx: usize, + + /// Infer operation to perform + operation: InferOperation, + + /// Output column name + output_name: String, + + /// Output schema + schema: SchemaRef, + + /// Plan properties (cached) + properties: PlanProperties, +} + +impl SummaryInferExec { + pub fn new( + input: Arc, + sketch_col_idx: usize, + operation: InferOperation, + output_name: String, + ) -> Self { + let input_schema = input.schema(); + + // Build output schema: all columns except sketch column, plus result column + let mut fields: Vec = input_schema + .fields() + .iter() + .enumerate() + .filter(|(idx, _)| *idx != sketch_col_idx) + .map(|(_, f)| f.as_ref().clone()) + .collect(); + + // Add result column based on operation type + let result_type = match &operation { + 
InferOperation::CountDistinct => DataType::UInt64, + InferOperation::Quantile(_) | InferOperation::Median => DataType::Float64, + _ => DataType::UInt64, // Default for unsupported ops + }; + + fields.push(Field::new(&output_name, result_type, false)); + + let schema = Arc::new(Schema::new(fields)); + + // Plan properties: same partitioning as input + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + + Self { + input, + sketch_col_idx, + operation, + output_name, + schema, + properties, + } + } +} + +impl DisplayAs for SummaryInferExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "SummaryInferExec: sketch_col={}, op={}, output={}", + self.sketch_col_idx, self.operation, self.output_name + ) + } + } + } +} + +impl ExecutionPlan for SummaryInferExec { + fn name(&self) -> &str { + "SummaryInferExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DFResult> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInferExec expects exactly one child".to_string(), + )); + } + Ok(Arc::new(SummaryInferExec::new( + children[0].clone(), + self.sketch_col_idx, + self.operation.clone(), + self.output_name.clone(), + ))) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> DFResult { + if partition != 0 { + return Err(DataFusionError::Internal(format!( + "SummaryInferExec only supports partition 0, got {}", + partition + ))); + } + + let input_stream = self.input.execute(0, context)?; + + let stream = SummaryInferStream::new( + input_stream, + 
self.schema.clone(), + self.sketch_col_idx, + self.operation.clone(), + ); + + Ok(Box::pin(stream)) + } +} + +/// Stream that transforms sketch batches into result batches. +struct SummaryInferStream { + /// Input stream + input: SendableRecordBatchStream, + + /// Output schema + schema: SchemaRef, + + /// Sketch column index + sketch_col_idx: usize, + + /// Operation to perform + operation: InferOperation, +} + +impl SummaryInferStream { + fn new( + input: SendableRecordBatchStream, + schema: SchemaRef, + sketch_col_idx: usize, + operation: InferOperation, + ) -> Self { + Self { + input, + schema, + sketch_col_idx, + operation, + } + } + + /// Transform a batch by extracting results from sketches. + fn transform_batch(&self, batch: &RecordBatch) -> DFResult { + let num_rows = batch.num_rows(); + + // Get the sketch column + let sketch_col = batch.column(self.sketch_col_idx); + let sketch_array = sketch_col + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("Sketch column is not Binary type".to_string()) + })?; + + // Build result column based on operation + let result_col: ArrayRef = match &self.operation { + InferOperation::CountDistinct => { + let mut builder = UInt64Builder::with_capacity(num_rows); + for i in 0..num_rows { + if sketch_array.is_null(i) { + builder.append_null(); + } else { + let sketch_bytes = sketch_array.value(i); + // Deserialize and get count + // Note: Our simple serialization format stores the count directly + let count = if sketch_bytes.len() >= 9 { + let count_bytes: [u8; 8] = sketch_bytes[1..9].try_into().unwrap(); + f64::from_le_bytes(count_bytes).round() as u64 + } else { + 0 + }; + builder.append_value(count); + } + } + Arc::new(builder.finish()) + } + _ => { + return Err(DataFusionError::NotImplemented(format!( + "Infer operation {:?} not yet implemented", + self.operation + ))); + } + }; + + // Build output columns: all input columns except sketch, plus result + let mut columns: Vec = batch + .columns() 
+ .iter() + .enumerate() + .filter(|(idx, _)| *idx != self.sketch_col_idx) + .map(|(_, col)| col.clone()) + .collect(); + columns.push(result_col); + + RecordBatch::try_new(self.schema.clone(), columns) + .map_err(|e| DataFusionError::ArrowError(e, None)) + } +} + +impl Stream for SummaryInferStream { + type Item = DFResult; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match Pin::new(&mut self.input).poll_next(cx) { + Poll::Ready(Some(Ok(batch))) => { + let result = self.transform_batch(&batch); + Poll::Ready(Some(result)) + } + Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))), + Poll::Ready(None) => Poll::Ready(None), + Poll::Pending => Poll::Pending, + } + } +} + +impl datafusion::physical_plan::RecordBatchStream for SummaryInferStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs new file mode 100644 index 0000000..a62d8ba --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs @@ -0,0 +1,434 @@ +// Physical execution plan for SummaryInsert (sketch building). +// +// This ExecutionPlan consumes input batches and builds HLL sketches +// for each group. Currently only supports HLL sketches. 
+ +use std::any::Any; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, BinaryBuilder, RecordBatch, StringBuilder, UInt64Builder}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::{DataFusionError, Result as DFResult, ScalarValue}; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; + +use super::hll::HllSketch; + +/// Physical execution plan for building HLL sketches. +/// +/// Takes input batches and produces one row per group with: +/// - Group key columns +/// - Binary column containing serialized HLL sketch +#[derive(Debug)] +pub struct SummaryInsertExec { + /// Input execution plan + input: Arc, + + /// Index of the value column to sketch (in input schema) + value_col_idx: usize, + + /// Indices of group-by columns (in input schema) + group_by_indices: Vec, + + /// Name of the output sketch column + sketch_col_name: String, + + /// Output schema + schema: SchemaRef, + + /// Plan properties (cached) + properties: PlanProperties, +} + +impl SummaryInsertExec { + pub fn new( + input: Arc, + value_col_idx: usize, + group_by_indices: Vec, + sketch_col_name: String, + ) -> Self { + let input_schema = input.schema(); + + // Build output schema: group columns + sketch column + let mut fields: Vec = group_by_indices + .iter() + .map(|&idx| input_schema.field(idx).clone()) + .collect(); + + // Add sketch column with the specified name + fields.push(Field::new(&sketch_col_name, DataType::Binary, false)); + + let schema = Arc::new(Schema::new(fields)); + + // Plan properties: single partition output, no ordering guarantees + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); 
+ + Self { + input, + value_col_idx, + group_by_indices, + sketch_col_name, + schema, + properties, + } + } + + /// Extracts a group key from a row as a vector of ScalarValues. + fn extract_group_key( + batch: &RecordBatch, + row_idx: usize, + group_by_indices: &[usize], + ) -> Vec { + group_by_indices + .iter() + .map(|&col_idx| { + ScalarValue::try_from_array(batch.column(col_idx), row_idx) + .unwrap_or(ScalarValue::Null) + }) + .collect() + } + + /// Extracts a value as bytes for hashing. + fn extract_value_bytes(array: &ArrayRef, row_idx: usize) -> Vec { + // Convert any value to string representation for hashing + // This is a hacky but universal approach + if array.is_null(row_idx) { + return b"__NULL__".to_vec(); + } + + // Use Arrow's display formatter to get string representation + let value = ScalarValue::try_from_array(array.as_ref(), row_idx) + .map(|v| v.to_string()) + .unwrap_or_else(|_| "__ERROR__".to_string()); + + value.into_bytes() + } +} + +impl DisplayAs for SummaryInsertExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "SummaryInsertExec: value_col={}, group_by={:?}", + self.value_col_idx, self.group_by_indices + ) + } + } + } +} + +impl ExecutionPlan for SummaryInsertExec { + fn name(&self) -> &str { + "SummaryInsertExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DFResult> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInsertExec expects exactly one child".to_string(), + )); + } + Ok(Arc::new(SummaryInsertExec::new( + children[0].clone(), + self.value_col_idx, + self.group_by_indices.clone(), + self.sketch_col_name.clone(), 
+ ))) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> DFResult { + if partition != 0 { + return Err(DataFusionError::Internal(format!( + "SummaryInsertExec only supports partition 0, got {}", + partition + ))); + } + + // Get input stream + let input_stream = self.input.execute(0, context)?; + + // Create the output stream + let schema = self.schema.clone(); + let value_col_idx = self.value_col_idx; + let group_by_indices = self.group_by_indices.clone(); + + let stream = + SummaryInsertStream::new(input_stream, schema, value_col_idx, group_by_indices); + + Ok(Box::pin(stream)) + } +} + +/// Stream that consumes input batches and produces aggregated sketch results. +struct SummaryInsertStream { + /// Input stream + input: SendableRecordBatchStream, + + /// Output schema + schema: SchemaRef, + + /// Value column index + value_col_idx: usize, + + /// Group-by column indices + group_by_indices: Vec, + + /// Accumulated sketches per group + groups: HashMap, HllSketch>, + + /// Whether we've finished consuming input + finished_input: bool, + + /// Whether we've emitted the final result + emitted_result: bool, +} + +impl SummaryInsertStream { + fn new( + input: SendableRecordBatchStream, + schema: SchemaRef, + value_col_idx: usize, + group_by_indices: Vec, + ) -> Self { + Self { + input, + schema, + value_col_idx, + group_by_indices, + groups: HashMap::new(), + finished_input: false, + emitted_result: false, + } + } + + /// Process a batch of input data. 
+ fn process_batch(&mut self, batch: &RecordBatch) { + let value_array = batch.column(self.value_col_idx); + let num_rows = batch.num_rows(); + + for row_idx in 0..num_rows { + // Extract group key + let group_key = + SummaryInsertExec::extract_group_key(batch, row_idx, &self.group_by_indices); + + // Get or create HLL for this group + let hll = self.groups.entry(group_key).or_default(); + + // Extract value and insert into HLL + let value_bytes = SummaryInsertExec::extract_value_bytes(value_array, row_idx); + hll.insert_bytes(&value_bytes); + } + } + + /// Build the final output batch from accumulated sketches. + fn build_output(&mut self) -> DFResult { + let num_groups = self.groups.len(); + + // Build group key columns + let mut group_builders: Vec = self + .schema + .fields() + .iter() + .take(self.group_by_indices.len()) + .map(|field| ScalarArrayBuilder::new(field.data_type(), num_groups)) + .collect(); + + // Build sketch column + let mut sketch_builder = BinaryBuilder::with_capacity(num_groups, num_groups * 16); + + // Populate arrays + for (group_key, hll) in &mut self.groups { + // Add group key values + for (idx, scalar) in group_key.iter().enumerate() { + group_builders[idx].append(scalar); + } + + // Add serialized sketch + let sketch_bytes = hll.to_bytes(); + sketch_builder.append_value(&sketch_bytes); + } + + // Finish building arrays + let mut columns: Vec = group_builders.iter_mut().map(|b| b.finish()).collect(); + columns.push(Arc::new(sketch_builder.finish())); + + RecordBatch::try_new(self.schema.clone(), columns) + .map_err(|e| DataFusionError::ArrowError(e, None)) + } +} + +use futures::Stream; +use std::pin::Pin; +use std::task::{Context, Poll}; + +impl Stream for SummaryInsertStream { + type Item = DFResult; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + // If we've emitted the result, we're done + if self.emitted_result { + return Poll::Ready(None); + } + + // Consume all input batches first + if 
!self.finished_input { + loop { + match Pin::new(&mut self.input).poll_next(cx) { + Poll::Ready(Some(Ok(batch))) => { + self.process_batch(&batch); + } + Poll::Ready(Some(Err(e))) => { + return Poll::Ready(Some(Err(e))); + } + Poll::Ready(None) => { + self.finished_input = true; + break; + } + Poll::Pending => { + return Poll::Pending; + } + } + } + } + + // Build and emit the final result + self.emitted_result = true; + + // Handle case with no groups + if self.groups.is_empty() { + return Poll::Ready(None); + } + + let batch = self.build_output(); + Poll::Ready(Some(batch)) + } +} + +impl datafusion::physical_plan::RecordBatchStream for SummaryInsertStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +// Helper enum for building arrays dynamically +enum ScalarArrayBuilder { + Utf8(StringBuilder), + UInt64(UInt64Builder), + Int64(arrow::array::Int64Builder), + Float64(arrow::array::Float64Builder), + Int32(arrow::array::Int32Builder), + UInt32(arrow::array::UInt32Builder), +} + +impl ScalarArrayBuilder { + fn new(data_type: &DataType, capacity: usize) -> Self { + match data_type { + DataType::Utf8 => { + ScalarArrayBuilder::Utf8(StringBuilder::with_capacity(capacity, capacity * 32)) + } + DataType::UInt64 => ScalarArrayBuilder::UInt64(UInt64Builder::with_capacity(capacity)), + DataType::Int64 => { + ScalarArrayBuilder::Int64(arrow::array::Int64Builder::with_capacity(capacity)) + } + DataType::Float64 => { + ScalarArrayBuilder::Float64(arrow::array::Float64Builder::with_capacity(capacity)) + } + DataType::Int32 => { + ScalarArrayBuilder::Int32(arrow::array::Int32Builder::with_capacity(capacity)) + } + DataType::UInt32 => { + ScalarArrayBuilder::UInt32(arrow::array::UInt32Builder::with_capacity(capacity)) + } + // For unsupported types, fall back to string representation + _ => ScalarArrayBuilder::Utf8(StringBuilder::with_capacity(capacity, capacity * 32)), + } + } + + fn append(&mut self, scalar: &ScalarValue) { + match (self, scalar) { + 
(ScalarArrayBuilder::Utf8(b), ScalarValue::Utf8(v)) => match v { + Some(s) => b.append_value(s), + None => b.append_null(), + }, + (ScalarArrayBuilder::UInt64(b), ScalarValue::UInt64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Int64(b), ScalarValue::Int64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Float64(b), ScalarValue::Float64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Int32(b), ScalarValue::Int32(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::UInt32(b), ScalarValue::UInt32(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + // Fallback: convert to string for Utf8 builder + (ScalarArrayBuilder::Utf8(b), scalar) => { + if scalar.is_null() { + b.append_null(); + } else { + b.append_value(scalar.to_string()); + } + } + // For type mismatches with non-Utf8 builders, append null + (ScalarArrayBuilder::UInt64(b), _) => b.append_null(), + (ScalarArrayBuilder::Int64(b), _) => b.append_null(), + (ScalarArrayBuilder::Float64(b), _) => b.append_null(), + (ScalarArrayBuilder::Int32(b), _) => b.append_null(), + (ScalarArrayBuilder::UInt32(b), _) => b.append_null(), + } + } + + fn finish(&mut self) -> ArrayRef { + match self { + ScalarArrayBuilder::Utf8(b) => Arc::new(b.finish()), + ScalarArrayBuilder::UInt64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Int64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Float64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Int32(b) => Arc::new(b.finish()), + ScalarArrayBuilder::UInt32(b) => Arc::new(b.finish()), + } + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs new file mode 100644 index 
0000000..64fe394 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs @@ -0,0 +1,1630 @@ +// Sketch-based query plan operators for DataFusion +// +// This module defines custom logical plan nodes for sketch-based query optimization. +// These operators support exploring different sketch-based execution strategies. +#![allow(deprecated)] + +#[allow(deprecated)] +use datafusion::arrow::datatypes::{DataType, Field}; +use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use std::cmp::Ordering; +use std::collections::BTreeMap; // BTreeMap instead of HashMap (can derive Hash) +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + +// ============================================================================ +// TypedExpr - Expression with pre-resolved type +// ============================================================================ + +/// An expression paired with its pre-resolved data type. +/// +/// This is used to preserve type information when expressions are passed through +/// plan nodes that may not have access to the original schema needed to resolve types. +/// For example, SummaryInfer's input is a SummaryInsert which may not include +/// the columns referenced in GROUP BY expressions (especially for Hydra strategy). 
+#[derive(Debug, Clone)] +pub struct TypedExpr { + pub expr: Expr, + pub data_type: DataType, +} + +impl TypedExpr { + pub fn new(expr: Expr, data_type: DataType) -> Self { + Self { expr, data_type } + } +} + +// Manual trait implementations since Expr implements these traits +impl PartialEq for TypedExpr { + fn eq(&self, other: &Self) -> bool { + self.expr == other.expr && self.data_type == other.data_type + } +} + +impl Eq for TypedExpr {} + +impl Hash for TypedExpr { + fn hash(&self, state: &mut H) { + self.expr.hash(state); + self.data_type.hash(state); + } +} + +// ============================================================================ +// Sketch Types +// ============================================================================ + +/// Types of sketches/summaries supported for query processing +/// Also aliased as SummaryType for clarity (includes both sketches and exact aggregators) +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum SketchType { + // ======================================================================== + // Exact aggregators (non-sketch, single population) + // ======================================================================== + Sum, // Exact sum accumulator + Increase, // Counter increase tracking + MinMax, // Min/max values + + MultipleSum, + MultipleIncrease, + MultipleMinMax, + + // ======================================================================== + // Set aggregators + // ======================================================================== + SetAggregator, // Exact set of group keys (HashSet-based) + DeltaSetAggregator, // Set aggregation with separate key tracking + + // ======================================================================== + // COUNT DISTINCT sketches + // ======================================================================== + HLL, // HyperLogLog + UltraLogLog, // UltraLogLog (improved HLL) + HydraHLL, // HyperLogLog with multi-population support + + // 
======================================================================== + // Quantile sketches + // ======================================================================== + KLL, // KLL sketch + TDigest, // T-Digest + HydraKLL, // KLL with multi-population support + + // ======================================================================== + // Heavy hitters / TOP K + // ======================================================================== + SpaceSaving, // Space-Saving algorithm + FrequentItems, // Frequent items sketch + + // ======================================================================== + // Frequency estimation + // ======================================================================== + CountMinSketch, // Count-Min Sketch + CountSketch, // Count Sketch + + // ======================================================================== + // General purpose + // ======================================================================== + Sampling, // Reservoir sampling +} + +/// Type alias for clarity - SummaryType includes both sketches and exact aggregators +pub type SummaryType = SketchType; + +impl fmt::Display for SketchType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + // Exact aggregators + SketchType::Sum => write!(f, "Sum"), + SketchType::Increase => write!(f, "Increase"), + SketchType::MinMax => write!(f, "MinMax"), + SketchType::SetAggregator => write!(f, "SetAggregator"), + SketchType::DeltaSetAggregator => write!(f, "DeltaSetAggregator"), + SketchType::MultipleSum => write!(f, "MultipleSum"), + SketchType::MultipleIncrease => write!(f, "MultipleIncrease"), + SketchType::MultipleMinMax => write!(f, "MultipleMinMax"), + // Sketches + SketchType::HLL => write!(f, "HLL"), + SketchType::UltraLogLog => write!(f, "UltraLogLog"), + SketchType::HydraHLL => write!(f, "HydraHLL"), + SketchType::KLL => write!(f, "KLL"), + SketchType::TDigest => write!(f, "TDigest"), + SketchType::HydraKLL => write!(f, "HydraKLL"), + 
SketchType::SpaceSaving => write!(f, "SpaceSaving"), + SketchType::FrequentItems => write!(f, "FrequentItems"), + SketchType::CountMinSketch => write!(f, "CountMinSketch"), + SketchType::CountSketch => write!(f, "CountSketch"), + SketchType::Sampling => write!(f, "Sampling"), + } + } +} + +impl SketchType { + /// Check if this sketch type supports multi-population (Hydra-style) + pub fn is_hydra(&self) -> bool { + matches!(self, SketchType::HydraHLL | SketchType::HydraKLL) + } + + /// Get the base sketch type (non-Hydra version) + pub fn base_type(&self) -> SketchType { + match self { + SketchType::HydraHLL => SketchType::HLL, + SketchType::HydraKLL => SketchType::KLL, + other => other.clone(), + } + } +} + +// ============================================================================ +// Inference Operations +// ============================================================================ + +/// Operations that can be performed on sketches/summaries to extract results +/// Note: Uses simplified types (strings instead of Expr) for DataFusion integration +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum InferOperation { + // ======================================================================== + // Exact aggregator extraction operations + // ======================================================================== + /// Extract sum value from Sum/HydraSum accumulator + ExtractSum, + + /// Extract count value from an accumulator + ExtractCount, + + /// Extract minimum value from MinMax/HydraMinMax accumulator + ExtractMin, + + /// Extract maximum value from MinMax/HydraMinMax accumulator + ExtractMax, + + /// Extract increase value from Increase/HydraIncrease accumulator + ExtractIncrease, + + /// Extract rate (increase / time_range) from Increase accumulator + ExtractRate, + + // ======================================================================== + // Sketch operations + // 
======================================================================== + /// COUNT(DISTINCT column) + CountDistinct, + + /// Quantile/percentile estimation + /// Stores quantile as integer (0-10000) for 4 decimal places: 0.9500 = 9500 + Quantile(u16), + + /// Median (equivalent to Quantile(0.5)) + Median, + + /// TOP K items + TopK(usize), + + /// Frequency-based COUNT(*) aggregation with GROUP BY + /// Queries frequency sketch to get count for each group key + FrequencyCount, + + /// Frequency-based SUM(column) aggregation with GROUP BY + /// Queries frequency sketch to get sum for each group key + FrequencySum, + + /// Frequency-based AVG(column) aggregation with GROUP BY + /// Computed as SUM(column) / COUNT(*) using two frequency sketches + FrequencyAvg, + + /// Frequency estimate for a specific value (stored as string) + FrequencyEstimate(String), + + /// Get all frequent items above threshold (stored as integer, 0-10000) + FrequentItems(u16), + + /// Enumerate set contents (for SetAggregator) + /// Returns all unique values seen in the set + EnumerateSet, +} + +impl InferOperation { + /// Create a Quantile operation from a float (0.0 to 1.0) + pub fn quantile(p: f64) -> Self { + InferOperation::Quantile((p * 10000.0).round() as u16) + } + + /// Get the quantile value as f64 + pub fn quantile_value(&self) -> Option { + match self { + InferOperation::Quantile(p) => Some(*p as f64 / 10000.0), + InferOperation::Median => Some(0.5), + _ => None, + } + } + + /// Create a FrequentItems operation from a float threshold + pub fn frequent_items(threshold: f64) -> Self { + InferOperation::FrequentItems((threshold * 10000.0).round() as u16) + } + + /// Get the threshold value as f64 + pub fn threshold_value(&self) -> Option { + match self { + InferOperation::FrequentItems(t) => Some(*t as f64 / 10000.0), + _ => None, + } + } +} + +impl fmt::Display for InferOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + // Exact aggregator 
extractions + InferOperation::ExtractSum => write!(f, "EXTRACT_SUM"), + InferOperation::ExtractCount => write!(f, "EXTRACT_COUNT"), + InferOperation::ExtractMin => write!(f, "EXTRACT_MIN"), + InferOperation::ExtractMax => write!(f, "EXTRACT_MAX"), + InferOperation::ExtractIncrease => write!(f, "EXTRACT_INCREASE"), + InferOperation::ExtractRate => write!(f, "EXTRACT_RATE"), + // Sketch operations + InferOperation::CountDistinct => write!(f, "COUNT_DISTINCT"), + InferOperation::Quantile(p) => write!(f, "QUANTILE({:.4})", *p as f64 / 10000.0), + InferOperation::Median => write!(f, "MEDIAN"), + InferOperation::TopK(k) => write!(f, "TOPK({})", k), + InferOperation::FrequencyCount => write!(f, "FREQ_COUNT"), + InferOperation::FrequencySum => write!(f, "FREQ_SUM"), + InferOperation::FrequencyAvg => write!(f, "FREQ_AVG"), + InferOperation::FrequencyEstimate(value) => write!(f, "FREQ_EST({})", value), + InferOperation::FrequentItems(threshold) => { + write!(f, "FREQ_ITEMS({:.4})", *threshold as f64 / 10000.0) + } + InferOperation::EnumerateSet => write!(f, "ENUM_SET"), + } + } +} + +// ============================================================================ +// Grouping Strategy +// ============================================================================ + +/// Strategy for handling GROUP BY queries +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum GroupingStrategy { + /// One sketch per group (filter-based, computed separately) + PerGroup, + + /// Single Hydra-style sketch containing all groups + Hydra, + + /// No grouping (simple aggregation) + None, +} + +// ============================================================================ +// Sketch Metadata +// ============================================================================ + +/// Metadata for identifying and loading sketches +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct SketchMetadata { + pub table_name: String, + pub column_name: String, + pub sketch_type: SketchType, + pub 
filter_predicate: Option, + pub key_columns: Vec, // For Hydra sketches +} + +impl fmt::Display for SketchMetadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}.{}.{}", + self.table_name, self.column_name, self.sketch_type + )?; + if let Some(filter) = &self.filter_predicate { + write!(f, " WHERE {}", filter)?; + } + if !self.key_columns.is_empty() { + write!(f, " KEY BY [{}]", self.key_columns.join(", "))?; + } + Ok(()) + } +} + +// ============================================================================ +// Sketch Specification +// ============================================================================ + +/// Specification for a single sketch to create +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct SketchSpec { + pub value_column: Option, + pub sketch_type: SketchType, + pub output_column_name: String, // e.g., "host_sketch", "cpu_sketch" +} + +// ============================================================================ +// SummaryInsert - Compute sketch from raw data +// ============================================================================ + +/// Logical plan node: Compute a sketch from raw data +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryInsert { + /// Input data source + pub input: Arc, + + /// Sketches to create (one or more) + pub sketches: Vec, + + /// GROUP BY columns for per-group strategy (columns appear in output schema) + /// Legacy field - use group_by_exprs for computed expressions + pub group_by: Vec, + + /// Key columns for Hydra strategy (columns embedded in sketch, NOT in output schema) + /// Legacy field - use key_column_exprs for computed expressions + pub key_columns: Vec, + + /// GROUP BY expressions with pre-resolved types for per-group strategy + /// When non-empty, takes precedence over group_by in compute_schema + pub group_by_exprs: Vec, + + /// Key column expressions with pre-resolved types for Hydra strategy + /// These are embedded in 
the sketch, not in output schema + pub key_column_exprs: Vec, + + /// Optional parameters (e.g., HLL precision, KLL k value) + /// Using BTreeMap instead of HashMap so it can derive Hash + pub parameters: BTreeMap, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl SummaryInsert { + /// Create a new SummaryInsert with multiple sketches + pub fn new(input: Arc, sketches: Vec) -> DFResult { + if sketches.is_empty() { + return Err(DataFusionError::Plan( + "SummaryInsert requires at least one sketch".to_string(), + )); + } + + let schema = Self::compute_schema(&input, &sketches, &[], &[], &[])?; + Ok(Self { + input, + sketches, + group_by: vec![], + key_columns: vec![], + group_by_exprs: vec![], + key_column_exprs: vec![], + parameters: BTreeMap::new(), + schema, + }) + } + + /// Helper constructor for single sketch (backward compatibility) + pub fn single( + input: Arc, + value_column: Option, + sketch_type: SketchType, + ) -> DFResult { + let output_column_name = match &value_column { + Some(col) => format!("{}_sketch", col), + None => "value_sketch".to_string(), + }; + + Self::new( + input, + vec![SketchSpec { + value_column, + sketch_type, + output_column_name, + }], + ) + } + + pub fn with_group_by(mut self, group_by: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &group_by, + &self.key_columns, + &self.group_by_exprs, + )?; + self.group_by = group_by; + Ok(self) + } + + pub fn with_key_columns(mut self, key_columns: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &self.group_by, + &key_columns, + &self.group_by_exprs, + )?; + self.key_columns = key_columns; + Ok(self) + } + + /// Set GROUP BY expressions with pre-resolved types (supports computed expressions) + pub fn with_group_by_exprs(mut self, group_by_exprs: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &self.group_by, + &self.key_columns, + &group_by_exprs, + )?; 
+ self.group_by_exprs = group_by_exprs; + Ok(self) + } + + /// Set key column expressions with pre-resolved types for Hydra strategy + /// Note: These are embedded in the sketch, not in the output schema + pub fn with_key_column_exprs(mut self, key_column_exprs: Vec) -> DFResult { + // key_column_exprs don't affect output schema, but store them for later use + self.key_column_exprs = key_column_exprs; + Ok(self) + } + + pub fn with_parameters(mut self, parameters: BTreeMap) -> Self { + self.parameters = parameters; + self + } + + /// Compute output schema based on grouping strategy + fn compute_schema( + input: &Arc, + sketches: &[SketchSpec], + group_by: &[String], + _key_columns: &[String], + group_by_exprs: &[TypedExpr], + ) -> DFResult { + let input_schema = input.schema(); + let mut qualified_fields = Vec::new(); + + // For per-group strategy: include group columns in output with their qualifications + // This matches vanilla DataFusion Aggregate behavior + // + // Prefer group_by_exprs (TypedExpr) over group_by (String) if available + if !group_by_exprs.is_empty() { + // Use TypedExpr - supports computed expressions like date_part(), CASE, etc. 
+ for typed_expr in group_by_exprs { + // For simple columns, use col.name as field name and col.relation as qualifier + // For computed expressions, use schema_name() with no qualifier + let (qualifier, field_name) = if let Expr::Column(col) = &typed_expr.expr { + (col.relation.clone(), col.name.clone()) + } else { + (None, typed_expr.expr.schema_name().to_string()) + }; + qualified_fields.push(( + qualifier, + Arc::new(Field::new(&field_name, typed_expr.data_type.clone(), true)), + )); + } + } else if !group_by.is_empty() { + // Fallback to legacy string-based group_by + for col_name in group_by { + // Get both qualifier and field to preserve qualification + let (qualifier, field) = input_schema + .qualified_field_with_unqualified_name(col_name) + .map_err(|e| { + DataFusionError::Plan(format!( + "Group column '{}' not found in input schema: {}", + col_name, e + )) + })?; + qualified_fields.push((qualifier.cloned(), Arc::new(field.clone()))); + } + } + + // For Hydra strategy: key columns are embedded in sketch, not in output + // (no fields added here - neither key_columns nor key_column_exprs affect output) + + // Add sketch columns (Binary type, unqualified) + // Create one column per sketch specification + for sketch_spec in sketches { + qualified_fields.push(( + None, + Arc::new(Field::new( + &sketch_spec.output_column_name, + DataType::Binary, + false, + )), + )); + } + + // Create DFSchema from qualified fields + let schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } +} + +impl PartialOrd for SummaryInsert { + fn partial_cmp(&self, other: &Self) -> Option { + // Compare by sketches, then grouping, then parameters, then input + match self.sketches.partial_cmp(&other.sketches) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.group_by.partial_cmp(&other.group_by) { + Some(Ordering::Equal) => {} + 
other => return other, + } + match self.key_columns.partial_cmp(&other.key_columns) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.parameters.partial_cmp(&other.parameters) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.input.partial_cmp(&other.input) { + Some(Ordering::Equal) => {} + other => return other, + } + // Compare schemas by pointer (Arc comparison) + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryInsert { + fn name(&self) -> &str { + "SummaryInsert" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.input.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.sketches.len() == 1 { + // Single sketch: show simplified format + let sketch = &self.sketches[0]; + write!(f, "SummaryInsert: sketch_type={}", sketch.sketch_type)?; + if let Some(col) = &sketch.value_column { + write!(f, ", value_column={}", col)?; + } + } else { + // Multiple sketches: show as array + write!(f, "SummaryInsert: sketches=[")?; + for (i, sketch) in self.sketches.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{{type={}", sketch.sketch_type)?; + if let Some(col) = &sketch.value_column { + write!(f, ", column={}", col)?; + } + write!(f, "}}")?; + } + write!(f, "]")?; + } + if !self.group_by.is_empty() { + write!(f, ", group_by=[{}]", self.group_by.join(", "))?; + } + if !self.key_columns.is_empty() { + write!(f, ", key_columns=[{}]", self.key_columns.join(", "))?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + let input = Arc::new(inputs[0].clone()); + // Recompute schema with new input + let schema = Self::compute_schema( + &input, + &self.sketches, + &self.group_by, + &self.key_columns, + &self.group_by_exprs, + ) + .unwrap_or_else(|_| 
self.schema.clone()); + + Self { + input, + sketches: self.sketches.clone(), + group_by: self.group_by.clone(), + key_columns: self.key_columns.clone(), + group_by_exprs: self.group_by_exprs.clone(), + key_column_exprs: self.key_column_exprs.clone(), + parameters: self.parameters.clone(), + schema, + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryRead - Load pre-computed sketch +// ============================================================================ + +/// Logical plan node: Load a pre-computed sketch +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryRead { + /// Metadata to identify the sketch + pub metadata: SketchMetadata, + + /// Optional: Direct sketch ID if known + pub sketch_id: Option, + + /// Schema (placeholder for now) + schema: DFSchemaRef, +} + +impl SummaryRead { + pub fn new(metadata: SketchMetadata, schema: DFSchemaRef) -> Self { + Self { + metadata, + sketch_id: None, + schema, + } + } + + pub fn with_sketch_id(mut self, sketch_id: String) -> Self { + self.sketch_id = Some(sketch_id); + self + } +} + +impl PartialOrd for SummaryRead { + fn partial_cmp(&self, other: &Self) -> Option { + match self.metadata.partial_cmp(&other.metadata) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.sketch_id.partial_cmp(&other.sketch_id) { + Some(Ordering::Equal) => {} + other => return other, + } + // DFSchemaRef is Arc, and DFSchema likely doesn't implement PartialOrd + // So we compare by pointer + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryRead { + fn name(&self) -> &str { + "SummaryRead" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] // No inputs - reads from storage + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) 
-> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummaryRead: {}", self.metadata)?; + if let Some(id) = &self.sketch_id { + write!(f, " [id={}]", id)?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { + self.clone() + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryMerge - Merge multiple sketches +// ============================================================================ + +/// Logical plan node: Merge two or more sketches +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryMerge { + /// Left sketch source + pub left: Arc, + + /// Right sketch source + pub right: Arc, + + /// Sketch type (for validation) + pub sketch_type: SketchType, +} + +impl SummaryMerge { + pub fn new(left: Arc, right: Arc, sketch_type: SketchType) -> Self { + Self { + left, + right, + sketch_type, + } + } +} + +impl PartialOrd for SummaryMerge { + fn partial_cmp(&self, other: &Self) -> Option { + match self.sketch_type.partial_cmp(&other.sketch_type) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.left.partial_cmp(&other.left) { + Some(Ordering::Equal) => {} + other => return other, + } + self.right.partial_cmp(&other.right) + } +} + +impl UserDefinedLogicalNodeCore for SummaryMerge { + fn name(&self) -> &str { + "SummaryMerge" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.left.as_ref(), self.right.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + // Return left schema (should be compatible with right) + self.left.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummaryMerge: sketch_type={}", self.sketch_type) + } + + fn from_template(&self, _exprs: &[Expr], 
inputs: &[LogicalPlan]) -> Self { + Self { + left: Arc::new(inputs[0].clone()), + right: Arc::new(inputs[1].clone()), + sketch_type: self.sketch_type.clone(), + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummarySubtract - Subtract one sketch from another +// ============================================================================ + +/// Logical plan node: Subtract one sketch from another (for sliding windows) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummarySubtract { + /// Sketch to subtract FROM (minuend) + pub minuend: Arc, + + /// Sketch to subtract (subtrahend) + pub subtrahend: Arc, + + /// Sketch type (for validation) + pub sketch_type: SketchType, +} + +impl SummarySubtract { + pub fn new( + minuend: Arc, + subtrahend: Arc, + sketch_type: SketchType, + ) -> Self { + Self { + minuend, + subtrahend, + sketch_type, + } + } +} + +impl PartialOrd for SummarySubtract { + fn partial_cmp(&self, other: &Self) -> Option { + match self.sketch_type.partial_cmp(&other.sketch_type) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.minuend.partial_cmp(&other.minuend) { + Some(Ordering::Equal) => {} + other => return other, + } + self.subtrahend.partial_cmp(&other.subtrahend) + } +} + +impl UserDefinedLogicalNodeCore for SummarySubtract { + fn name(&self) -> &str { + "SummarySubtract" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.minuend.as_ref(), self.subtrahend.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + self.minuend.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummarySubtract: sketch_type={}", self.sketch_type) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + Self { + minuend: 
Arc::new(inputs[0].clone()), + subtrahend: Arc::new(inputs[1].clone()), + sketch_type: self.sketch_type.clone(), + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryInfer - Extract result from sketch +// ============================================================================ + +/// Logical plan node: Extract a result from a sketch +#[derive(Debug, Clone)] +pub struct SummaryInfer { + /// Input sketch source + pub input: Arc, + + /// Optional second input for keys enumeration (multi-population accumulators). + /// When present, SummaryInferExec deserializes the value sketch once per spatial group + /// and queries it N times (once per sub-key from the keys input). + pub keys_input: Option>, + + /// Operations to perform on the sketch(es) + /// For single sketch with multiple operations: operations map to sketch in order + pub operations: Vec, + + /// Output column names (one per operation) + pub output_names: Vec, + + /// Optional group key columns for Hydra sketches (column names, not full Expr) + /// Legacy field - use group_key_exprs for computed expressions + pub group_key_columns: Vec, + + /// Optional qualifier for the group key columns (for Hydra sketches) + pub group_key_qualifier: Option, + + /// Group key expressions with pre-resolved types (supports computed expressions) + /// When non-empty, takes precedence over group_key_columns in compute_schema + pub group_key_exprs: Vec, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl SummaryInfer { + /// Create a new SummaryInfer with multiple operations + pub fn new( + input: Arc, + operations: Vec, + output_names: Vec, + ) -> DFResult { + // Validate inputs + if operations.is_empty() { + return Err(DataFusionError::Plan( + "SummaryInfer requires at least one operation".to_string(), + )); + } + if operations.len() != 
output_names.len() { + return Err(DataFusionError::Plan(format!( + "SummaryInfer operations ({}) and output_names ({}) length mismatch", + operations.len(), + output_names.len() + ))); + } + + let schema = Self::compute_schema(&input, &operations, &output_names, &[], &None, &[])?; + Ok(Self { + input, + keys_input: None, + operations, + output_names, + group_key_columns: vec![], + group_key_qualifier: None, + group_key_exprs: vec![], + schema, + }) + } + + /// Helper constructor for single operation (backward compatibility) + pub fn single( + input: Arc, + operation: InferOperation, + output_name: String, + ) -> DFResult { + Self::new(input, vec![operation], vec![output_name]) + } + + /// Add group key columns for Hydra sketches (supports multiple columns) + /// Legacy method - use with_group_key_exprs for computed expressions + pub fn with_group_key_columns( + mut self, + group_key_columns: Vec, + qualifier: Option, + ) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.operations, + &self.output_names, + &group_key_columns, + &qualifier, + &self.group_key_exprs, + )?; + self.group_key_columns = group_key_columns; + self.group_key_qualifier = qualifier; + Ok(self) + } + + /// Set a second input for keys enumeration (multi-population accumulators). 
+ pub fn with_keys_input(mut self, keys_input: Arc) -> Self { + self.keys_input = Some(keys_input); + self + } + + /// Add group key expressions with pre-resolved types (supports computed expressions) + pub fn with_group_key_exprs( + mut self, + group_key_exprs: Vec, + qualifier: Option, + ) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.operations, + &self.output_names, + &self.group_key_columns, + &qualifier, + &group_key_exprs, + )?; + self.group_key_exprs = group_key_exprs; + self.group_key_qualifier = qualifier; + Ok(self) + } + + /// Compute output schema based on operations and grouping + fn compute_schema( + input: &Arc, + operations: &[InferOperation], + output_names: &[String], + group_key_columns: &[String], + group_key_qualifier: &Option, + group_key_exprs: &[TypedExpr], + ) -> DFResult { + let input_schema = input.schema(); + let mut qualified_fields = Vec::new(); + + // Add group columns to output with qualifications preserved + // Prefer group_key_exprs (TypedExpr) over group_key_columns (String) if available + if !group_key_exprs.is_empty() { + // Use TypedExpr - supports computed expressions like date_part(), CASE, etc. 
+ // First: pass through input label columns not covered by group_key_exprs + let expr_names: Vec = group_key_exprs + .iter() + .filter_map(|te| { + if let Expr::Column(col) = &te.expr { + Some(col.name.clone()) + } else { + None + } + }) + .collect(); + for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" + && !field.name().ends_with("_sketch") + && !expr_names.contains(&field.name().to_string()) + { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + // Then: add group key expression columns + for typed_expr in group_key_exprs { + // For simple columns, use col.name as field name and col.relation as qualifier + // For computed expressions, use schema_name() with optional provided qualifier + let (qualifier, field_name) = if let Expr::Column(col) = &typed_expr.expr { + (col.relation.clone(), col.name.clone()) + } else if let Some(qual) = group_key_qualifier { + ( + Some(datafusion::common::TableReference::bare(qual.clone())), + typed_expr.expr.schema_name().to_string(), + ) + } else { + (None, typed_expr.expr.schema_name().to_string()) + }; + qualified_fields.push(( + qualifier, + Arc::new(Field::new(&field_name, typed_expr.data_type.clone(), true)), + )); + } + } else if !group_key_columns.is_empty() { + // Fallback to legacy string-based group_key_columns + // Hydra/self-keyed case: pass through input label columns first, + // then add materialized group key columns from the accumulator. 
+ for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" + && !field.name().ends_with("_sketch") + && !group_key_columns.contains(&field.name().to_string()) + { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + for key_col in group_key_columns { + // Try to find it in the input schema first + if let Ok((qualifier, field)) = + input_schema.qualified_field_with_unqualified_name(key_col) + { + qualified_fields.push((qualifier.cloned(), Arc::new(field.clone()))); + } else if let Some(qual) = group_key_qualifier { + // Use provided qualifier if input schema doesn't have it + // This happens for Hydra where SummaryInsert doesn't output the group keys + let qualifier = Some(datafusion::common::TableReference::bare(qual.clone())); + qualified_fields.push(( + qualifier, + Arc::new(Field::new(key_col, DataType::Utf8, false)), + )); + } else { + // No qualifier available - use unqualified + qualified_fields + .push((None, Arc::new(Field::new(key_col, DataType::Utf8, false)))); + } + } + } else { + // Per-group case: preserve group columns from input (non-sketch columns) with qualifications + for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" && !field.name().ends_with("_sketch") { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + } + + // Add result columns based on operation types (unqualified) + for (operation, output_name) in operations.iter().zip(output_names.iter()) { + let result_type = match operation { + // Exact aggregator extractions - all return Float64 + InferOperation::ExtractSum => DataType::Float64, + InferOperation::ExtractCount => DataType::Float64, + InferOperation::ExtractMin => DataType::Float64, + InferOperation::ExtractMax => DataType::Float64, + InferOperation::ExtractIncrease => DataType::Float64, + InferOperation::ExtractRate => DataType::Float64, + // Sketch operations + InferOperation::CountDistinct => DataType::UInt64, + InferOperation::Quantile(_) | 
InferOperation::Median => DataType::Float64, + InferOperation::TopK(_) => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + InferOperation::FrequencyCount => DataType::Float64, // COUNT returns numeric + InferOperation::FrequencySum => DataType::Float64, // SUM returns numeric + InferOperation::FrequencyAvg => DataType::Float64, // AVG returns numeric + InferOperation::FrequencyEstimate(_) => DataType::UInt64, + InferOperation::FrequentItems(_) => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + InferOperation::EnumerateSet => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + }; + + qualified_fields.push((None, Arc::new(Field::new(output_name, result_type, false)))); + } + + // Create DFSchema from qualified fields + let schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } +} + +impl PartialEq for SummaryInfer { + fn eq(&self, other: &Self) -> bool { + self.input == other.input + && self.keys_input == other.keys_input + && self.operations == other.operations + && self.output_names == other.output_names + && self.group_key_columns == other.group_key_columns + && self.group_key_qualifier == other.group_key_qualifier + && self.schema == other.schema + } +} + +impl Eq for SummaryInfer {} + +impl std::hash::Hash for SummaryInfer { + fn hash(&self, state: &mut H) { + self.input.hash(state); + self.keys_input.hash(state); + self.operations.hash(state); + self.output_names.hash(state); + self.group_key_columns.hash(state); + self.group_key_qualifier.hash(state); + self.schema.hash(state); + } +} + +impl PartialOrd for SummaryInfer { + fn partial_cmp(&self, other: &Self) -> Option { + match self.operations.partial_cmp(&other.operations) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.output_names.partial_cmp(&other.output_names) { + 
Some(Ordering::Equal) => {} + other => return other, + } + match self.group_key_columns.partial_cmp(&other.group_key_columns) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.input.partial_cmp(&other.input) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.keys_input.partial_cmp(&other.keys_input) { + Some(Ordering::Equal) => {} + other => return other, + } + // Compare schemas by pointer + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryInfer { + fn name(&self) -> &str { + "SummaryInfer" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + let mut inputs = vec![self.input.as_ref()]; + if let Some(ref keys_input) = self.keys_input { + inputs.push(keys_input.as_ref()); + } + inputs + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + // No Expr types stored anymore - group_key_column is just a string + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.operations.len() == 1 { + write!( + f, + "SummaryInfer: operation={}, output={}", + self.operations[0], self.output_names[0] + )?; + } else { + write!(f, "SummaryInfer: operations=[")?; + for (i, op) in self.operations.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", op)?; + } + write!(f, "], outputs=[")?; + for (i, name) in self.output_names.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", name)?; + } + write!(f, "]")?; + } + if !self.group_key_columns.is_empty() { + write!( + f, + ", group_key_columns=[{}]", + self.group_key_columns.join(", ") + )?; + } + if self.keys_input.is_some() { + write!(f, ", has_keys_input=true")?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + let input = Arc::new(inputs[0].clone()); + let keys_input = if inputs.len() > 1 { + Some(Arc::new(inputs[1].clone())) + } else { + 
self.keys_input.clone() + }; + // Recompute schema with new input + let schema = Self::compute_schema( + &input, + &self.operations, + &self.output_names, + &self.group_key_columns, + &self.group_key_qualifier, + &self.group_key_exprs, + ) + .unwrap_or_else(|_| self.schema.clone()); + + Self { + input, + keys_input, + operations: self.operations.clone(), + output_names: self.output_names.clone(), + group_key_columns: self.group_key_columns.clone(), + group_key_qualifier: self.group_key_qualifier.clone(), + group_key_exprs: self.group_key_exprs.clone(), + schema, + } + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&exprs, &inputs)) + } +} + +// ============================================================================ +// PrecomputedSummaryRead - Read precomputed summaries from store +// ============================================================================ + +/// Logical plan node: Read precomputed summaries from a store +/// +/// This is a leaf node that represents reading precomputed aggregates +/// (summaries) from a PrecomputedOutputStore. Used for OnlySpatial queries +/// where data has already been aggregated by a streaming engine. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PrecomputedSummaryRead { + /// Metric name being queried + metric: String, + + /// Aggregation ID to query + aggregation_id: u64, + + /// Start timestamp of the query range + start_timestamp: u64, + + /// End timestamp of the query range + end_timestamp: u64, + + /// Whether this is an exact query (sliding window) vs approximate (tumbling) + is_exact_query: bool, + + /// Output label names (group by columns) + output_labels: Vec, + + /// Type of summary being read + summary_type: SketchType, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl PrecomputedSummaryRead { + #[allow(clippy::too_many_arguments)] + pub fn new( + metric: String, + aggregation_id: u64, + start_timestamp: u64, + end_timestamp: u64, + is_exact_query: bool, + output_labels: Vec, + summary_type: SketchType, + schema: DFSchemaRef, + ) -> Self { + Self { + metric, + aggregation_id, + start_timestamp, + end_timestamp, + is_exact_query, + output_labels, + summary_type, + schema, + } + } + + /// Create with auto-generated schema based on output_labels + pub fn with_auto_schema( + metric: String, + aggregation_id: u64, + start_timestamp: u64, + end_timestamp: u64, + is_exact_query: bool, + output_labels: Vec, + summary_type: SketchType, + ) -> DFResult { + let schema = Self::compute_schema(&output_labels)?; + Ok(Self::new( + metric, + aggregation_id, + start_timestamp, + end_timestamp, + is_exact_query, + output_labels, + summary_type, + schema, + )) + } + + /// Compute schema: [label columns (Utf8), sketch (Binary)] + fn compute_schema(output_labels: &[String]) -> DFResult { + let mut qualified_fields = Vec::new(); + + // Add label columns (all Utf8, nullable) + for label in output_labels { + qualified_fields.push((None, Arc::new(Field::new(label, DataType::Utf8, true)))); + } + + // Add sketch column (Binary, not nullable) + qualified_fields.push(( + None, + Arc::new(Field::new("sketch", DataType::Binary, false)), + )); + + let 
schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } + + // Getters + pub fn metric(&self) -> &str { + &self.metric + } + + pub fn aggregation_id(&self) -> u64 { + self.aggregation_id + } + + pub fn start_timestamp(&self) -> u64 { + self.start_timestamp + } + + pub fn end_timestamp(&self) -> u64 { + self.end_timestamp + } + + pub fn is_exact_query(&self) -> bool { + self.is_exact_query + } + + pub fn output_labels(&self) -> &[String] { + &self.output_labels + } + + pub fn summary_type(&self) -> &SketchType { + &self.summary_type + } +} + +impl PartialOrd for PrecomputedSummaryRead { + fn partial_cmp(&self, other: &Self) -> Option { + match self.metric.partial_cmp(&other.metric) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.aggregation_id.partial_cmp(&other.aggregation_id) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.start_timestamp.partial_cmp(&other.start_timestamp) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.end_timestamp.partial_cmp(&other.end_timestamp) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.is_exact_query.partial_cmp(&other.is_exact_query) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.output_labels.partial_cmp(&other.output_labels) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.summary_type.partial_cmp(&other.summary_type) { + Some(Ordering::Equal) => {} + other => return other, + } + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for PrecomputedSummaryRead { + fn name(&self) -> &str { + "PrecomputedSummaryRead" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] // Leaf node - no inputs + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] 
+ } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "PrecomputedSummaryRead: metric={}, agg_id={}, range=[{}, {}], exact={}, type={}, labels=[{}]", + self.metric, + self.aggregation_id, + self.start_timestamp, + self.end_timestamp, + self.is_exact_query, + self.summary_type, + self.output_labels.join(", ") + ) + } + + fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { + self.clone() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + _inputs: Vec, + ) -> DFResult { + Ok(self.clone()) + } +} + +// ============================================================================ +// SummaryMergeMultiple - Merge multiple summaries by group key +// ============================================================================ + +/// Logical plan node: Merge multiple summaries with the same group key +/// +/// Takes an input with multiple rows per group key (e.g., from multiple +/// precomputed buckets) and merges them into one summary per group key. +/// This is used when tumbling windows need to be merged for a query range. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SummaryMergeMultiple {
    /// Input plan (typically PrecomputedSummaryRead)
    input: Arc<LogicalPlan>,

    /// Columns to group by when merging
    group_by: Vec<String>,

    /// Column containing the sketch/summary data
    sketch_column: String,

    /// Type of summary being merged (for dispatch to correct merge logic)
    summary_type: SketchType,

    /// Cached output schema (same as input - merging reduces rows, not columns)
    schema: DFSchemaRef,
}

impl SummaryMergeMultiple {
    /// Build a merge node over `input`, grouping by `group_by` and merging
    /// the summaries found in `sketch_column`.
    pub fn new(
        input: Arc<LogicalPlan>,
        group_by: Vec<String>,
        sketch_column: String,
        summary_type: SketchType,
    ) -> Self {
        // Schema is same as input (we reduce rows, not columns).
        let schema = input.schema().clone();
        Self {
            input,
            group_by,
            sketch_column,
            summary_type,
            schema,
        }
    }

    // Getters
    pub fn input(&self) -> &LogicalPlan {
        &self.input
    }

    pub fn group_by(&self) -> &[String] {
        &self.group_by
    }

    pub fn sketch_column(&self) -> &str {
        &self.sketch_column
    }

    pub fn summary_type(&self) -> &SketchType {
        &self.summary_type
    }
}

impl PartialOrd for SummaryMergeMultiple {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match self.group_by.partial_cmp(&other.group_by) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.sketch_column.partial_cmp(&other.sketch_column) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.summary_type.partial_cmp(&other.summary_type) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.input.partial_cmp(&other.input) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        // Compare schemas by pointer.
        Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema)))
    }
}

impl UserDefinedLogicalNodeCore for SummaryMergeMultiple {
    fn name(&self) -> &str {
        "SummaryMergeMultiple"
    }

    fn inputs(&self) -> Vec<&LogicalPlan> {
        vec![self.input.as_ref()]
    }

    fn schema(&self) -> &DFSchemaRef {
        &self.schema
    }

    fn expressions(&self)
-> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "SummaryMergeMultiple: group_by=[{}], sketch_column={}, type={}", + self.group_by.join(", "), + self.sketch_column, + self.summary_type + ) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + Self { + input: Arc::new(inputs[0].clone()), + group_by: self.group_by.clone(), + sketch_column: self.sketch_column.clone(), + summary_type: self.summary_type.clone(), + schema: inputs[0].schema().clone(), + } + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&exprs, &inputs)) + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/.gitignore b/CommonDependencies/dependencies/rs/promql_utilities/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock new file mode 100644 index 0000000..f32cf1d --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock @@ -0,0 +1,1064 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + 
"serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tokio-test", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + 
"regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "io-uring", + "libc", + "mio", + "pin-project-lite", + "slab", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = 
"unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] 
+name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml new file mode 100644 index 0000000..adb9ec5 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_utilities" +version = "0.1.0" +edition = "2021" +authors = ["SketchDB Team"] +description = "A standalone PromQL pattern matching and query analysis library for Rust" +license = "MIT" +keywords = ["prometheus", "promql", "pattern-matching", "query-analysis"] +categories = ["parsing", "database", "development-tools"] + +[dependencies] +promql-parser = "0.5.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +thiserror = "1.0" +chrono = { version = "0.4", features = ["serde"] } +tracing = "0.1" + +[dev-dependencies] +tokio-test = "0.4" diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs 
b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs new file mode 100644 index 0000000..bcc58df --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs @@ -0,0 +1,7 @@ +pub mod promql_pattern; +pub mod promql_pattern_builder; +//pub mod promql_pattern_factory; + +pub use promql_pattern::*; +pub use promql_pattern_builder::*; +//pub use promql_pattern_factory::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs new file mode 100644 index 0000000..625fee5 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs @@ -0,0 +1,917 @@ +use chrono::Duration; +use core::panic; +use promql_parser::label::MatchOp; +use promql_parser::parser::{AtModifier, Expr, LabelModifier, SubqueryExpr, VectorSelector}; +use serde::Serialize; +use serde_json::Value; +use std::collections::HashMap; +use std::time::UNIX_EPOCH; +use tracing::debug; + +/// PromQL pattern for AST-based matching +#[derive(Debug, Clone)] +pub struct PromQLPattern { + /// AST pattern definition (JSON-like structure). None indicates a wildcard (match any). 
+ pub ast_pattern: Option>, + ///// Tokens to collect during matching + //pub collect_tokens: Vec, +} + +impl PromQLPattern { + /// Create a new pattern with AST pattern definition + //pub fn new(ast_pattern: Option>, collect_tokens: Vec) -> Self { + pub fn new(ast_pattern: Option>) -> Self { + debug!("Creating new PromQLPattern"); + Self { + ast_pattern, + //collect_tokens, + } + } + + /// Convert an Expr to a clean string representation + fn expr_to_string(expr: &Expr) -> String { + match expr { + Expr::NumberLiteral(num) => num.val.to_string(), + _ => format!("{:?}", expr), + } + } + + /// Match this pattern against a parsed AST + pub fn matches(&self, ast: &Expr) -> PromQLMatchResult { + debug!("Starting pattern matching against AST"); + debug!("Pattern: {:?}", self.ast_pattern); + debug!("AST: {:?}", ast); + let mut tokens = HashMap::new(); + let matches = self.matches_recursive(ast, self.ast_pattern.as_ref(), &mut tokens); + debug!( + "Pattern matching completed: {}, collected {} tokens", + matches, + tokens.len() + ); + if !matches { + debug!("MATCH FAILED - tokens collected: {:?}", tokens); + } + PromQLMatchResult { matches, tokens } + } + + /// Recursive pattern matching implementation + fn matches_recursive( + &self, + node: &Expr, + pattern: Option<&HashMap>, + tokens: &mut HashMap, + ) -> bool { + // None pattern is treated as wildcard (matches anything) to mirror Python's None + if pattern.is_none() { + debug!("Wildcard pattern matched"); + return true; + } + let pattern = pattern.unwrap(); + if pattern.is_empty() { + panic!("Empty pattern is not allowed"); + } + + // Get the pattern type + let pattern_type = match pattern.get("type") { + Some(Value::String(t)) => t.as_str(), + _ => panic!("Pattern must have a 'type' field of string type"), + }; + + debug!("Matching pattern type: {} against node type", pattern_type); + debug!("Full pattern: {:?}", pattern); + debug!("Node: {:?}", node); + match (pattern_type, node) { + // Match metric selectors + 
("VectorSelector", Expr::VectorSelector(vs)) => { + self.match_metric_selector(vs, pattern, tokens) + } + + // Match function calls + ("Call", Expr::Call(call)) => self.match_function_call(call, pattern, tokens), + + // Match aggregation operations + ("AggregateExpr", Expr::Aggregate(agg)) => self.match_aggregation(agg, pattern, tokens), + + // Match matrix selectors (range vectors) + ("MatrixSelector", Expr::MatrixSelector(ms)) => { + self.match_matrix_selector(ms, pattern, tokens) + } + + // Match binary operations + ("BinaryExpr", Expr::Binary(bin_op)) => { + self.match_binary_operation(bin_op, pattern, tokens) + } + + // Match number literals + ("NumberLiteral", Expr::NumberLiteral(num)) => { + self.match_number_literal(num, pattern, tokens) + } + + // Match subquery expressions + ("SubqueryExpr", Expr::Subquery(subquery)) => { + self.match_subquery(subquery, pattern, tokens) + } + + _ => false, // Simply return false for non-matching types + } + } + + /// Match a VectorSelector node against pattern + fn match_metric_selector( + &self, + vs: &VectorSelector, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check metric name if specified in pattern + if let Some(Value::String(expected_name)) = pattern.get("name") { + if let Some(metric_name) = &vs.name { + if *metric_name != *expected_name { + return false; + } + } else { + return false; // Pattern expects name but node has none + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + debug!("Collecting metric token as: {}", collect_as); + let mut labels = HashMap::new(); + + // Extract label matchers + for matcher in &vs.matchers.matchers { + if matcher.op == MatchOp::Equal { + labels.insert(matcher.name.clone(), matcher.value.clone()); + } + } + + let at_modifier_opt = match &vs.at { + Some(AtModifier::At(t)) => { + // Convert SystemTime to seconds since UNIX_EPOCH (u64). 
+ // Panic if time is earlier than UNIX_EPOCH (pre-epoch) as requested. + let secs = match t.duration_since(UNIX_EPOCH) { + Ok(dur) => dur.as_secs(), + Err(_) => panic!("AtModifier::At contains a time before UNIX_EPOCH, which is not supported by the pattern matcher"), + }; + + Some(secs) + } + Some(AtModifier::Start) => { + panic!("AtModifier::Start is not supported by pattern matcher") + } + Some(AtModifier::End) => { + panic!("AtModifier::End is not supported by pattern matcher") + } + None => None, + }; + + let metric_token = MetricToken { + name: vs.name.clone().unwrap_or_default(), + labels, + at_modifier: at_modifier_opt, + ast: Some(vs.clone()), + }; + + let token_data = TokenData { + metric: Some(metric_token), + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a Call node (function call) against pattern + fn match_function_call( + &self, + call: &promql_parser::parser::Call, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check function name + // BUGFIX: Pattern builder creates "func" as an Object, not Array of Objects + // Original code (incorrect - expected Array of Objects): + // if let Some(Value::Array(expected_names)) = pattern.get("func") { + // if let Some(func_obj) = expected_names.first() { + // if let Some(func_map) = func_obj.as_object() { + // if let Some(Value::Array(names)) = func_map.get("name") { + // let function_name = call.func.name; + // let matches_name = names.iter().any(|name| { + // if let Some(name_str) = name.as_str() { + // name_str == function_name + // } else { + // false + // } + // }); + // + // if !matches_name { + // return false; + // } + // } + // } + // } + // } + + // Fixed code (correct - expects Object with "name" field): + if let Some(func_pattern_value) = pattern.get("func") { + if let Some(func_pattern) = func_pattern_value.as_object() { + if let 
Some(Value::Array(names)) = func_pattern.get("name") { + let function_name = call.func.name; + let matches_name = names.iter().any(|name| { + if let Some(name_str) = name.as_str() { + name_str == function_name + } else { + false + } + }); + + if !matches_name { + return false; + } + } + } + } + + // Check arguments recursively + if let Some(Value::Array(expected_args)) = pattern.get("args") { + if call.args.args.len() != expected_args.len() { + return false; + } + + for (i, arg) in call.args.args.iter().enumerate() { + if let Some(arg_pattern) = expected_args[i].as_object() { + let arg_pattern_map: HashMap = arg_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(arg.as_ref(), Some(&arg_pattern_map), tokens) { + return false; + } + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + debug!("Collecting function token as: {}", collect_as); + let function_token = FunctionToken { + name: call.func.name.to_string(), + args: call + .args + .args + .iter() + .map(|arg| Self::expr_to_string(arg)) + .collect(), // Capture actual args + }; + + let token_data = TokenData { + metric: None, + function: Some(function_token), + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + // If requested, collect the raw function arguments (as strings) under a separate token + if let Some(Value::String(collect_args_as)) = pattern.get("_collect_args_as") { + let arg_strs: Vec = call + .args + .args + .iter() + .map(|arg| Self::expr_to_string(arg)) + .collect(); + + let function_args_token = FunctionToken { + name: call.func.name.to_string(), + args: arg_strs, + }; + + let token_data = TokenData { + metric: None, + function: Some(function_args_token), + aggregation: None, + range_vector: None, + subquery: None, + 
binary_op: None, + number: None, + }; + + tokens.insert(collect_args_as.clone(), token_data); + } + + true + } + + /// Match an Aggregate node against pattern + fn match_aggregation( + &self, + agg: &promql_parser::parser::AggregateExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + debug!("=== AGGREGATION MATCHING START ==="); + debug!("Aggregation pattern: {:?}", pattern); + debug!("Aggregation AST: {:?}", agg); + // Check aggregation operation + if let Some(Value::Array(expected_ops)) = pattern.get("op") { + let agg_op = agg.op.to_string(); + debug!( + "Checking aggregation op '{}' against pattern ops: {:?}", + agg_op, expected_ops + ); + let matches_op = expected_ops.iter().any(|op| { + if let Some(op_str) = op.as_str() { + op_str == agg_op + } else { + false + } + }); + + if !matches_op { + debug!("Aggregation op '{}' does not match pattern ops", agg_op); + return false; + } + debug!("Aggregation op '{}' matched!", agg_op); + } + + // Check inner expression recursively + if let Some(expr_pattern_value) = pattern.get("expr") { + debug!("Found expr pattern value: {:?}", expr_pattern_value); + if let Some(expr_pattern) = expr_pattern_value.as_object() { + debug!("Expr pattern is an object, recursing..."); + let expr_pattern_map: HashMap = expr_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&agg.expr, Some(&expr_pattern_map), tokens) { + debug!("Inner expression recursive match FAILED"); + return false; + } + debug!("Inner expression recursive match SUCCESS"); + } else if expr_pattern_value.is_null() { + debug!("Expr pattern is null, skipping validation"); + } else { + debug!( + "Expr pattern is neither object nor null: {:?}", + expr_pattern_value + ); + } + } else { + debug!("No expr pattern found, skipping inner expression check"); + } + + // Check modifier if specified in pattern + // Original code (too strict - fails when query has modifier but pattern is null): + // if let 
Some(pattern_modifier_value) = pattern.get("modifier") { + // let actual_modifier = match &agg.modifier { + // Some(LabelModifier::Include(_)) => "by", + // Some(LabelModifier::Exclude(_)) => "without", + // None => "null", + // }; + // + // match pattern_modifier_value { + // Value::String(expected_modifier) => { + // if actual_modifier != expected_modifier { + // return false; + // } + // } + // Value::Null => { + // if actual_modifier != "null" { + // return false; + // } + // } + // _ => { + // // Invalid pattern modifier format + // return false; + // } + // } + // } + + // Fixed code - only validate modifiers if pattern explicitly specifies a non-null modifier + if let Some(pattern_modifier_value) = pattern.get("modifier") { + debug!("Found modifier pattern: {:?}", pattern_modifier_value); + let actual_modifier = match &agg.modifier { + Some(LabelModifier::Include(_)) => "by", + Some(LabelModifier::Exclude(_)) => "without", + None => "null", + }; + debug!("Actual aggregation modifier: '{}'", actual_modifier); + + // Only validate if pattern explicitly requires a specific modifier (not null) + if !pattern_modifier_value.is_null() { + debug!("Pattern requires specific modifier, validating..."); + match pattern_modifier_value { + Value::String(expected_modifier) => { + debug!( + "Expected modifier: '{}', actual: '{}'", + expected_modifier, actual_modifier + ); + if actual_modifier != expected_modifier { + debug!("Modifier mismatch - FAILED"); + return false; + } + debug!("Modifier match - SUCCESS"); + } + _ => { + debug!("Invalid pattern modifier format - FAILED"); + return false; + } + } + } else { + debug!("Pattern modifier is null, allowing any query modifier (wildcard)"); + } + } else { + debug!("No modifier pattern found, allowing any query modifier"); + } + + debug!("=== AGGREGATION MATCHING SUCCESS ==="); + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + 
debug!("Collecting aggregation token as: {}", collect_as); + let modifier = match &agg.modifier { + Some(LabelModifier::Include(labels)) => Some(AggregationModifier { + modifier_type: "by".to_string(), + labels: labels.labels.clone(), + }), + Some(LabelModifier::Exclude(labels)) => Some(AggregationModifier { + modifier_type: "without".to_string(), + labels: labels.labels.clone(), + }), + None => None, + }; + + let aggregation_token = AggregationToken { + op: agg.op.to_string(), + modifier, + param: agg.param.as_ref().map(|p| Self::expr_to_string(p)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: Some(aggregation_token), + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a MatrixSelector node against pattern + fn match_matrix_selector( + &self, + ms: &promql_parser::parser::MatrixSelector, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check the inner vector selector + if let Some(vs_pattern_value) = pattern.get("vector_selector") { + if let Some(vs_pattern) = vs_pattern_value.as_object() { + let vs_pattern_map: HashMap = vs_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.match_metric_selector(&ms.vs, &vs_pattern_map, tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + // Convert std::time::Duration to chrono::Duration and store directly + let chrono_dur = Duration::from_std(ms.range) + .map_err(|_| Duration::zero()) + .unwrap(); + + let range_token = RangeToken { + range: chrono_dur, + offset: ms.vs.offset.as_ref().map(|offset| format!("{:?}", offset)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: Some(range_token), + subquery: None, + binary_op: None, + 
number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + ///// Normalize duration to standard PromQL format (prefer larger units when possible) + // fn normalize_duration_string(duration: &std::time::Duration) -> String { + // let secs = duration.as_secs(); + + // // Convert to the most appropriate unit, preferring larger units when possible + // if secs >= 3600 && secs % 3600 == 0 { + // format!("{}h", secs / 3600) + // } else if secs >= 60 && secs % 60 == 0 { + // format!("{}m", secs / 60) + // } else if secs > 0 { + // format!("{secs}s") + // } else { + // // Handle sub-second durations + // let millis = duration.as_millis(); + // if millis > 0 { + // format!("{millis}ms") + // } else { + // "0s".to_string() + // } + // } + // } + + /// Match a Binary expression node against pattern + fn match_binary_operation( + &self, + bin_op: &promql_parser::parser::BinaryExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check operation type + if let Some(Value::String(expected_op)) = pattern.get("op") { + if bin_op.op.to_string() != *expected_op { + return false; + } + } + + // Check left and right expressions recursively + if let Some(left_pattern_value) = pattern.get("left") { + if let Some(left_pattern) = left_pattern_value.as_object() { + let left_pattern_map: HashMap = left_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&bin_op.lhs, Some(&left_pattern_map), tokens) { + return false; + } + } + } + + if let Some(right_pattern_value) = pattern.get("right") { + if let Some(right_pattern) = right_pattern_value.as_object() { + let right_pattern_map: HashMap = right_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&bin_op.rhs, Some(&right_pattern_map), tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let 
Some(Value::String(collect_as)) = pattern.get("_collect_as") { + let binary_token = BinaryOpToken { + op: bin_op.op.to_string(), + matching: None, // TODO: Add vector matching support + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: Some(binary_token), + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a NumberLiteral node against pattern + fn match_number_literal( + &self, + num: &promql_parser::parser::NumberLiteral, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check value if specified in pattern + if let Some(Value::Number(expected_value)) = pattern.get("value") { + if let Some(expected_f64) = expected_value.as_f64() { + if (num.val - expected_f64).abs() > f64::EPSILON { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + let number_token = NumberToken { value: num.val }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: Some(number_token), + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a SubqueryExpr node against pattern + fn match_subquery( + &self, + subquery: &SubqueryExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check inner expression recursively + if let Some(expr_pattern_value) = pattern.get("expr") { + if let Some(expr_pattern) = expr_pattern_value.as_object() { + let expr_pattern_map: HashMap = expr_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&subquery.expr, Some(&expr_pattern_map), tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = 
pattern.get("_collect_as") { + // Convert std::time::Duration to chrono::Duration and store + let chrono_dur = Duration::from_std(subquery.range) + .map_err(|_| Duration::zero()) + .unwrap(); + + let subquery_token = SubqueryToken { + range: chrono_dur, + offset: subquery + .offset + .as_ref() + .map(|offset| format!("{:?}", offset)), + step: subquery.step.as_ref().map(|step| format!("{:?}", step)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: Some(subquery_token), + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } +} + +/// Token data extracted from AST nodes - pattern matching system +#[derive(Debug, Clone, Serialize)] +pub struct TokenData { + pub metric: Option, + pub function: Option, + pub aggregation: Option, + pub range_vector: Option, + pub subquery: Option, + pub binary_op: Option, + pub number: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct MetricToken { + pub name: String, + pub labels: HashMap, + // seconds since UNIX_EPOCH + pub at_modifier: Option, + #[serde(skip_serializing, skip_deserializing)] + pub ast: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FunctionToken { + pub name: String, + pub args: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct AggregationToken { + pub op: String, + pub modifier: Option, + pub param: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct RangeToken { + pub range: Duration, + pub offset: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct SubqueryToken { + pub range: Duration, + pub offset: Option, + pub step: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct BinaryOpToken { + pub op: String, + pub matching: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct NumberToken { + pub value: f64, +} + +#[derive(Debug, Clone, Serialize)] +pub struct VectorMatching { + pub card: String, // "one-to-one", 
"one-to-many", "many-to-one" + pub on: Vec, + pub ignoring: Vec, + pub group_left: Vec, + pub group_right: Vec, +} + +/// Match result with token-based extraction +#[derive(Debug, Clone)] +pub struct PromQLMatchResult { + pub matches: bool, + pub tokens: HashMap, +} + +impl PromQLMatchResult { + /// Create a new empty result + pub fn new() -> Self { + Self { + matches: false, + tokens: HashMap::new(), + } + } + + /// Create a successful match result with tokens + pub fn with_tokens(tokens: HashMap) -> Self { + Self { + matches: true, + tokens, + } + } + + /// Get metric name from tokens + pub fn get_metric_name(&self) -> Option { + self.tokens + .get("metric")? + .metric + .as_ref() + .map(|m| m.name.clone()) + } + + /// Get function name from tokens + pub fn get_function_name(&self) -> Option { + self.tokens + .get("function")? + .function + .as_ref() + .map(|f| f.name.clone()) + } + + /// Get aggregation operation from tokens + pub fn get_aggregation_op(&self) -> Option { + self.tokens + .get("aggregation")? + .aggregation + .as_ref() + .map(|a| a.op.clone()) + } + + /// Get range duration from tokens as chrono::Duration + pub fn get_range_duration(&self) -> Option { + self.tokens + .get("range_vector")? 
+ .range_vector + .as_ref() + .map(|r| r.range) + } +} + +impl Default for PromQLMatchResult { + fn default() -> Self { + Self::new() + } +} + +/// Represents aggregation modifiers like "by" or "without" +#[derive(Debug, Clone, Serialize)] +pub struct AggregationModifier { + pub modifier_type: String, // "by" or "without" + pub labels: Vec, +} + +impl AggregationModifier { + /// Create a new AggregationModifier + pub fn new(modifier_type: String, labels: Vec) -> Self { + Self { + modifier_type, + labels, + } + } + + // /// Check if a function name represents a temporal function + // fn is_temporal_function(&self, function_name: &str) -> bool { + // matches!( + // function_name, + // "rate" + // | "increase" + // | "sum_over_time" + // | "min_over_time" + // | "max_over_time" + // | "avg_over_time" + // | "count_over_time" + // | "quantile_over_time" + // | "stddev_over_time" + // | "stdvar_over_time" + // | "last_over_time" + // | "present_over_time" + // ) + // } + + // /// Extract label filters from matchers + // fn extract_label_filters(&self, matchers: &Matchers) -> HashMap { + // let mut filters = HashMap::new(); + + // for matcher in &matchers.matchers { + // // For now, only handle exact equality matches + // if matcher.op == MatchOp::Equal { + // filters.insert(matcher.name.clone(), matcher.value.clone()); + // } + // } + + // filters + // } + + // /// Convert Duration to string representation in PromQL format + // fn duration_to_string(&self, duration: &std::time::Duration) -> String { + // let secs = duration.as_secs(); + + // // Convert to the most appropriate unit, preferring larger units when possible + // if secs >= 3600 && secs % 3600 == 0 { + // format!("{}h", secs / 3600) + // } else if secs >= 60 && secs % 60 == 0 { + // format!("{}m", secs / 60) + // } else if secs > 0 { + // format!("{secs}s") + // } else { + // // Handle sub-second durations + // let millis = duration.as_millis(); + // if millis > 0 { + // format!("{millis}ms") + // } else { + 
// "0s".to_string() + // } + // } + // } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs new file mode 100644 index 0000000..5a45a92 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs @@ -0,0 +1,238 @@ +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +/// PromQL Pattern Builder for creating PromQL-based patterns +/// This mirrors the Python PromQLPatternBuilder class +pub struct PromQLPatternBuilder; + +impl PromQLPatternBuilder { + /// Create a pattern for any node type + pub fn any() -> Option> { + debug!("Creating wildcard pattern (any)"); + None + } + + /// Create a binary operation pattern (BinaryExpr) + pub fn binary_op( + op: &str, + left: Option>, + right: Option>, + collect_as: Option<&str>, + ) -> Option> { + debug!("Creating binary operation pattern for op: {}", op); + let mut pattern = HashMap::new(); + pattern.insert("type".to_string(), Value::String("BinaryExpr".to_string())); + pattern.insert("op".to_string(), Value::String(op.to_string())); + pattern.insert("left".to_string(), serde_json::to_value(left).unwrap()); + pattern.insert("right".to_string(), serde_json::to_value(right).unwrap()); + + match collect_as { + Some(collect) => pattern.insert( + "_collect_as".to_string(), + Value::String(collect.to_string()), + ), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a metric pattern (VectorSelector) + pub fn metric( + name: Option<&str>, + labels: Option>, + at_modifier: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + debug!("Creating metric pattern for name: {:?}", name); + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("VectorSelector".to_string()), + ); + + match name { + Some(n) => 
pattern.insert("name".to_string(), Value::String(n.to_string())), + None => pattern.insert("name".to_string(), Value::Null), + }; + + match labels { + Some(l) => { + let labels_value = serde_json::to_value(l).unwrap(); + pattern.insert("matchers".to_string(), labels_value) + } + None => pattern.insert("matchers".to_string(), Value::Null), + }; + + match at_modifier { + Some(a) => pattern.insert("at".to_string(), Value::String(a.to_string())), + None => pattern.insert("at".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a function pattern (Call) + pub fn function( + names: Vec<&str>, + args: Vec>>, + collect_as: Option<&str>, + collect_args_as: Option<&str>, + ) -> Option> { + debug!("Creating function pattern for names: {:?}", names); + let mut pattern = HashMap::new(); + pattern.insert("type".to_string(), Value::String("Call".to_string())); + + let mut func = HashMap::new(); + func.insert("type".to_string(), Value::String("Function".to_string())); + func.insert( + "name".to_string(), + Value::Array(names.iter().map(|n| Value::String(n.to_string())).collect()), + ); + + pattern.insert("func".to_string(), serde_json::to_value(func).unwrap()); + pattern.insert("args".to_string(), serde_json::to_value(args).unwrap()); + + match collect_args_as { + Some(c) => pattern.insert("_collect_args_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_args_as".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a subquery pattern (SubqueryExpr) + pub fn subquery( + expr: Option>, + duration: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + let mut 
pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("SubqueryExpr".to_string()), + ); + pattern.insert("expr".to_string(), serde_json::to_value(expr).unwrap()); + + match duration { + Some(d) => pattern.insert("range".to_string(), Value::String(d.to_string())), + None => pattern.insert("range".to_string(), Value::Null), + }; + + // Initialize step and offset as null, matching Python implementation + pattern.insert("step".to_string(), Value::Null); + pattern.insert("offset".to_string(), Value::Null); + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a matrix selector pattern (MatrixSelector) + pub fn matrix_selector( + vector_selector: Option>, + range: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("MatrixSelector".to_string()), + ); + pattern.insert( + "vector_selector".to_string(), + serde_json::to_value(vector_selector).unwrap(), + ); + + match range { + Some(r) => pattern.insert("range".to_string(), Value::String(r.to_string())), + None => pattern.insert("range".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create an aggregation pattern (AggregateExpr) + pub fn aggregation( + ops: Vec<&str>, + expr: Option>, + param: Option>, + by_labels: Option>, + without_labels: Option>, + collect_as: Option<&str>, + ) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("AggregateExpr".to_string()), + ); + pattern.insert( + "op".to_string(), + Value::Array(ops.iter().map(|op| Value::String(op.to_string())).collect()), + ); + 
pattern.insert("expr".to_string(), serde_json::to_value(expr).unwrap()); + + match param { + Some(p) => pattern.insert("param".to_string(), serde_json::to_value(p).unwrap()), + None => pattern.insert("param".to_string(), Value::Null), + }; + + // Use single "modifier" field to match Python format + let modifier_value = match (by_labels, without_labels) { + (Some(_), None) => Value::String("by".to_string()), + (None, Some(_)) => Value::String("without".to_string()), + _ => Value::Null, + }; + pattern.insert("modifier".to_string(), modifier_value); + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a number literal pattern + pub fn number(value: Option, collect_as: Option<&str>) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("NumberLiteral".to_string()), + ); + + match value { + Some(v) => pattern.insert( + "value".to_string(), + Value::Number(serde_json::Number::from_f64(v).unwrap()), + ), + None => pattern.insert("value".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs new file mode 100644 index 0000000..d8181ec --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs @@ -0,0 +1,123 @@ +//use crate::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +//use tracing::debug; +// +///// Pattern factory for creating common PromQL patterns +//pub struct PromQLPatternFactory; +// +//impl PromQLPatternFactory 
{ +// /// Create pattern for OnlyTemporal queries (e.g., rate(metric[5m])) +// pub fn only_temporal_pattern() -> PromQLPattern { +// debug!("Creating only temporal pattern"); +// let ms = PromQLPatternBuilder::matrix_selector( +// PromQLPatternBuilder::metric(None, None, None, Some("metric")), +// None, +// Some("range_vector"), +// ); +// +// let func_args: Vec>> = vec![ms]; +// +// let pattern = PromQLPatternBuilder::function( +// vec![ +// "rate", +// "increase", +// "sum_over_time", +// "avg_over_time", +// "min_over_time", +// "max_over_time", +// "count_over_time", +// ], +// func_args, +// Some("function"), +// None, +// ); +// +// PromQLPattern::new( +// pattern, +// //vec![ +// // "metric".to_string(), +// // "function".to_string(), +// // "range_vector".to_string(), +// //], +// // QueryPatternType::OnlyTemporal, +// ) +// } +// +// /// Create pattern for OnlySpatial queries (e.g., sum(metric) by (label)) +// pub fn only_spatial_pattern() -> PromQLPattern { +// debug!("Creating only spatial pattern"); +// let metric = PromQLPatternBuilder::metric(None, None, None, Some("metric")); +// +// let pattern = PromQLPatternBuilder::aggregation( +// vec!["sum", "count", "avg", "min", "max", "quantile"], +// metric, +// None, +// None, +// None, +// Some("aggregation"), +// ); +// +// PromQLPattern::new( +// pattern, +// //vec!["metric".to_string(), "aggregation".to_string()], +// // QueryPatternType::OnlySpatial, +// ) +// } +// +// /// Create pattern for OneTemporalOneSpatial queries (e.g., sum(rate(metric[5m])) by (label)) +// pub fn one_temporal_one_spatial_pattern() -> PromQLPattern { +// debug!("Creating one temporal one spatial pattern"); +// let ms2 = PromQLPatternBuilder::matrix_selector( +// PromQLPatternBuilder::metric(None, None, None, Some("metric")), +// None, +// Some("range_vector"), +// ); +// +// let func_args2: Vec>> = +// vec![ms2]; +// +// let temporal_part = PromQLPatternBuilder::function( +// vec![ +// "rate", +// "increase", +// 
"sum_over_time", +// "avg_over_time", +// "min_over_time", +// "max_over_time", +// "count_over_time", +// ], +// func_args2, +// Some("function"), +// None, +// ); +// +// let pattern = PromQLPatternBuilder::aggregation( +// vec!["sum", "count", "avg", "min", "max", "quantile"], +// temporal_part, +// None, +// None, +// None, +// Some("aggregation"), +// ); +// +// PromQLPattern::new( +// pattern, +// //vec![ +// // "metric".to_string(), +// // "function".to_string(), +// // "range_vector".to_string(), +// // "aggregation".to_string(), +// //], +// // QueryPatternType::OneTemporalOneSpatial, +// ) +// } +// +// /// Get all standard patterns +// pub fn get_all_patterns() -> Vec { +// debug!("Getting all standard patterns"); +// vec![ +// Self::one_temporal_one_spatial_pattern(), +// Self::only_temporal_pattern(), +// Self::only_spatial_pattern(), +// ] +// } +//} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs new file mode 100644 index 0000000..df4f0f0 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs @@ -0,0 +1,134 @@ +use serde::{Deserialize, Serialize}; +use tracing::debug; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct KeyByLabelNames { + pub labels: Vec, // Renamed from label_names to match query_logics usage +} + +impl KeyByLabelNames { + pub fn new(label_names: Vec) -> Self { + debug!("Creating KeyByLabelNames with {} labels", label_names.len()); + let mut sorted_names = label_names; + sorted_names.sort(); // Match Python behavior - keys are sorted + debug!("Sorted labels: {:?}", sorted_names); + Self { + labels: sorted_names, + } + } + + pub fn empty() -> Self { + Self::new(Vec::new()) + } + + pub fn from_names(names: Vec) -> Self { + Self::new(names) + } + + pub fn push(&mut self, name: String) { + debug!("Adding 
label: {}", name); + self.labels.push(name); + self.labels.sort(); // Keep sorted + } + + /// Set difference operation - remove labels that are in the other set + /// Based on Python implementation: KeyByLabelNames.__sub__ + pub fn difference(&self, other: &KeyByLabelNames) -> KeyByLabelNames { + debug!( + "Computing difference between {:?} and {:?}", + self.labels, other.labels + ); + let other_set: std::collections::HashSet<_> = other.labels.iter().collect(); + let result: Vec = self + .labels + .iter() + .filter(|label| !other_set.contains(label)) + .cloned() + .collect(); + KeyByLabelNames::new(result) + } + + /// Set union operation - combine labels from both sets + /// Based on Python implementation: KeyByLabelNames.__add__ + pub fn union(&self, other: &KeyByLabelNames) -> KeyByLabelNames { + debug!( + "Computing union between {:?} and {:?}", + self.labels, other.labels + ); + let mut combined = std::collections::HashSet::new(); + for label in &self.labels { + combined.insert(label.clone()); + } + for label in &other.labels { + combined.insert(label.clone()); + } + KeyByLabelNames::new(combined.into_iter().collect()) + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::to_value(&self.labels).unwrap_or(serde_json::Value::Null) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let names: Vec = serde_json::from_value(data.clone())?; + Ok(Self::new(names)) + } + + pub fn is_empty(&self) -> bool { + self.labels.is_empty() + } + + pub fn len(&self) -> usize { + self.labels.len() + } +} + +impl Default for KeyByLabelNames { + fn default() -> Self { + Self::empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_key_by_label_names() { + let key = KeyByLabelNames::new(vec!["instance".to_string(), "job".to_string()]); + + assert_eq!(key.len(), 2); + assert_eq!(key.labels, vec!["instance".to_string(), "job".to_string()]); + + let mut key = KeyByLabelNames::new(vec!["instance".to_string(), 
"job".to_string()]); + key.push("new_label".to_string()); + assert_eq!(key.len(), 3); + // After sorting, should be in alphabetical order + assert!(key.labels.contains(&"instance".to_string())); + assert!(key.labels.contains(&"job".to_string())); + assert!(key.labels.contains(&"new_label".to_string())); + } + + #[test] + fn test_difference() { + let key1 = KeyByLabelNames::new(vec!["a".to_string(), "b".to_string(), "c".to_string()]); + let key2 = KeyByLabelNames::new(vec!["b".to_string(), "c".to_string()]); + + let diff = key1.difference(&key2); + assert_eq!(diff.len(), 1); + assert_eq!(diff.labels, vec!["a".to_string()]); + } + + #[test] + fn test_union() { + let key1 = KeyByLabelNames::new(vec!["a".to_string(), "b".to_string()]); + let key2 = KeyByLabelNames::new(vec!["b".to_string(), "c".to_string()]); + + let union = key1.union(&key2); + assert_eq!(union.len(), 3); + assert_eq!( + union.labels, + vec!["a".to_string(), "b".to_string(), "c".to_string()] + ); + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs new file mode 100644 index 0000000..f587f43 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs @@ -0,0 +1,3 @@ +pub mod key_by_label_names; + +pub use key_by_label_names::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs new file mode 100644 index 0000000..5de6fa3 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs @@ -0,0 +1,7 @@ +pub mod ast_matching; +pub mod data_model; +pub mod query_logics; + +pub use ast_matching::*; +pub use data_model::*; +pub use query_logics::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs new file mode 100644 index 
0000000..7369f79 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs @@ -0,0 +1,142 @@ +use serde::{Deserialize, Serialize}; +use tracing::debug; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryPatternType { + OnlyTemporal, + OnlySpatial, + OneTemporalOneSpatial, +} + +impl std::fmt::Display for QueryPatternType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryPatternType: {:?}", self); + match self { + QueryPatternType::OnlyTemporal => write!(f, "only_temporal"), + QueryPatternType::OnlySpatial => write!(f, "only_spatial"), + QueryPatternType::OneTemporalOneSpatial => write!(f, "one_temporal_one_spatial"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryTreatmentType { + Exact, + Approximate, +} + +impl std::fmt::Display for QueryTreatmentType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryTreatmentType: {:?}", self); + match self { + QueryTreatmentType::Exact => write!(f, "exact"), + QueryTreatmentType::Approximate => write!(f, "approximate"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum Statistic { + Count, + Sum, + Cardinality, + Increase, + Rate, + Min, + Max, + Quantile, + Topk, +} + +impl std::fmt::Display for Statistic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting Statistic: {:?}", self); + match self { + Statistic::Count => write!(f, "count"), + Statistic::Sum => write!(f, "sum"), + Statistic::Cardinality => write!(f, "cardinality"), + Statistic::Increase => write!(f, "increase"), + Statistic::Rate => write!(f, "rate"), + Statistic::Min => write!(f, "min"), + Statistic::Max => write!(f, "max"), + Statistic::Quantile => write!(f, "quantile"), + Statistic::Topk => write!(f, "topk"), + } + } +} + 
+#[allow(clippy::should_implement_trait)] +impl Statistic { + pub fn from_str(s: &str) -> Option { + debug!("Parsing Statistic from string: {}", s); + match s.to_lowercase().as_str() { + "count" => Some(Statistic::Count), + "sum" => Some(Statistic::Sum), + "cardinality" => Some(Statistic::Cardinality), + "increase" => Some(Statistic::Increase), + "rate" => Some(Statistic::Rate), + "min" => Some(Statistic::Min), + "max" => Some(Statistic::Max), + "quantile" => Some(Statistic::Quantile), + "topk" => Some(Statistic::Topk), + _ => None, + } + } +} + +impl std::str::FromStr for Statistic { + type Err = (); + + /// Parse a statistic from a string (case-insensitive). + /// Use `s.parse::()` or `Statistic::from_str(s)`. + fn from_str(s: &str) -> Result { + debug!("FromStr trait parsing Statistic: {}", s); + Statistic::from_str(s).ok_or(()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryResultType { + InstantVector, + RangeVector, +} + +impl std::fmt::Display for QueryResultType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryResultType: {:?}", self); + match self { + QueryResultType::InstantVector => write!(f, "instant_vector"), + QueryResultType::RangeVector => write!(f, "range_vector"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_query_treatment_type_display() { + assert_eq!(QueryTreatmentType::Exact.to_string(), "exact"); + assert_eq!(QueryTreatmentType::Approximate.to_string(), "approximate"); + } + + #[test] + fn test_query_treatment_type_serialization() { + let exact = QueryTreatmentType::Exact; + let approximate = QueryTreatmentType::Approximate; + + // Test that they can be serialized/deserialized + let exact_str = serde_json::to_string(&exact).unwrap(); + let approximate_str = serde_json::to_string(&approximate).unwrap(); + + assert_eq!(exact_str, "\"Exact\""); + assert_eq!(approximate_str, "\"Approximate\""); + + let 
exact_back: QueryTreatmentType = serde_json::from_str(&exact_str).unwrap(); + let approximate_back: QueryTreatmentType = serde_json::from_str(&approximate_str).unwrap(); + + assert_eq!(exact_back, QueryTreatmentType::Exact); + assert_eq!(approximate_back, QueryTreatmentType::Approximate); + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs new file mode 100644 index 0000000..90dd913 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs @@ -0,0 +1,169 @@ +use crate::query_logics::enums::{QueryTreatmentType, Statistic}; +use tracing::debug; + +/// Map statistic to precompute operator based on treatment type +/// This mirrors the Python implementation's logic +pub fn map_statistic_to_precompute_operator( + statistic: Statistic, + treatment_type: QueryTreatmentType, +) -> Result<(String, String), String> { + debug!( + "Mapping statistic {:?} with treatment type {:?} to precompute operator", + statistic, treatment_type + ); + match statistic { + Statistic::Quantile => { + if treatment_type == QueryTreatmentType::Exact { + Err("Statistic Quantile cannot be computed exactly".to_string()) + } else { + Ok(("DatasketchesKLL".to_string(), "".to_string())) + //Ok(("HydraKLL".to_string(), "".to_string())) + } + } + Statistic::Min | Statistic::Max => { + if treatment_type == QueryTreatmentType::Approximate { + Ok(("DatasketchesKLL".to_string(), "".to_string())) + //Ok(("HydraKLL".to_string(), "".to_string())) + } else { + Ok(( + "MultipleMinMax".to_string(), + statistic.to_string().to_lowercase(), + )) + } + } + Statistic::Sum | Statistic::Count => { + if treatment_type == QueryTreatmentType::Approximate { + Ok(( + "CountMinSketch".to_string(), + statistic.to_string().to_lowercase(), + )) + } else { + Ok(( + "MultipleSum".to_string(), + statistic.to_string().to_lowercase(), + )) + } + } + Statistic::Rate | 
Statistic::Increase => { + Ok(("MultipleIncrease".to_string(), "".to_string())) + } + _ => Err(format!("Statistic {statistic:?} not supported")), + } +} + +/// Check if a precompute operator supports subpopulations (multiple keys) +pub fn does_precompute_operator_support_subpopulations( + statistic: Statistic, + precompute_operator: &str, +) -> bool { + debug!( + "Checking if precompute operator '{}' supports subpopulations for statistic {:?}", + precompute_operator, statistic + ); + match precompute_operator { + // Single-key operators + "Increase" | "MinMax" | "Sum" | "DatasketchesKLL" => false, + + // Multi-key operators + "MultipleIncrease" | "MultipleMinMax" | "MultipleSum" | "HydraKLL" => true, + + // CountMinSketch supports subpopulations only for certain statistics + "CountMinSketch" => matches!(statistic, Statistic::Sum | Statistic::Count), + + // "CountMinSketchWithHeap" is only supported for Topk + // Other usages of CountMinSketchWithHeap will fall through. + "CountMinSketchWithHeap" if matches!(statistic, Statistic::Topk) => false, + + // Default: not supported + _ => panic!("Unexpected precompute operator: {}", precompute_operator), + } +} + +/// Check if temporal and spatial aggregations are collapsible +/// Based on Python implementation in promql_utilities/query_logics/logics.py +pub fn get_is_collapsable(temporal_aggregation: &str, spatial_aggregation: &str) -> bool { + debug!( + "Checking if temporal aggregation '{}' and spatial aggregation '{}' are collapsable", + temporal_aggregation, spatial_aggregation + ); + match spatial_aggregation { + "sum" => matches!( + temporal_aggregation, + "sum_over_time" | "count_over_time" // Note: "increase" and "rate" are commented out in Python + ), + "min" => temporal_aggregation == "min_over_time", + "max" => temporal_aggregation == "max_over_time", + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_map_statistic_to_precompute_operator() { + // Test exact sum + let result 
= + map_statistic_to_precompute_operator(Statistic::Sum, QueryTreatmentType::Exact) + .unwrap(); + assert_eq!(result, ("MultipleSum".to_string(), "sum".to_string())); + + // Test approximate sum + let result = + map_statistic_to_precompute_operator(Statistic::Sum, QueryTreatmentType::Approximate) + .unwrap(); + assert_eq!(result, ("CountMinSketch".to_string(), "sum".to_string())); + + // Test exact quantile (should fail) + let result = + map_statistic_to_precompute_operator(Statistic::Quantile, QueryTreatmentType::Exact); + assert!(result.is_err()); + + // Test approximate quantile + let result = map_statistic_to_precompute_operator( + Statistic::Quantile, + QueryTreatmentType::Approximate, + ) + .unwrap(); + assert_eq!(result, ("DatasketchesKLL".to_string(), "".to_string())); + //assert_eq!(result, ("HydraKLL".to_string(), "".to_string())); + } + + #[test] + fn test_does_precompute_operator_support_subpopulations() { + // Test MultipleSum supports subpopulations + assert!(does_precompute_operator_support_subpopulations( + Statistic::Sum, + "MultipleSum" + )); + + // Test DatasketchesKLL does not support subpopulations + assert!(!does_precompute_operator_support_subpopulations( + Statistic::Quantile, + "DatasketchesKLL" + )); + + // Test HydraKLL supports subpopulations + assert!(does_precompute_operator_support_subpopulations( + Statistic::Quantile, + "HydraKLL" + )); + + // Test CountMinSketch with valid statistic + assert!(does_precompute_operator_support_subpopulations( + Statistic::Sum, + "CountMinSketch" + )); + } + + #[test] + fn test_get_is_collapsable() { + assert!(get_is_collapsable("sum_over_time", "sum")); + assert!(get_is_collapsable("count_over_time", "sum")); + assert!(get_is_collapsable("min_over_time", "min")); + assert!(get_is_collapsable("max_over_time", "max")); + assert!(!get_is_collapsable("min_over_time", "sum")); + assert!(!get_is_collapsable("unknown", "sum")); + } +} diff --git 
a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs new file mode 100644 index 0000000..f3a98d1 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs @@ -0,0 +1,7 @@ +pub mod enums; +pub mod logics; +pub mod parsing; + +pub use enums::*; +pub use logics::*; +pub use parsing::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs new file mode 100644 index 0000000..c37bb48 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs @@ -0,0 +1,135 @@ +use core::panic; + +use promql_parser::parser::Expr; +use tracing::debug; + +use crate::ast_matching::PromQLMatchResult; +use crate::data_model::KeyByLabelNames; +use crate::query_logics::enums::{QueryPatternType, Statistic}; + +pub fn get_metric_and_spatial_filter(match_result: &PromQLMatchResult) -> (String, String) { + debug!("Extracting metric and spatial filter from match result"); + let mut metric_name = match_result.get_metric_name().unwrap_or_default(); + debug!("Initial metric name: {}", metric_name); + + let spatial_filter = if let Some(metric_token) = match_result + .tokens + .get("metric") + .and_then(|token| token.metric.as_ref()) + { + if let Some(ast_vs) = metric_token.ast.as_ref() { + // Render the VectorSelector AST to string and extract inner `{...}` content + // let ast_str = format!("{}", ast_vs); + let ast_str = Expr::from(ast_vs.clone()).prettify(); + if let Some(inner) = ast_str.split('{').nth(1).and_then(|s| s.split('}').next()) { + debug!("Found spatial filter content: {}", inner); + // Ensure metric_name does not include the selector part + metric_name = metric_name + .split('{') + .next() + .unwrap_or(&metric_name) + .to_string(); + debug!("Cleaned metric name: {}", metric_name); + 
inner.to_string() + } else { + String::new() + } + } else { + // No AST available -> return empty spatial filter (no fallback reconstruction) + String::new() + } + } else { + String::new() + }; + + debug!( + "Final result - metric: {}, spatial_filter: {}", + metric_name, spatial_filter + ); + (metric_name, spatial_filter) +} + +/// Get statistics to compute based on pattern type and tokens +pub fn get_statistics_to_compute( + pattern_type: QueryPatternType, + match_result: &PromQLMatchResult, +) -> Vec { + debug!("Computing statistics for pattern type {:?}", pattern_type); + let statistic_to_compute: Option = if pattern_type == QueryPatternType::OnlyTemporal + || pattern_type == QueryPatternType::OneTemporalOneSpatial + { + match_result.get_function_name().map(|function_name| { + let name = function_name.to_lowercase(); + name.split('_').next().unwrap_or(&name).to_string() + }) + } else if pattern_type == QueryPatternType::OnlySpatial { + match_result + .get_aggregation_op() + .map(|agg| agg.to_lowercase()) + } else { + panic!("Unsupported query pattern type"); + }; + + if let Some(statistic_to_compute) = statistic_to_compute { + debug!("Found statistic to compute: {}", statistic_to_compute); + if statistic_to_compute == "avg" { + vec![Statistic::Sum, Statistic::Count] + } else if let Ok(stat) = statistic_to_compute.parse::() { + vec![stat] + } else { + panic!("Unsupported statistic: {}", statistic_to_compute); + } + } else { + panic!("No statistic found in the query"); + } +} + +pub fn get_spatial_aggregation_output_labels( + match_result: &PromQLMatchResult, + all_labels: &KeyByLabelNames, +) -> KeyByLabelNames { + debug!("Getting spatial aggregation output labels"); + debug!("All labels: {:?}", all_labels); + // Match Python behaviour: assume aggregation token and modifier exist + // and raise (panic) if missing or invalid. "by" and "without" logic + // remain the same. 
+ let aggregation_token = match_result + .tokens + .get("aggregation") + .and_then(|token| token.aggregation.as_ref()) + .expect("aggregation token missing"); + + // Patching: When the query is topk, we should always return all labels + if aggregation_token.op.to_lowercase() == "topk" { + debug!("Aggregation operation is 'topk', returning all labels"); + return all_labels.clone(); + } + + // Fixing issue https://github.com/ProjectASAP/asap-internal/issues/24 + let modifier: &crate::AggregationModifier = match aggregation_token.modifier.as_ref() { + Some(m) => m, + None => { + debug!("No aggregation modifier found, returning empty KeyByLabelNames"); + return KeyByLabelNames::new(vec![]); + } + }; + + debug!( + "Modifier type: {}, labels: {:?}", + modifier.modifier_type, modifier.labels + ); + match modifier.modifier_type.as_str() { + "by" => { + debug!("Processing 'by' modifier"); + // Return only the labels specified in "by" clause + KeyByLabelNames::new(modifier.labels.clone()) + } + "without" => { + debug!("Processing 'without' modifier"); + // Return all labels except those specified in "without" clause + let without_labels = KeyByLabelNames::new(modifier.labels.clone()); + all_labels.difference(&without_labels) + } + _ => panic!("Invalid aggregation modifier"), + } +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock new file mode 100644 index 0000000..133a015 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock @@ -0,0 +1,919 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" 
+ +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + 
"js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.27" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] 
+name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "packedvec" 
+version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" 
+dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + 
+[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sketch_db_common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "promql_utilities", + "serde", + "serde_json", + "serde_yaml", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = 
"0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + 
+[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml new file mode 100644 index 0000000..a70577c --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "sketch_db_common" +version = "0.1.0" +edition = "2021" + +[dependencies] +promql_utilities = { path = "../promql_utilities" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" +anyhow = "1.0" +clap = { version = "4.0", features = ["derive"] } diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs new file mode 100644 index 0000000..20dcc2b --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs @@ -0,0 +1,387 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use serde_yaml; +use std::collections::HashMap; + +use crate::enums::QueryLanguage; +use crate::traits::SerializableToSink; +use crate::utils::normalize_spatial_filter; +use promql_utilities::data_model::KeyByLabelNames; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationConfig { + pub aggregation_id: u64, + pub aggregation_type: String, + pub aggregation_sub_type: String, + pub parameters: HashMap, + pub grouping_labels: KeyByLabelNames, + pub aggregated_labels: KeyByLabelNames, + pub rollup_labels: KeyByLabelNames, + pub original_yaml: String, + + // NEW fields for sliding window support (Issue #236) + pub window_size: u64, // Window size in seconds (e.g., 900s for 15m) + pub slide_interval: u64, // Slide/hop interval in seconds (e.g., 30s) + pub window_type: String, // "tumbling" or 
"sliding" + + // DEPRECATED but kept for backward compatibility + pub tumbling_window_size: u64, + + pub spatial_filter: String, + pub spatial_filter_normalized: String, + pub metric: String, // PromQL mode: metric name; SQL mode: derived from table_name.value_column + pub num_aggregates_to_retain: Option, + pub read_count_threshold: Option, + + // SQL-specific fields (optional, used when query_language=sql) + pub table_name: Option, // SQL mode: table name + pub value_column: Option, // SQL mode: which value column to aggregate +} + +// TODO: need to implement deserialization methods + +impl AggregationConfig { + #[allow(clippy::too_many_arguments)] + pub fn new( + aggregation_id: u64, + aggregation_type: String, + aggregation_sub_type: String, + parameters: HashMap, + grouping_labels: KeyByLabelNames, + aggregated_labels: KeyByLabelNames, + rollup_labels: KeyByLabelNames, + original_yaml: String, + tumbling_window_size: u64, + spatial_filter: String, + metric: String, + num_aggregates_to_retain: Option, + read_count_threshold: Option, + // NEW parameters for sliding window support + window_size: Option, + slide_interval: Option, + window_type: Option, + // SQL-specific fields + table_name: Option, + value_column: Option, + ) -> Self { + // Generate normalized spatial filter (placeholder implementation) + let spatial_filter_normalized = normalize_spatial_filter(&spatial_filter); + + // Handle backward compatibility: if new fields not provided, use tumbling_window_size + let window_size = window_size.unwrap_or(tumbling_window_size); + let slide_interval = slide_interval.unwrap_or(tumbling_window_size); + let window_type = window_type.unwrap_or_else(|| "tumbling".to_string()); + + Self { + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + original_yaml, + window_size, + slide_interval, + window_type, + tumbling_window_size, + spatial_filter, + spatial_filter_normalized, + metric, + 
num_aggregates_to_retain, + read_count_threshold, + table_name, + value_column, + } + } + + // pub fn with_sub_type(mut self, sub_type: String) -> Self { + // self.aggregation_sub_type = Some(sub_type); + // self + // } + + // pub fn with_parameters(mut self, parameters: HashMap) -> Self { + // self.parameters = parameters; + // self + // } + + pub fn with_original_yaml(mut self, yaml: String) -> Self { + self.original_yaml = yaml; + self + } + + pub fn deserialize_from_json( + data: &Value, + ) -> Result> { + let aggregation_id = data["aggregationId"] + .as_u64() + .ok_or("Missing aggregationId")?; + + let aggregation_type = data["aggregationType"] + .as_str() + .ok_or("Missing aggregationType")? + .to_string(); + + let aggregation_sub_type = data["aggregationSubType"] + .as_str() + .ok_or("Missing aggregationSubType")? + .to_string(); + + let parameters = data["parameters"] + .as_object() + .ok_or("Missing parameters")? + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + // Note: In Python, eval(data["originalYaml"]) is used, but this is unsafe + // Using the string value directly instead + let original_yaml = data["originalYaml"].as_str().unwrap_or("").to_string(); + + // Deserialize KeyByLabelNames - assuming they have deserialize_from_json methods + let grouping_labels = KeyByLabelNames::deserialize_from_json(&data["groupingLabels"])?; + let aggregated_labels = KeyByLabelNames::deserialize_from_json(&data["aggregatedLabels"])?; + let rollup_labels = KeyByLabelNames::deserialize_from_json(&data["rollupLabels"])?; + + let tumbling_window_size = data["tumblingWindowSize"] + .as_u64() + .ok_or("Missing tumblingWindowSize")?; + + // NEW: Handle new window fields with backward compatibility + let window_type = data + .get("windowType") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let window_size = data.get("windowSize").and_then(|v| v.as_u64()); + + let slide_interval = data.get("slideInterval").and_then(|v| v.as_u64()); + + let 
spatial_filter = data["spatialFilter"].as_str().unwrap_or("").to_string(); + + let metric = data["metric"].as_str().ok_or("Missing metric")?.to_string(); + + let num_aggregates_to_retain = data.get("numAggregatesToRetain").and_then(|v| v.as_u64()); + let read_count_threshold = data.get("readCountThreshold").and_then(|v| v.as_u64()); + + // SQL-specific fields (optional) + let table_name = data + .get("tableName") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let value_column = data + .get("valueColumn") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + Ok(Self::new( + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + original_yaml, + tumbling_window_size, + spatial_filter, + metric, + num_aggregates_to_retain, + read_count_threshold, + window_size, + slide_interval, + window_type, + table_name, + value_column, + )) + } + + pub fn deserialize_from_bytes( + bytes: &[u8], + ) -> Result> { + let data_str = std::str::from_utf8(bytes)?.trim(); + let data: Value = serde_json::from_str(data_str)?; + Self::deserialize_from_json(&data) + } + + pub fn from_yaml_data( + aggregation_data: &serde_yaml::Value, + num_aggregates_to_retain: Option, + read_count_threshold: Option, + query_language: QueryLanguage, + ) -> Result { + let aggregation_id = aggregation_data["aggregationId"] + .as_u64() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationId"))?; + + let labels = &aggregation_data["labels"]; + let grouping_labels = KeyByLabelNames::new( + labels["grouping"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing grouping labels"))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + let aggregated_labels = KeyByLabelNames::new( + labels["aggregated"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing aggregated labels"))? 
+ .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + let rollup_labels = KeyByLabelNames::new( + labels["rollup"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing rollup labels"))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + + let aggregation_type = aggregation_data["aggregationType"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationType"))? + .to_string(); + + let aggregation_sub_type = aggregation_data["aggregationSubType"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationSubType"))? + .to_string(); + + // Convert serde_yaml::Value to serde_json::Value for parameters + let parameters: HashMap = aggregation_data["parameters"] + .as_mapping() + .ok_or_else(|| anyhow::anyhow!("Missing parameters"))? + .iter() + .map(|(k, v)| { + let key = k.as_str().unwrap_or("").to_string(); + let value = serde_json::to_value(v).unwrap_or(Value::Null); + (key, value) + }) + .collect(); + + let tumbling_window_size = aggregation_data["tumblingWindowSize"] + .as_u64() + .ok_or_else(|| anyhow::anyhow!("Missing tumblingWindowSize"))?; + + // NEW: Handle new window fields with backward compatibility + let window_type = aggregation_data + .get("windowType") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let window_size = aggregation_data.get("windowSize").and_then(|v| v.as_u64()); + + let slide_interval = aggregation_data + .get("slideInterval") + .and_then(|v| v.as_u64()); + + let spatial_filter = aggregation_data["spatialFilter"] + .as_str() + .unwrap_or("") + .to_string(); + + // Handle PromQL (metric) vs SQL (table_name/value_column) based on query_language + let (metric, table_name, value_column) = match query_language { + QueryLanguage::promql => { + let metric = aggregation_data["metric"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing metric for PromQL query language"))? 
+ .to_string(); + (metric, None, None) + } + QueryLanguage::sql => { + let table_name = aggregation_data + .get("table_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing table_name for SQL query language"))? + .to_string(); + let value_column = aggregation_data + .get("value_column") + .and_then(|v| v.as_str()) + .unwrap_or("value") + .to_string(); + // Derive metric from table_name.value_column for internal use + let metric = format!("{}.{}", table_name, value_column); + (metric, Some(table_name), Some(value_column)) + } + QueryLanguage::elastic_querydsl => { + // Elastic doesn't use metric/table_name in aggregations + (String::new(), None, None) + } + QueryLanguage::elastic_sql => { + // Elastic doesn't use metric/table_name in aggregations + (String::new(), None, None) + } + }; + + Ok(Self::new( + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + String::new(), // original_yaml - empty as in Python + tumbling_window_size, + spatial_filter, + metric, + num_aggregates_to_retain, + read_count_threshold, + window_size, + slide_interval, + window_type, + table_name, + value_column, + )) + } +} + +impl SerializableToSink for AggregationConfig { + fn serialize_to_json(&self) -> Value { + let mut json = serde_json::json!({ + "aggregationId": self.aggregation_id, + "aggregationType": self.aggregation_type, + "aggregationSubType": self.aggregation_sub_type, + "parameters": self.parameters, + "originalYaml": self.original_yaml, + "tumblingWindowSize": self.tumbling_window_size, + // NEW: Include new window fields + "windowSize": self.window_size, + "slideInterval": self.slide_interval, + "windowType": self.window_type, + "spatialFilter": self.spatial_filter, + "metric": self.metric, + }); + + // Only include numAggregatesToRetain if it's Some + if let Some(num_aggregates) = self.num_aggregates_to_retain { + json["numAggregatesToRetain"] = 
serde_json::json!(num_aggregates); + } + + // Only include readCountThreshold if it's Some + if let Some(threshold) = self.read_count_threshold { + json["readCountThreshold"] = serde_json::json!(threshold); + } + + // SQL-specific fields (only include if present) + if let Some(ref table_name) = self.table_name { + json["tableName"] = serde_json::json!(table_name); + } + if let Some(ref value_column) = self.value_column { + json["valueColumn"] = serde_json::json!(value_column); + } + + json + } + + fn serialize_to_bytes(&self) -> Vec { + self.original_yaml.as_bytes().to_vec() + } +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs new file mode 100644 index 0000000..3b5bb6a --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs @@ -0,0 +1,25 @@ +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +#[allow(non_camel_case_types)] +pub enum QueryLanguage { + #[value(alias = "SQL")] + sql, + #[value(alias = "PROMQL")] + promql, + #[value(alias = "ElasticQueryDSL")] + elastic_querydsl, + #[value(alias = "ElasticSQL")] + elastic_sql, +} + +/// Policy for cleaning up old aggregates from the store. +/// Must be explicitly specified in inference_config.yaml. 
+#[derive(Clone, Debug, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CleanupPolicy { + /// Keep only the N most recent aggregates (circular buffer behavior) + CircularBuffer, + /// Remove aggregates after they've been read N times + ReadBased, + /// Never clean up aggregates + NoCleanup, +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs new file mode 100644 index 0000000..286882f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs @@ -0,0 +1,4 @@ +pub mod aggregation_config; +pub mod enums; +pub mod traits; +pub mod utils; diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs new file mode 100644 index 0000000..196d908 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs @@ -0,0 +1,7 @@ +use serde_json::Value; + +/// Trait for objects that can be serialized to different formats +pub trait SerializableToSink { + fn serialize_to_json(&self) -> Value; + fn serialize_to_bytes(&self) -> Vec; +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs new file mode 100644 index 0000000..ef90e78 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs @@ -0,0 +1,41 @@ +/// Normalize spatial filter for PromQL queries +pub fn normalize_spatial_filter(filter: &str) -> String { + if filter.is_empty() { + return String::new(); + } + + // TODO: Parse the spatial filter, make fake ASTs, each one with matcher, + // prettify each, and sort them. 
Unfortunately, unable to manually create fake ASTs + // Current workaround: split spatial filter by commas, sort, and join + + let trimmed = filter.trim().strip_prefix('{').unwrap_or(filter.trim()); + let trimmed = trimmed.strip_suffix('}').unwrap_or(trimmed); + let trimmed = trimmed.trim(); + + let mut parts: Vec<&str> = trimmed.split(',').collect(); + parts.sort(); + + format!("{{{}}}", parts.join(",")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_spatial_filter() { + assert_eq!(normalize_spatial_filter("").as_str(), ""); + + let result = normalize_spatial_filter("instance=\"localhost:9090\""); + assert_eq!(result, "{instance=\"localhost:9090\"}"); + + let result = normalize_spatial_filter("{instance=\"localhost:9090\"}"); + assert_eq!(result, "{instance=\"localhost:9090\"}"); + + let result = normalize_spatial_filter("{job=\"prometheus\",instance=\"localhost:9090\"}"); + assert_eq!(result, "{instance=\"localhost:9090\",job=\"prometheus\"}"); + + let result = normalize_spatial_filter("job=\"prometheus\",instance=\"localhost:9090\""); + assert_eq!(result, "{instance=\"localhost:9090\",job=\"prometheus\"}"); + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/.gitignore b/CommonDependencies/dependencies/rs/sql_utilities/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock new file mode 100644 index 0000000..0a9134e --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock @@ -0,0 +1,587 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] 
+ +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parse_datetime" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acea383beda9652270f3c9678d83aa58cbfc16880343cae0c0c8c7d6c0974132" +dependencies = [ + "jiff", + "num-traits", + "winnow", +] + +[[package]] +name 
= "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "parse_datetime", + "sqlparser", + "tokio-test", +] + +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", +] + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +dependencies = [ + "pin-project-lite", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + 
"windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ 
+ "memchr", +] diff --git a/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml new file mode 100644 index 0000000..7ad9756 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sql_utilities" +version = "0.1.0" +edition = "2021" +authors = ["SketchDB Team"] +description = "A standalone SQL pattern matching and query analysis library for Rust" +license = "MIT" +keywords = ["Clickhouse", "sql", "pattern-matching", "query-analysis"] +categories = ["parsing", "database", "development-tools"] + +[dependencies] +chrono = "0.4.39" +parse_datetime = "0.13.3" +sqlparser = "0.59.0" + +[dev-dependencies] +tokio-test = "0.4" diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs new file mode 100644 index 0000000..74fd48f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs @@ -0,0 +1,8 @@ +pub mod sqlhelper; +pub mod sqlparser_test; +pub mod sqlpattern_matcher; +pub mod sqlpattern_parser; + +pub use sqlhelper::{SQLSchema, Table}; +pub use sqlpattern_matcher::*; +pub use sqlpattern_parser::*; diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs new file mode 100644 index 0000000..3e3176f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs @@ -0,0 +1,157 @@ +use std::collections::{HashMap, HashSet}; + +#[derive(Debug, Clone)] +struct Columns { + time: String, + value_columns: HashSet, + metadata_columns: HashSet, +} + +#[derive(Debug, Clone)] +pub struct Table { + pub name: String, + pub time_column: String, + pub value_columns: HashSet, + pub metadata_columns: HashSet, +} + +impl Table { + pub fn new( + table_name: String, + time_column: 
String, + value_columns: HashSet, + metadata_columns: HashSet, + ) -> Self { + Self { + name: table_name, + time_column, + value_columns, + metadata_columns, + } + } +} + +#[derive(Debug, Clone)] +pub struct SQLSchema { + info: HashMap, +} + +impl SQLSchema { + pub fn new(table_schemas: Vec) -> Self { + let mut info = HashMap::new(); + + for table in table_schemas { + let columns = Columns { + time: table.time_column, + value_columns: table.value_columns, + metadata_columns: table.metadata_columns, + }; + info.insert(table.name, columns); + } + + Self { info } + } + + pub fn get_time_column(&self, table_name: &str) -> Option<&String> { + self.info.get(table_name).map(|cols| &cols.time) + } + + pub fn get_value_columns(&self, table_name: &str) -> Option<&HashSet> { + self.info.get(table_name).map(|cols| &cols.value_columns) + } + + pub fn get_metadata_columns(&self, table_name: &str) -> Option<&HashSet> { + self.info.get(table_name).map(|cols| &cols.metadata_columns) + } + + pub fn is_valid_value_column(&self, table: &str, value_column: &str) -> bool { + if let Some(value_columns) = self.get_value_columns(table) { + value_columns.contains(value_column) + } else { + false + } + } + + pub fn are_valid_metadata_columns(&self, table: &str, columns: &HashSet) -> bool { + if let Some(table_metadata_columns) = self.get_metadata_columns(table) { + for col in columns { + if !table_metadata_columns.contains(col) { + return false; + } + } + true + } else { + false + } + } +} + +#[derive(Debug, Clone)] +pub struct SQLQueryData { + pub aggregation_info: AggregationInfo, + pub metric: String, + pub labels: HashSet, + pub time_info: TimeInfo, + pub subquery: Option>, +} + +#[derive(Debug, Clone)] +pub struct TimeInfo { + time_col_name: String, + // Can be changed to use timezone (normal datetime incorporates TimeZone) in the future + start: f64, + // is_now: bool, + duration: f64, +} + +impl TimeInfo { + pub fn new(time_col_name: String, start: f64, duration: f64) -> Self { + Self 
{ + time_col_name, + start, + // is_now, + duration, + } + } + + pub fn get_time_col_name(&self) -> &str { + &self.time_col_name + } + + pub fn get_start(&self) -> f64 { + self.start + } + + pub fn get_duration(&self) -> f64 { + self.duration + } +} + +#[derive(Debug, Clone)] +pub struct AggregationInfo { + name: String, + value_column_name: String, + args: Vec, +} + +impl AggregationInfo { + pub fn new(name: String, value_column_name: String, args: Vec) -> Self { + Self { + name, + value_column_name, + args, + } + } + + pub fn get_name(&self) -> &str { + &self.name + } + + pub fn get_value_column_name(&self) -> &str { + &self.value_column_name + } + + pub fn get_args(&self) -> &Vec { + &self.args + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs new file mode 100644 index 0000000..fe40fcd --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs @@ -0,0 +1,412 @@ +#[cfg(test)] +mod tests { + // use super::*; + use sqlparser::dialect::GenericDialect; + use sqlparser::parser::Parser; + use std::collections::HashSet; + + use crate::sqlhelper::{SQLQueryData, SQLSchema as Schema, Table}; + use crate::sqlpattern_matcher::{QueryError, QueryType, SQLPatternMatcher}; + use crate::sqlpattern_parser::SQLPatternParser; + + pub fn create_test_schema() -> Schema { + let mut cpu_labels = HashSet::new(); + cpu_labels.insert("L1".to_string()); + cpu_labels.insert("L2".to_string()); + cpu_labels.insert("L3".to_string()); + cpu_labels.insert("L4".to_string()); + + let mut mem_labels = HashSet::new(); + mem_labels.insert("L1".to_string()); + mem_labels.insert("L2".to_string()); + mem_labels.insert("L3".to_string()); + mem_labels.insert("L4".to_string()); + + let cpu_table = Table::new( + "cpu_usage".to_string(), + "time".to_string(), + HashSet::from(["value".to_string()]), + cpu_labels, + ); + let 
mem_table = Table::new( + "mem_usage".to_string(), + "ms".to_string(), + HashSet::from(["mb".to_string()]), + mem_labels, + ); + + Schema::new(vec![cpu_table, mem_table]) + } + + #[test] + fn test_basic_parsing() { + let schema = create_test_schema(); + let time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs_f64(); + let dialect = GenericDialect {}; + let sql = "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1"; + + let statements = Parser::parse_sql(&dialect, sql).unwrap(); + let query_data = SQLPatternParser::new(&schema, time).parse_query(&statements); + + assert!(query_data.is_some()); + let query = query_data.unwrap(); + assert_eq!(query.metric, "cpu_usage"); + assert_eq!(query.aggregation_info.get_name(), "AVG"); + assert!(query.labels.contains("L1")); + } + + #[test] + fn test_pattern_matching() { + let schema = create_test_schema(); + let time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs_f64(); + let matcher = SQLPatternMatcher::new(schema.clone(), 1.0); + + let dialect = GenericDialect {}; + let sql = "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + + let statements = Parser::parse_sql(&dialect, sql).unwrap(); + + if let Some(query_data) = SQLPatternParser::new(&schema, time).parse_query(&statements) { + let result = matcher.query_info_to_pattern(&query_data); + assert!(result.is_valid()); + assert_eq!(result.query_type, vec![QueryType::Spatial]); + } + } + + #[test] + fn test_full_suite() { + let tables = vec![Table::new( + String::from("cpu_usage"), + String::from("time"), + HashSet::from([String::from("value")]), + HashSet::from([ + String::from("L1"), + String::from("L2"), + String::from("L3"), + String::from("L4"), + ]), + )]; + let schema = Schema::new(tables); + let scrape_interval = 1.0; + + let test_queries = vec![ + ( + 
"dated_temporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "dated_temporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "dated_spatial_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial], + None + ), + ( + "dated_spatial_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "dated_spatial_of_temporal_quantile_max", + "SELECT QUANTILE(0.95, value) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + // // Temporal queries + ( + "temporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "temporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_avg", + 
"SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + // // // Spatial queries + ( + "spatial_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "spatial_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::Spatial], + None + ), + ( + "spatial_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![QueryType::Spatial], + None + ), + ( + "spatial_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial], + None + ), + ( + "spatial_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + // // // Spatial of temporal queries + ( + "spatial_of_temporal_sum_sum", + "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_min", + "SELECT SUM(result) FROM (SELECT MIN(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_max", + "SELECT SUM(result) FROM (SELECT MAX(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_avg", + "SELECT SUM(result) FROM (SELECT AVG(value) AS result FROM cpu_usage 
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_sum", + "SELECT MAX(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_min", + "SELECT MAX(result) FROM (SELECT MIN(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_max", + "SELECT MAX(result) FROM (SELECT MAX(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_avg", + "SELECT MAX(result) FROM (SELECT AVG(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_max", + "SELECT QUANTILE(0.95, value) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_min", + "SELECT QUANTILE(0.95, value) FROM (SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_sum", + "SELECT QUANTILE(0.95, value) FROM (SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) 
GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_avg", + "SELECT QUANTILE(0.95, value) FROM (SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_avg_quantile", + "SELECT AVG(result) FROM (SELECT QUANTILE(0.95, value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalQuantile], + None + ), + ( + "spatial_of_temporal_quantile_quantile", + "SELECT QUANTILE(0.95, value) FROM (SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalQuantile], + None + ), + // // // Error cases + ( + "temporal_invalid_aggregation_label", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, FAKE_LABEL", + vec![], + Some(QueryError::InvalidAggregationLabel) + ), + ( + "temporal_invalid_time_column", + "SELECT SUM(value) FROM cpu_usage WHERE datetime BETWEEN NOW() AND DATEADD(s, -10, NOW()) GROUP BY L1, L2, L3, L4", + vec![], + Some(QueryError::InvalidTimeCol) + ), + ( + "temporal_invalid_value_column", + "SELECT SUM(not_a_value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![], + Some(QueryError::InvalidValueCol) + ), + // SpatioTemporal queries - span multiple scrape intervals but group by subset of labels + ( + "spatiotemporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, 
L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "temporal_illegal_aggregation_function", + "SELECT HARMONIC_MEAN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![], + Some(QueryError::IllegalAggregationFn) + ), + ( + "spatial_scrape_duration_too_small", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN NOW() AND DATEADD(s, 0, NOW()) GROUP BY L1, L2", + vec![], + Some(QueryError::SpatialDurationSmall) + ), + ( + "temporal_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "spatial_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "spatiotemporal_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatial_of_temporal_percentile_max", + "SELECT PERCENTILE(value, 95) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ]; + + let mut successes = 0; + let mut failures = 0; + + for (name, sql, expected_types, error) in 
test_queries {
            println!("Testing: {}", name);

            if let Some(query_data) = parse_sql_query(sql) {
                let matcher = SQLPatternMatcher::new(schema.clone(), scrape_interval);
                let result = matcher.query_info_to_pattern(&query_data);

                // Record the outcome BEFORE asserting: previously the two
                // assert_eq! calls came first, so the first mismatch panicked
                // and the failure counter / final report below were dead code.
                if result.query_type == expected_types && result.error == error {
                    println!("✓ Passed");
                    successes += 1;
                } else {
                    println!("✗ Failed");
                    println!("expected type, error: {:?}, {:?}", expected_types, error);
                    println!(
                        "got type, error: {:?}, {:?}",
                        result.query_type, result.error
                    );
                    failures += 1;
                }

                assert_eq!(result.query_type, expected_types, "query `{}`", name);
                assert_eq!(result.error, error, "query `{}`", name);
            } else {
                println!("✗ Failed to parse");
                failures += 1;
                // Previously a parse failure was only counted, so an unparsable
                // query let the test pass silently; fail loudly instead.
                panic!("failed to parse query `{}`", name);
            }
        }

        println!("\nRESULTS\n=======");
        println!("Passed: {}", successes);
        println!("Failed: {}", failures);
    }

    /// Parses `sql` with the ClickHouse dialect against the shared test schema
    /// at the current wall-clock time; `None` when the shape is unsupported.
    pub fn parse_sql_query(sql: &str) -> Option<SQLQueryData> {
        let schema = create_test_schema();
        let time = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs_f64();
        let dialect = sqlparser::dialect::ClickHouseDialect {};
        let statements = Parser::parse_sql(&dialect, sql).ok()?;
        println!("Query: {sql}, AST: {statements:#?}");

        SQLPatternParser::new(&schema, time).parse_query(&statements)
    }
}
diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs
new file mode 100644
index 0000000..1aac0da
--- /dev/null
+++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs
@@ -0,0 +1,287 @@
use crate::sqlhelper::AggregationInfo;
use crate::sqlhelper::SQLQueryData;
use crate::sqlhelper::SQLSchema;
use crate::sqlhelper::TimeInfo;

use std::collections::HashSet;

/// Classification of a (sub)query by how it aggregates over time and labels.
#[derive(Debug, Clone, PartialEq)]
pub enum QueryType {
    Spatial,
    TemporalGeneric,
    TemporalQuantile,
    SpatioTemporal,
}

#[derive(Debug, Clone, PartialEq)]
pub enum
QueryError {
    InvalidAggregationLabel,
    InvalidTimeCol,
    InvalidValueCol,
    TemporalMissingLabels, // indistinguishable from too large scrape duration
    IllegalAggregationFn,
    SpatialDurationSmall,
}

/// Result of pattern-matching a parsed query: one classification and one data
/// entry per (sub)query, or an error describing why the query is unsupported.
/// NOTE(review): generic parameters below were stripped by extraction and have
/// been restored from usage.
#[derive(Debug)]
pub struct SQLQuery {
    pub query_type: Vec<QueryType>,
    pub query_data: Vec<SQLQueryData>,
    pub error: Option<QueryError>,
    pub msg: Option<String>,
}

impl SQLQuery {
    pub fn new(query_type: Vec<QueryType>, error: Option<QueryError>, msg: Option<String>) -> Self {
        Self {
            query_type,
            query_data: Vec::new(),
            error,
            msg,
        }
    }

    /// Appends one classified (sub)query to this result.
    pub fn add_subquery(
        &mut self,
        query_type: QueryType,
        aggregation: AggregationInfo,
        metric: String,
        labels: HashSet<String>,
        time: TimeInfo,
    ) {
        self.query_type.push(query_type);

        self.query_data.push(SQLQueryData {
            aggregation_info: aggregation,
            metric,
            labels,
            time_info: time,
            subquery: None,
        });
    }

    /// Marks the whole query invalid, clearing any classifications made so far.
    pub fn invalidate_query(&mut self, error: QueryError, msg: String) {
        self.error = Some(error);
        self.msg = Some(msg);
        self.query_type.clear();
    }

    pub fn is_valid(&self) -> bool {
        self.error.is_none()
    }
}

/// Validates parsed queries against a schema and classifies them by pattern.
pub struct SQLPatternMatcher {
    schema: SQLSchema,
    scrape_interval: f64,
    legal_aggregations: HashSet<&'static str>,
}

impl SQLPatternMatcher {
    pub fn new(schema: SQLSchema, scrape_interval: f64) -> Self {
        // Aggregation functions the matcher knows how to classify.
        let legal_aggregations =
            HashSet::from(["AVG", "SUM", "COUNT", "MIN", "MAX", "QUANTILE"]);

        Self {
            schema,
            scrape_interval,
            legal_aggregations,
        }
    }

    pub fn is_valid_aggregation(&self, aggregation: &str) -> bool {
        self.legal_aggregations.contains(aggregation)
    }

    /// Walks the (sub)query chain, validating each level against the schema and
    /// flattening it into `(metric, aggregation, intervals, labels, time)` rows.
    #[allow(clippy::type_complexity)]
    pub fn flatten_query_info(
        &self,
        query: &SQLQueryData,
    ) -> Result<Vec<(String, AggregationInfo, f64, HashSet<String>, TimeInfo)>, (QueryError, String)>
    {
        let mut query_data = Vec::new();
        let mut current_query = Some(query);
        let
mut scraped_intervals = 0.0;

        while let Some(query) = current_query {
            if !self
                .schema
                .are_valid_metadata_columns(&query.metric, &query.labels)
            {
                if let Some(schema_metadata_columns) =
                    self.schema.get_metadata_columns(&query.metric)
                {
                    let illegal_columns: HashSet<_> =
                        query.labels.difference(schema_metadata_columns).collect();
                    println!("Returned QueryError::InvalidAggregationLabel");
                    return Err((
                        QueryError::InvalidAggregationLabel,
                        format!(
                            "attempt to aggregate by columns {:?}, which are not present for metric {}",
                            illegal_columns, query.metric
                        )
                    ));
                }
                // NOTE(review): an unknown metric falls through here; it is
                // caught later by the value-column check.
            }

            if !self.is_valid_aggregation(query.aggregation_info.get_name()) {
                println!("Returned QueryError::IllegalAggregationFn");

                return Err((
                    QueryError::IllegalAggregationFn,
                    format!(
                        "attempt to use illegal aggregation function {}",
                        query.aggregation_info.get_name()
                    ),
                ));
            }

            let time_info = &query.time_info;
            let time_column_name = time_info.get_time_col_name();

            // "UNUSED" marks the synthetic outer level of a nested query whose
            // time window lives on the inner subquery; skip time/value checks.
            if time_column_name != "UNUSED" {
                if let Some(schema_time_column) = self.schema.get_time_column(&query.metric) {
                    if time_column_name != schema_time_column {
                        println!("Returned QueryError::InvalidTimeCol: {time_column_name}");

                        return Err((
                            QueryError::InvalidTimeCol,
                            format!(
                                "Attempted to scrape from column [ {} ] instead of correct time column [ {} ]",
                                time_column_name, schema_time_column
                            )
                        ));
                    }
                }

                let value_column_name = query.aggregation_info.get_value_column_name();
                if !self
                    .schema
                    .is_valid_value_column(&query.metric, value_column_name)
                {
                    println!("Returned QueryError::InvalidValueCol");

                    return Err((
                        QueryError::InvalidValueCol,
                        format!("Incorrect value column name: {}", value_column_name),
                    ));
                }

                let scrape_duration = time_info.get_duration();
                scraped_intervals = scrape_duration / self.scrape_interval;

                // BUG FIX: the window is too small when it covers less than ONE
                // interval, i.e. `scraped_intervals < 1.0`. The old comparison
                // against `self.scrape_interval` (an interval *length*, not a
                // count) was only correct when the interval happened to be 1.0,
                // and the message printed the count instead of the duration.
                if scraped_intervals < 1.0 {
                    println!("Returned QueryError::SpatialDurationSmall");

                    return Err((
                        QueryError::SpatialDurationSmall,
                        format!(
                            "scrape duration {} less than one interval {}",
                            scrape_duration, self.scrape_interval
                        ),
                    ));
                }
            }

            query_data.push((
                query.metric.clone(),
                query.aggregation_info.clone(),
                scraped_intervals,
                query.labels.clone(),
                time_info.clone(),
            ));

            current_query = query.subquery.as_deref();
        }

        Ok(query_data)
    }

    /// Classifies each flattened (sub)query as Spatial / Temporal* /
    /// SpatioTemporal and assembles the final `SQLQuery`.
    pub fn query_info_to_pattern(&self, query_data: &SQLQueryData) -> SQLQuery {
        println!("SQLQueryData: {query_data:?}");
        let query_data = match self.flatten_query_info(query_data) {
            Ok(data) => data,
            Err((error, msg)) => {
                return SQLQuery::new(Vec::new(), Some(error), Some(msg));
            }
        };
        println!("flattened QueryData: {query_data:?}");

        let mut sql_query = SQLQuery::new(Vec::new(), None, None);

        // The tuple's third element is the number of scraped intervals, not a
        // raw duration; comparisons below are therefore against 1.0 (one
        // interval). BUG FIX: they previously compared against
        // `self.scrape_interval`, which is only equivalent when it equals 1.0.
        for (i, (metric, aggregation_info, scraped_intervals, labels, time_info)) in
            query_data.iter().enumerate()
        {
            if i < query_data.len() - 1 {
                // Not the last query: outer levels of a nested query are spatial.
                sql_query.add_subquery(
                    QueryType::Spatial,
                    aggregation_info.clone(),
                    metric.clone(),
                    labels.clone(),
                    time_info.clone(),
                );
            } else {
                // Last query (the innermost level, which owns the time window).
                if (scraped_intervals - 1.0).abs() < f64::EPSILON {
                    // Exactly one interval: a purely spatial aggregation.
                    sql_query.add_subquery(
                        QueryType::Spatial,
                        aggregation_info.clone(),
                        metric.clone(),
                        labels.clone(),
                        time_info.clone(),
                    );
                } else if *scraped_intervals > 1.0 {
                    // Check if labels match all metadata columns
                    let has_all_labels = self
                        .schema
                        .get_metadata_columns(metric)
                        .map(|schema_metadata_columns| labels == schema_metadata_columns)
                        .unwrap_or(true);

                    if has_all_labels {
                        // Full temporal query with all labels (PromQL-equivalent)
                        if aggregation_info.get_name() == "QUANTILE" {
                            sql_query.add_subquery(
                                QueryType::TemporalQuantile,
aggregation_info.clone(),
                                metric.clone(),
                                labels.clone(),
                                time_info.clone(),
                            );
                        } else {
                            sql_query.add_subquery(
                                QueryType::TemporalGeneric,
                                aggregation_info.clone(),
                                metric.clone(),
                                labels.clone(),
                                time_info.clone(),
                            );
                        }
                    } else {
                        // SpatioTemporal: spans multiple scrape intervals but groups by subset of labels
                        sql_query.add_subquery(
                            QueryType::SpatioTemporal,
                            aggregation_info.clone(),
                            metric.clone(),
                            labels.clone(),
                            time_info.clone(),
                        );
                    }
                }
            }
        }

        sql_query
    }
}
diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs
new file mode 100644
index 0000000..c7aa245
--- /dev/null
+++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs
@@ -0,0 +1,528 @@
use crate::sqlhelper::SQLSchema;
use crate::sqlhelper::{AggregationInfo, SQLQueryData, TimeInfo};
use sqlparser::ast::*;
use std::collections::HashSet;

use parse_datetime::parse_datetime;
use sqlparser::ast::Value::SingleQuotedString;

/// Parses a restricted SQL dialect (single SELECT, optional one-level
/// subquery/CTE) into `SQLQueryData` for the pattern matcher.
/// NOTE(review): `Option<SQLQueryData>` generics below were stripped by
/// extraction and have been restored from usage.
pub struct SQLPatternParser {
    schema: SQLSchema,
    query_evaluation_time: f64,
}

impl SQLPatternParser {
    pub fn new(schema: &SQLSchema, query_evaluation_time: f64) -> SQLPatternParser {
        Self {
            schema: schema.clone(),
            query_evaluation_time,
        }
    }

    /// Entry point: accepts exactly one statement, which must be a query.
    pub fn parse_query(&self, statements: &[Statement]) -> Option<SQLQueryData> {
        if statements.len() != 1 {
            println!("illegal query length");
            return None;
        }

        match &statements[0] {
            Statement::Query(query) => self.parse_query_node(query),
            _ => {
                println!("Not a query statement");
                None
            }
        }
    }

    fn parse_query_node(&self, query: &Query) -> Option<SQLQueryData> {
        // Convert CTE to subquery if present
        let query = self.cte_to_subquery(query);

        match &query.body.as_ref() {
            SetExpr::Select(select) => self.parse_select(select),
            _ => {
                println!("Not a SELECT statement");
                None
            }
        }
    }

    fn
cte_to_subquery(&self, query: &Query) -> Query { + let mut query = query.clone(); + + if let Some(with) = &query.with { + if !with.cte_tables.is_empty() { + let cte = &with.cte_tables[0]; + + // Create a subquery from the CTE + if let Some(new_body) = match &query.body.as_ref() { + SetExpr::Select(select) => { + let mut new_select = select.clone(); + new_select.from = vec![TableWithJoins { + relation: TableFactor::Derived { + lateral: false, + subquery: Box::new(*(cte.query).clone()), + alias: None, + }, + joins: vec![], + }]; + Some(SetExpr::Select(Box::new(*new_select))) + } + _ => None, + } { + query.body = Box::new(new_body); + query.with = None; + } + } + } + + query + } + + fn parse_select(&self, select: &Select) -> Option { + let (metric, has_subquery) = self.get_metric(select)?; + + let aggregation = self.get_aggregation(select)?; + + let group_bys = self.get_groupbys(select)?; + + if !has_subquery { + let time_info = self.get_time_info(select, &metric)?; + + // Check for unexpected fields + if select.distinct.is_some() + || select.top.is_some() + || select.into.is_some() + || !select.lateral_views.is_empty() + || select.prewhere.is_some() + || !select.cluster_by.is_empty() + || !select.distribute_by.is_empty() + || !select.sort_by.is_empty() + || select.having.is_some() + || !select.named_window.is_empty() + || select.window_before_qualify + { + println!("Unexpected SELECT fields present"); + return None; + } + + Some(SQLQueryData { + aggregation_info: aggregation, + metric, + labels: group_bys, + time_info, + subquery: None, + }) + } else { + // Parse subquery + let subquery = match &select.from[0].relation { + TableFactor::Derived { subquery, .. 
} => match subquery.body.as_ref() {
                SetExpr::Select(inner_select) => {
                    let inner_aggregation = self.get_aggregation(inner_select)?;
                    let inner_group_bys = self.get_groupbys(inner_select)?;
                    let time_info = self.get_time_info(inner_select, &metric)?;

                    Some(Box::new(SQLQueryData {
                        aggregation_info: inner_aggregation,
                        metric: metric.clone(),
                        labels: inner_group_bys,
                        time_info,
                        subquery: None,
                    }))
                }
                _ => None,
            },
            _ => None,
        }?;

        Some(SQLQueryData {
            aggregation_info: aggregation,
            metric,
            labels: group_bys,
            // Sentinel: the outer level of a nested query has no time window of
            // its own; the matcher skips time checks when it sees "UNUSED".
            time_info: TimeInfo::new("UNUSED".to_string(), -1.0, -1_f64),
            subquery: Some(subquery),
        })
    }

    /// Extracts the quantile argument from QUANTILE(q, col) or
    /// PERCENTILE(col, p), normalising percentiles (0-100) to quantiles (0-1).
    /// Returns an empty Vec when no argument can be extracted.
    fn get_quantile_args(&self, func: &Function) -> Vec<String> {
        let name = func.name.to_string().to_uppercase();

        match (&func.args, name.as_str()) {
            (FunctionArguments::List(args), "QUANTILE") => {
                let mut quantile_arg = Vec::new();

                // Checked access: `args.args[0]` panicked on `QUANTILE()`.
                match args.args.first() {
                    Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value)))) => {
                        quantile_arg.push(value.value.to_string());
                        quantile_arg
                    }
                    _ => quantile_arg,
                }
            }
            (FunctionArguments::List(args), "PERCENTILE") => {
                let mut quantile_arg = Vec::new();

                // Convert PERCENTILE to QUANTILE format.
                // Checked access: `args.args[1]` panicked on short arg lists.
                match args.args.get(1) {
                    Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value)))) => {
                        let val_str = value.value.to_string();
                        // Generic restored: extraction dropped `<f64>`.
                        if let Ok(percentile) = val_str.parse::<f64>() {
                            // Values > 1 are percentiles (0-100); scale to 0-1.
                            let quantile = if percentile > 1.0 {
                                percentile / 100.0
                            } else {
                                percentile
                            };
                            quantile_arg.push(quantile.to_string());
                        }
                        quantile_arg
                    }
                    _ => quantile_arg,
                }
            }
            _ => Vec::new(),
        }
    }

    /// Extracts the single aggregation in the projection, normalising
    /// PERCENTILE to QUANTILE; `None` when the projection is unsupported.
    fn get_aggregation(&self, select: &Select) -> Option<AggregationInfo> {
        if select.projection.len() != 1 {
            return None;
        }

        match &select.projection[0] {
            SelectItem::UnnamedExpr(Expr::Function(func))
            | SelectItem::ExprWithAlias {
                expr: Expr::Function(func),
                ..
+ } => { + let name = func.name.to_string().to_uppercase(); + + let args = self.get_quantile_args(func); + + // Get the column being aggregated + let col = match &func.args { + FunctionArguments::None => return None, + FunctionArguments::Subquery(_) => return None, + FunctionArguments::List(func_args) => { + if name == "QUANTILE" { + // QUANTILE(0.95, value) - column is second argument + if func_args.args.len() < 2 { + return None; + } + match &func_args.args[1] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } else if name == "PERCENTILE" { + // PERCENTILE(value, 95) - column is first argument + if func_args.args.is_empty() { + return None; + } + match &func_args.args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } else { + // For other aggregations - column is first argument + if func_args.args.is_empty() { + return None; + } + match &func_args.args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } + } + }; + + // Always store PERCENTILE as QUANTILE internally + let normalized_name = if name == "PERCENTILE" { + "QUANTILE".to_string() + } else { + name + }; + + Some(AggregationInfo::new(normalized_name, col, args)) + } + _ => None, + } + } + + fn get_metric(&self, select: &Select) -> Option<(String, bool)> { + if select.from.is_empty() { + return None; + } + + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + let metric = name.0.first()?.to_string(); + Some((metric, false)) + } + TableFactor::Derived { subquery, .. } => match subquery.body.as_ref() { + SetExpr::Select(inner_select) => { + if inner_select.from.is_empty() { + return None; + } + match &inner_select.from[0].relation { + TableFactor::Table { name, .. 
} => { + let metric = name.0.first()?.to_string(); + Some((metric, true)) + } + _ => None, + } + } + _ => None, + }, + _ => None, + } + } + + fn get_timestamp_from_datetime_str(datetime_str: &str) -> Option { + let parsed_datetime = parse_datetime(datetime_str).ok()?; + Some(parsed_datetime.timestamp().as_second() as f64) + } + + fn get_timestamp_from_between_highlow(&self, highlow: &Expr) -> Option { + match highlow { + Expr::Function(func) if func.name.to_string().to_uppercase() == "NOW" => { + Some(self.query_evaluation_time) + } + Expr::Value(ValueWithSpan { + value: SingleQuotedString(datetime_str), + span: _, + }) => Self::get_timestamp_from_datetime_str(datetime_str), + Expr::Function(func) if func.name.to_string().to_uppercase() == "DATEADD" => { + self.parse_dateadd(func) + } + _ => { + panic!("invalid time syntax {:?}", highlow); + } + } + } + + fn get_time_info(&self, select: &Select, table_name: &str) -> Option { + let selection = select.selection.as_ref()?; + + match selection { + Expr::Between { + expr, + negated, + low, + high, + } => { + if *negated { + return None; + } + + // Extract time column name + let col_name = match expr.as_ref() { + Expr::Identifier(ident) => ident.value.clone(), + _ => return None, + }; + + let time_col_name = self.schema.get_time_column(table_name)?; + + if col_name != *time_col_name { + println!( + "Found selection statement with column name {} but time column name is {}", + col_name, time_col_name + ); + return None; + } + + let start = self.get_timestamp_from_between_highlow(low.as_ref())?; + let end = self.get_timestamp_from_between_highlow(high.as_ref())?; + + let duration = end - start; + + Some(TimeInfo::new(col_name, start, duration)) + } + _ => None, + } + } + + fn parse_dateadd(&self, func: &Function) -> Option { + let args = match &func.args { + FunctionArguments::List(args) => &args.args, + _ => return None, + }; + + if args.len() != 3 { + return None; + } + + // First arg is time unit + let time_unit = match 
&args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => { + ident.value.to_lowercase() + } + _ => return None, + }; + + // Second arg is the value + let duration_to_add = match &args[1] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr, + })) => { + println!("CORRECT MATCH EXPR!: {:?}", args[1]); + match expr.as_ref() { + Expr::Value(ValueWithSpan { + value: Value::Number(n, _), + span: _, + }) => -n.parse::().ok()?, + _ => return None, + } + } + FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) => match expr { + Expr::Value(ValueWithSpan { + value: Value::Number(n, _), + span: _, + }) => n.parse::().ok()?, + _ => return None, + }, + _ => { + println!("DID NOT MATCH EXPR!: {:?}", args[1]); + return None; + } + }; + + let base_timestamp = match &args[2] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Function(func))) + if func.name.to_string().to_uppercase() == "NOW" => + { + self.query_evaluation_time + } + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(ValueWithSpan { + value: SingleQuotedString(datetime_str), + span: _, + }))) => parse_datetime(datetime_str).ok()?.timestamp().as_second() as f64, + _ => { + println!("time upper bound not calculating from present"); + return None; + } + }; + + // Convert to seconds + let multiplier = match time_unit.as_str() { + "s" | "second" | "seconds" => 1.0, + "m" | "minute" | "minutes" => 60.0, + "h" | "hour" | "hours" => 3600.0, + "d" | "day" | "days" => 86400.0, + _ => return None, + }; + + Some(base_timestamp + (duration_to_add as f64) * multiplier) + } + + // fn parse_dateadd_duration(&self, func: &Function, start: f64) -> Option { + // let args = match &func.args { + // FunctionArguments::List(args) => &args.args, + // _ => return None, + // }; + + // if args.len() != 3 { + // return None; + // } + + // // First arg is time unit + // let time_unit = match &args[0] { + // 
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => { + // ident.value.to_lowercase() + // } + // _ => return None, + // }; + + // // Second arg is the value + // let time_value = match &args[1] { + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { + // op: UnaryOperator::Minus, + // expr, + // })) => { + // println!("CORRECT MATCH EXPR!: {:?}", args[1]); + // match expr.as_ref() { + // Expr::Value(ValueWithSpan { + // value: Value::Number(n, _), + // span: _, + // }) => n.parse::().ok()?, + // _ => return None, + // } + // } + // FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) => match expr { + // Expr::Value(ValueWithSpan { + // value: Value::Number(n, _), + // span: _, + // }) => n.parse::().ok()?, + // _ => return None, + // }, + // _ => { + // println!("DID NOT MATCH EXPR!: {:?}", args[1]); + // return None; + // } + // }; + + // // Third arg should be NOW() or start + // // let printargs = &args[2]; + // // println!("DATEADD ARGS: {printargs:?}"); + // match &args[2] { + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Function(func))) + // if func.name.to_string().to_uppercase() == "NOW" => {} + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(ValueWithSpan { + // value: SingleQuotedString(datetime_str), + // span: _, + // }))) if start + // == (parse_datetime(datetime_str).ok()?.timestamp().as_second() as f64) => {} + + // _ => { + // println!("time upper bound not calculating from present"); + // return None; + // } + // } + + // // Convert to seconds + // let multiplier = match time_unit.as_str() { + // "s" | "second" | "seconds" => 1.0, + // "m" | "minute" | "minutes" => 60.0, + // "h" | "hour" | "hours" => 3600.0, + // "d" | "day" | "days" => 86400.0, + // _ => return None, + // }; + + // Some(time_value as f64 * multiplier) + // } + + fn get_groupbys(&self, select: &Select) -> Option> { + match &select.group_by { + GroupByExpr::Expressions(exprs, mods) => { + if !mods.is_empty() { + return None; + } + + let 
mut group_bys = HashSet::new(); + + for expr in exprs { + match expr { + Expr::Identifier(ident) => { + group_bys.insert(ident.value.clone()); + } + _ => return None, + } + } + + if group_bys.is_empty() { + None + } else { + Some(group_bys) + } + } + _ => None, + } + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs new file mode 100644 index 0000000..02c154d --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs @@ -0,0 +1,3 @@ +pub mod ast_matching; + +pub use ast_matching::*; diff --git a/CommonDependencies/installation/Dockerfile b/CommonDependencies/installation/Dockerfile new file mode 100644 index 0000000..272bb15 --- /dev/null +++ b/CommonDependencies/installation/Dockerfile @@ -0,0 +1,39 @@ +# CommonDependencies/Dockerfile +# Shared base image for SketchDB services containing common dependencies and internal packages + +FROM python:3.10-slim AS sketchdb-base + +LABEL maintainer="SketchDB Team" +LABEL description="Shared base image with common dependencies for SketchDB services" + +WORKDIR /app + +# Install system dependencies needed across all services +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install promql_utilities (shared internal package) +COPY dependencies/py/promql_utilities /tmp/promql_utilities +RUN pip install --no-cache-dir /tmp/promql_utilities && rm -rf /tmp/promql_utilities + +# Install common Python dependencies used across multiple services +COPY installation/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt + +# Create common directories +RUN mkdir -p /app/logs /app/config /app/outputs + +# Set Python path to include app directory +#ENV PYTHONPATH=/app${PYTHONPATH:+:${PYTHONPATH}} +ENV PYTHONPATH=/app + +# Default working directory for services +WORKDIR /app + +# Default 
#!/bin/bash
#
# Builds the shared SketchDB base image that contains the common
# dependencies. The build context is the parent of the directory
# holding this script, so sibling dependency folders are reachable.

set -e

# Resolve this script's directory and the build-context directory.
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly BASE_DIR="$(dirname "$SCRIPT_DIR")"

# Image coordinates.
readonly IMAGE_NAME="sketchdb-base"
readonly IMAGE_TAG="latest"
readonly FULL_IMAGE_NAME="${IMAGE_NAME}:${IMAGE_TAG}"

echo "Building SketchDB base image: $FULL_IMAGE_NAME"
echo "Build context: $BASE_DIR"

# Build the base image from the Dockerfile that sits next to this script.
docker build -t "$FULL_IMAGE_NAME" -f "$SCRIPT_DIR/Dockerfile" "$BASE_DIR"

echo "Successfully built base image: $FULL_IMAGE_NAME"

echo "Base image build complete!"
echo "Services can now use: FROM $FULL_IMAGE_NAME"
+ +## Directory Structure + +``` +tests/cross_language_comparison/ +├── test_data/ +│ └── promql_queries.json # Test cases and expected results +├── python_tests/ +│ ├── test_data.py # Python test data structures +│ ├── pattern_tests.py # Python pattern testing logic +│ └── test_runner.py # Python test runner +├── rust_tests/ +│ ├── src/ +│ │ ├── main.rs # Rust test runner entry point +│ │ ├── test_data.rs # Rust test data structures +│ │ └── pattern_tests.rs # Rust pattern testing logic +│ └── Cargo.toml # Rust project configuration +├── comparison_tests/ +│ └── result_comparator.py # Cross-language result comparison +├── utilities/ +│ └── master_test_runner.py # Orchestrates all tests +└── README.md # This file +``` + +## Quick Start + +### Prerequisites + +1. **Python**: Ensure Python 3.8+ is installed with access to the `promql_utilities` package +2. **Rust**: Ensure Rust 1.70+ is installed with Cargo +3. **Dependencies**: The promql_utilities packages for both Python and Rust must be available + +### Running All Tests + +```bash +# From the project root directory +cd tests/cross_language_comparison +python utilities/master_test_runner.py +``` + +This will: +1. Run Python pattern tests +2. Run Rust pattern tests +3. Compare results between both implementations +4. 
Generate comprehensive reports + +### Running Individual Test Suites + +#### Python Tests Only +```bash +cd tests/cross_language_comparison/python_tests +python test_runner.py ../test_data/promql_queries.json +``` + +#### Rust Tests Only +```bash +cd tests/cross_language_comparison/rust_tests +cargo run --release -- ../test_data/promql_queries.json +``` + +#### Comparison Only +```bash +cd tests/cross_language_comparison/comparison_tests +python result_comparator.py ../python_tests/python_test_results.json ../rust_tests/rust_test_results.json +``` + +## Test Data Format + +The test data is defined in `test_data/promql_queries.json`: + +```json +{ + "test_cases": [ + { + "id": "unique_test_id", + "description": "Human readable description", + "query": "actual_promql_query", + "expected_pattern_type": "ONLY_TEMPORAL|ONLY_SPATIAL|ONE_TEMPORAL_ONE_SPATIAL", + "expected_tokens": { + "metric": {"name": "...", "labels": {...}}, + "function": {"name": "..."}, + "aggregation": {"op": "..."} + } + } + ], + "pattern_builder_tests": [ + // Tests for PromQLPatternBuilder functionality + ] +} +``` + +## Adding New Test Cases + +1. **Add test case to JSON**: Edit `test_data/promql_queries.json` to include new queries +2. **Update patterns if needed**: Modify pattern definitions in both Python and Rust implementations +3. 
**Run tests**: Execute the master test runner to validate new cases + +### Example Test Case + +```json +{ + "id": "custom_aggregation", + "description": "Custom aggregation test", + "query": "avg(cpu_usage{instance=\"server1\"})", + "expected_pattern_type": "ONLY_SPATIAL", + "expected_tokens": { + "metric": { + "name": "cpu_usage", + "labels": {"instance": "server1"}, + "at_modifier": null + }, + "aggregation": { + "op": "avg", + "modifier": null + } + } +} +``` + +## Output Files + +After running tests, several output files are generated: + +- `python_tests/python_test_results.json` - Python test results +- `rust_tests/rust_test_results.json` - Rust test results +- `comparison_tests/comparison_report.json` - Detailed comparison report +- `test_summary.json` - High-level test execution summary + +## Understanding Results + +### Success Metrics +- **Both Passed**: Both implementations correctly handled the test case +- **Pattern Type Match**: Both implementations identified the same pattern type +- **Token Similarity**: Measure of how similar the extracted tokens are (0.0-1.0) + +### Common Issues +- **Pattern Type Mismatch**: Implementations categorize queries differently +- **Token Extraction Differences**: Different token data extracted from the same query +- **Success Rate Differences**: One implementation handles a query that the other doesn't + +### Performance Comparison +The framework also compares execution times between implementations to identify performance characteristics. + +## Extending the Framework + +### Adding New Pattern Types +1. Update both Python and Rust `QueryPatternType` enums +2. Add corresponding patterns to both test implementations +3. Update test data with examples of the new pattern type + +### Adding New Token Types +1. Define token structures in both `test_data.py` and `test_data.rs` +2. Update token extraction logic in both pattern testers +3. 
Update comparison logic in `result_comparator.py` + +## Troubleshooting + +### Common Issues + +**"Module not found" errors**: Ensure the promql_utilities packages are properly installed and accessible + +**Rust build failures**: Check that all Rust dependencies are available and versions are compatible + +**Path issues**: Run commands from the correct directories as shown in the examples + +**Missing test files**: Ensure all required files are present and have correct permissions + +### Debug Mode + +For more detailed output, you can run individual components with verbose logging or add debug prints to the test implementations. + +## Contributing + +When contributing new tests or improvements: + +1. Follow the existing code patterns +2. Add appropriate documentation +3. Test both happy path and edge cases +4. Ensure cross-platform compatibility +5. Update this README with any new features diff --git a/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md b/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md new file mode 100644 index 0000000..a144c7d --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md @@ -0,0 +1,237 @@ +## AST Matching: Python vs Rust — detailed comparison + +Date: 2025-08-26 + +Purpose: dump a full, function-level and class-level comparison of the `ast_matching` modules in +Python (dependencies/py/promql_utilities/promql_utilities/ast_matching) and Rust +(dependencies/rs/promql_utilities/src/ast_matching). Each discrepancy or change is tagged as +"MUST HAVE" (correctness-related) or "GOOD TO HAVE" (portability/ergonomics/perf). + +--- + +Files compared +- Python + - PromQLPattern.py + - PromQLPatternBuilder.py +- Rust + - promql_pattern.rs + - promql_pattern_builder.rs + - promql_pattern_factory.rs + +Note: This file assumes the versions present in the repo as of the timestamp above. 
The Rust +`promql_pattern.rs` file already includes `SubqueryExpr` handling (line ranges present in the +attachment). + +### High-level summary +- Both sides implement: pattern builder -> pattern object -> matcher that walks a parsed PromQL AST + and optionally collects tokens. +- Major conceptual parity but concrete representation, naming, and normalization differences exist. + +--- + +## Class/struct level mapping + +- Python: `PromQLPattern` (class) + - Holds pattern dict, exposes `matches(node)` -> `MatchResult(matches: bool, tokens: Dict)`. + - Key internals: `_node_to_dict`, `_matches_recursive`. + +- Python: `PromQLPatternBuilder` (static-method-only dataclass) + - Produces Python-native pattern dicts (or `None` for `any()` wildcard). + +- Rust: `PromQLPattern` (struct) + - Holds `ast_pattern: HashMap`, typed token model, `expected_pattern_type`. + - Exposes `matches(&Expr)` -> `PromQLMatchResult` (typed tokens). + - Internals: `matches_recursive`, typed `match_*` helpers. + +- Rust: `PromQLPatternBuilder` (impl) + - Produces `HashMap` patterns. + +- Rust: typed token structs (`TokenData`, `MetricToken`, `FunctionToken`, ...). + +Discrepancy tag: class/struct correspondence — GOOD TO HAVE. It's fine for Rust to use typed tokens, but if cross-language token portability is desired, aligning JSON shapes is recommended. + +--- + +## Function-by-function comparison (Python -> Rust) + +Legend: MUST HAVE = correctness/security-related; GOOD TO HAVE = portability/ergonomics/perf. + +1) Builder: any() + - Python: `PromQLPatternBuilder.any()` returns `None`. Python matcher treats `pattern is None` as wildcard -> matches anything. + - Rust: `PromQLPatternBuilder::any()` returns an empty `HashMap` (i.e., `{}`). `matches_recursive` requires a `type` string and returns false if missing; an empty map does NOT act as wildcard. + - Discrepancy: semantics differ and lead to non-matching behavior in Rust when user expects wildcard. 
+ - Tag: MUST HAVE (pattern wildcard semantics affect correctness of many patterns). + - Suggested fixes (MUST HAVE): make Rust `matches_recursive` treat empty pattern as wildcard (e.g., `if pattern.is_empty() { return true; }`) or change `any()` to return a sentinel `Value::Null` and handle it. + +2) Builder: binary_op / BinaryExpr naming + - Python builder returns `type: "BinaryOpExpr"` (PromQLPatternBuilder.binary_op). + - Python `_node_to_dict` for actual AST Binary returns `type: "BinaryExpr"`. + - Therefore patterns built by Python builder will not match binary AST nodes; token collection for binary ops (which checks "BinaryOpExpr") will also never trigger. + - Rust builder and matcher consistently use `"BinaryExpr"`. + - Discrepancy: naming typo/inconsistency in Python. + - Tag: MUST HAVE (causes incorrect matching of binary expressions). + - Suggested fix (MUST HAVE): change Python builder to produce `"BinaryExpr"` (or change `_node_to_dict` to produce `"BinaryOpExpr"`, but updating builder is minimal). + +3) Builder: function (`function` / `Call` / `func` field shape) + - Python builder sets `func` to `{"type":"Function","name": [ ... ]}` (dict with `name` list). + - Rust builder sets `func` to `Value::Array([ func_object ])` (an array containing the func object). Rust matcher expects this array-wrapped shape. + - Both matchers work with their own builders but cross-language serialized patterns will differ. + - Discrepancy: pattern JSON shape mismatch; porting patterns across languages will fail unless normalized. + - Tag: GOOD TO HAVE (affects portability, not correctness inside a single language runtime). + - Suggested fix (GOOD TO HAVE): normalize representations to a single shape (prefer object rather than array) or make matchers accept both shapes. 
+ +4) Function args matching and collection + - Python `_matches_recursive` checks `args` as list; requires same length and recurses per-item; `_collect_args_as` stores `tokens[collect_args_as] = node_dict['args']` (raw arg ASTs) and `_collect_as` stores args raw. + - Rust `match_function_call` checks arg count and recurses. For `_collect_as` Rust stores args as `format!("{:?}", arg)` (stringified) and for `_collect_args_as` does the same. Earlier Rust code used placeholders for args in some versions; current code stringifies args (improvement). + - Discrepancy: token shape differs (Python raw AST vs Rust stringified args). + - Tag: GOOD TO HAVE (token shape matters for portability and downstream consumers). + +5) Aggregate / AggregateExpr + - Python builder stores `op` possibly as list or string (builder converts to list), `modifier` field as `by`/`without` stored under `modifier` key. + - Rust builder stores `op` as array, stores `by` and `without` separately in the pattern JSON. Rust `match_aggregation` checks membership and recurses into `expr`. Rust sets `param` to `agg.param.as_ref().map(|p| format!("{:?}", p))` while Python earlier stored `param` more directly. + - Discrepancy: minor shape/field naming differences for modifiers (`modifier` vs `by`/`without`) and param normalization. + - Tag: GOOD TO HAVE (affects portability; correctness preserved if each side consumes its own builder). + - Suggested fix (GOOD TO HAVE): agree on `by`/`without` keys or accept both forms in matchers. + +6) MatrixSelector / range vector + - Python `_node_to_dict` exposes `range` verbatim from parser; builder stores `range` string. + - Rust `match_matrix_selector` converts `ms.range` (std::time::Duration) to `chrono::Duration` in tokens and stores `offset` from `ms.vs.offset`. Rust token normalizes duration; Python currently leaves raw parser value. + - Discrepancy: duration representation difference and `offset` location naming. 
+ - Tag: GOOD TO HAVE (normalization difference — important for portability but not strictly correctness inside runtime). + +7) NumberLiteral numeric comparison + - Python compares pattern value vs node value using equality (exact) in general code; there is no explicit epsilon handling unless the pattern_value is TokenType then handled specially. (Note: Python code uses TokenType branch for token comparisons; numeric equality uses Python's `==` semantics on floats.) + - Rust compares floats using `if (num.val - expected_f64).abs() > f64::EPSILON { return false; }` i.e., epsilon-based equivalence. + - Discrepancy: Python exact vs Rust EPSILON tolerance. + - Tag: MUST HAVE (numeric equality semantics can cause correctness surprises across languages). + - Suggested fix (MUST HAVE): pick one policy (recommended: epsilon compare) and apply to Python; or clearly document language-specific rule. + +8) SubqueryExpr + - Python: builder + `_node_to_dict` include `SubqueryExpr` support (range, step, offset) and `_matches_recursive` handles nested dicts for subquery patterns. + - Rust: the current `promql_pattern.rs` includes `match_subquery` and `SubqueryToken` — so Rust supports subquery matching now. + - Discrepancy: earlier there was a gap; currently parity exists in repo (good). + - Tag: GOOD TO HAVE (presence is correctness-related only if you rely on subquery patterns; treat as MUST HAVE if you need subquery correctness). For correctness: mark MUST HAVE if you plan to support subquery-based pattern matching; otherwise GOOD TO HAVE. + +9) AtModifier (`@` modifier) handling + - Python: stores `at` raw in `node_dict` and in tokens (no conversion) — flexible. + - Rust: converts `AtModifier::At(t)` to seconds since UNIX_EPOCH and panics on `AtModifier::Start` or `AtModifier::End` (explicit panics). That means Rust can panic on certain AST values. + - Discrepancy: Rust panics on `Start/End`, Python will simply put the value in token. 
+ - Tag: MUST HAVE (panic on parser output is correctness/robustness issue). + - Suggested fix (MUST HAVE): make Rust handle `Start`/`End` gracefully (either encode them as sentinel strings or treat as non-matching rather than panic). Convert time to a normalized representation but don't panic. + +10) Pattern strictness & missing-type handling + - Python: `if pattern is None: return True` (wildcard) and when a key exists with value `None` the matcher treats that as wildcard for that field. Python is permissive. + - Rust: `matches_recursive` requires `pattern.get("type")` to be a `Value::String` and returns false otherwise. Nested checks require `Value::Object` for nested patterns. Rust is strict about pattern shape. + - Discrepancy: permissiveness vs strictness causes different failure modes and different ways of expressing wildcards in nested positions. + - Tag: MUST HAVE (expressing patterns consistently across languages is essential for correctness of pattern design). + - Suggested fix (MUST HAVE): either document the strict JSON contract for Rust builders or make Rust accept `Value::Null` or empty maps as wildcards; conversely, validate Python patterns to guarantee shape if you prefer Rust's strictness. + +11) Token shapes and type normalization + - Python tokens: lightweight dicts; include `ast` fields that carry parser nodes. Values are not normalized (e.g., `at` raw). + - Rust tokens: typed structs, normalized fields (`at_modifier: Option`, `RangeToken.range: chrono::Duration`) and some stringification via `format!("{:?}", ...)` for parameters/args when necessary. + - Discrepancy: serialization and field names differ; cross-language consumers will need mapping. + - Tag: GOOD TO HAVE (portability/contract-related). If consumers rely on specific token fields for correctness, escalate to MUST HAVE. + +12) Utility / Factory functions + - Rust includes `PromQLPatternFactory` with prebuilt patterns for OnlyTemporal / OnlySpatial patterns. 
+ - Python lacks the same factory file (you can emulate using `PromQLPatternBuilder`). + - Discrepancy: convenience API mismatch. + - Tag: GOOD TO HAVE. + +--- + +## Per-function diffs (concise) — where to look + +- PromQLPattern.__init__ (py) vs PromQLPattern::new (rs) + - Both store the pattern. Python stores pattern as an arbitrary dict possibly `None`; Rust requires `HashMap` and an explicit `expected_pattern_type`. + - Tag: GOOD TO HAVE. + +- PromQLPattern.matches(node) (py) vs PromQLPattern::matches(&Expr) (rs) + - Both call recursive matching and return a pair of (matches, tokens). Python returns `MatchResult(matches, tokens)` where tokens are a plain dict; Rust returns typed `PromQLMatchResult`. + - Tag: GOOD TO HAVE. + +- _node_to_dict (py) vs explicit typed match arms (rs) + - Python converts parser nodes to dict forms used by recursive matcher. + - Rust uses pattern_type & node enum and calls typed `match_*` helpers directly. Rust does not use a transient dict representation. + - Tag: GOOD TO HAVE (architectural difference; both valid). + +- _matches_recursive (py) vs matches_recursive (rs) + - Python: flexible dict-driven matching with list/dict/TokenType handlers and `_collect_as` logic. + - Rust: strict: pattern must include `type` string; then match arms call typed helpers. + - Key correctness mismatch: Python supports `pattern is None` wildcard; Rust requires `type` key. + - Tag: MUST HAVE for wildcard semantics. + +- match_metric_selector (rs) vs VectorSelector handling in Python + - Both check `name` membership; both can collect labels. Rust extracts equality-match labels only (`MatchOp::Equal`) and builds typed `MetricToken` with `at_modifier` normalized to seconds or panics on Start/End. + - Python exposes `labels` as `matchers` and leaves `at` raw. + - Tag: MUST HAVE for panic behavior on `@` variants; GOOD TO HAVE for normalization parity. 
+ +- match_function_call (rs) vs Call handling in Python + - Similar high-level behavior (name membership, arg count, recursive matching). Differences in tokenization and `func` pattern shape. + - Tag: GOOD TO HAVE. + +- match_aggregation (rs) vs AggregateExpr handling in Python + - Both check `op` membership and recurse into `expr`. Rust builds typed `AggregationToken` and stringifies `param`; Python stores param in token dict. + - Tag: GOOD TO HAVE. + +- match_matrix_selector (rs) vs MatrixSelector handling in Python + - Both support vector_selector nested matching and token collection. Rust normalizes durations into chrono::Duration and extracts `offset`; Python leaves raw range and step/offset fields in node dict. + - Tag: GOOD TO HAVE. + +- match_binary_operation (rs) vs BinaryExpr handling in Python + - Rust expects pattern type `BinaryExpr` and checks `op`, left/right recursion, collects token. + - Python builder mismatch (BinaryOpExpr vs BinaryExpr) is a MUST HAVE fix. + +- match_number_literal (rs) vs NumberLiteral handling in Python + - Rust uses epsilon comparison; Python uses direct equality (unless pattern is None). Make numeric equality policy consistent (MUST HAVE). + +- match_subquery (rs) vs Subquery handling in Python + - Current repo: Rust includes `match_subquery` and `SubqueryToken` (parity achieved). If you rely on subquery correctness, tests must validate behavior. + - Tag: GOOD TO HAVE / MUST HAVE depending on usage. + +--- + +## Concrete list of discrepancies & tags (compact) + +1. any() wildcard semantics — MUST HAVE +2. Python binary builder `type` naming (`BinaryOpExpr` vs `BinaryExpr`) — MUST HAVE +3. Numeric equality epsilon (Py exact vs Rust eps) — MUST HAVE +4. Rust panics on `AtModifier::Start` / `End` — MUST HAVE +5. `func` shape (object vs array-wrapped object) — GOOD TO HAVE +6. Token shapes and normalization (raw AST vs typed/normalized representation) — GOOD TO HAVE +7. 
Aggregation modifier naming (`modifier` vs `by`/`without`) — GOOD TO HAVE +8. Matrix range and offset normalization differences — GOOD TO HAVE +9. Subquery support parity (now present in Rust) — GOOD TO HAVE (escalate to MUST HAVE if subqueries are required) +10. Presence of `PromQLPatternFactory` in Rust but not Python — GOOD TO HAVE + +--- + +## Minimal recommended fixes (priority order) +1. Fix Python builder `binary_op` to set `type: "BinaryExpr"` (MUST HAVE) +2. Make Rust `matches_recursive` treat empty `pattern` (or `Value::Null`) as wildcard, or change `PromQLPatternBuilder::any()` to return `Value::Null` and recognize it (MUST HAVE) +3. Unify numeric equality policy (use epsilon both sides) (MUST HAVE) +4. Prevent Rust panics on `AtModifier::Start`/`End`: encode them as sentinel strings (e.g., "start"/"end") or treat as non-match (MUST HAVE) +5. Add optional tolerant parsing for `func` pattern shapes (accept both array-wrapped and object forms) (GOOD TO HAVE) +6. Add small JSON-token serializer in Python matching Rust token schema, or vice versa, for portability (GOOD TO HAVE) + +--- + +## Must-have tests to add (short list) +- `test_any_wildcard_matches_any_node` (Py + Rust) +- `test_binary_expr_matching` (detect Python builder bug) +- `test_numeric_equality_policy` (float epsilon consistency) +- `test_at_modifier_no_panic` (Rust must not panic for `Start`/`End`) +- `test_token_contracts` (verify presence and basic types of token fields) + +## Good-to-have tests +- cross-language serialized pattern roundtrip tests +- token schema parity tests (JSON serialize Rust tokens, compare to Python tokens) +- factory pattern equivalence (Rust `PromQLPatternFactory` vs composed Python builder) + +--- + +If you'd like, I can now: +- apply the MUST HAVE code fixes (small, targeted edits) and run the unit tests; or +- add the MUST HAVE tests first to surface current failures. + +Tell me which action to run next and I'll edit files + run tests. 
diff --git a/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py b/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py new file mode 100755 index 0000000..cc6f189 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 + +import json +import sys +from typing import Dict, List, Any, Optional, Tuple +from dataclasses import dataclass +from datetime import datetime + +@dataclass +class ComparisonResult: + test_id: str + python_success: bool + rust_success: bool + both_passed: bool + pattern_type_match: bool + token_similarity: float + execution_time_diff_ms: float + issues: List[str] + +@dataclass +class ComparisonSummary: + total_tests: int + both_passed: int + python_only_passed: int + rust_only_passed: int + both_failed: int + pattern_type_matches: int + avg_token_similarity: float + avg_execution_time_python: float + avg_execution_time_rust: float + results: List[ComparisonResult] + +class ResultComparator: + def __init__(self): + pass + + def compare_results(self, python_results_file: str, rust_results_file: str) -> ComparisonSummary: + """Compare Python and Rust test results""" + + with open(python_results_file, 'r') as f: + python_data = json.load(f) + + with open(rust_results_file, 'r') as f: + rust_data = json.load(f) + + # Create lookup maps + python_results = {r['test_id']: r for r in python_data['results']} + rust_results = {r['test_id']: r for r in rust_data['results']} + + comparison_results = [] + both_passed = 0 + python_only_passed = 0 + rust_only_passed = 0 + both_failed = 0 + pattern_type_matches = 0 + total_token_similarity = 0.0 + total_python_time = 0.0 + total_rust_time = 0.0 + + all_test_ids = set(python_results.keys()) | set(rust_results.keys()) + + for test_id in all_test_ids: + python_result = python_results.get(test_id) + rust_result = rust_results.get(test_id) + + if not 
python_result: + print(f"Warning: Test {test_id} missing from Python results") + continue + if not rust_result: + print(f"Warning: Test {test_id} missing from Rust results") + continue + + python_success = python_result['success'] + rust_success = rust_result['success'] + + # Count success patterns + if python_success and rust_success: + both_passed += 1 + elif python_success and not rust_success: + python_only_passed += 1 + elif not python_success and rust_success: + rust_only_passed += 1 + else: + both_failed += 1 + + # Check pattern type match + pattern_type_match = ( + python_result.get('actual_pattern_type') == + rust_result.get('actual_pattern_type') + ) + if pattern_type_match: + pattern_type_matches += 1 + + # Calculate token similarity + token_similarity = self._calculate_token_similarity( + python_result.get('actual_tokens', {}), + rust_result.get('actual_tokens', {}) + ) + total_token_similarity += token_similarity + + # Calculate execution time difference + python_time = python_result.get('execution_time_ms', 0.0) + rust_time = rust_result.get('execution_time_ms', 0.0) + total_python_time += python_time + total_rust_time += rust_time + execution_time_diff = abs(python_time - rust_time) + + # Identify issues + issues = [] + if not pattern_type_match: + issues.append(f"Pattern type mismatch: Python={python_result.get('actual_pattern_type')}, Rust={rust_result.get('actual_pattern_type')}") + if token_similarity < 0.8: + issues.append(f"Low token similarity: {token_similarity:.2f}") + if python_success != rust_success: + issues.append(f"Success mismatch: Python={python_success}, Rust={rust_success}") + if execution_time_diff > 100: # More than 100ms difference + issues.append(f"Large execution time difference: {execution_time_diff:.2f}ms") + + comparison_result = ComparisonResult( + test_id=test_id, + python_success=python_success, + rust_success=rust_success, + both_passed=python_success and rust_success, + pattern_type_match=pattern_type_match, + 
token_similarity=token_similarity, + execution_time_diff_ms=execution_time_diff, + issues=issues + ) + comparison_results.append(comparison_result) + + total_tests = len(comparison_results) + avg_token_similarity = total_token_similarity / max(total_tests, 1) + avg_python_time = total_python_time / max(total_tests, 1) + avg_rust_time = total_rust_time / max(total_tests, 1) + + return ComparisonSummary( + total_tests=total_tests, + both_passed=both_passed, + python_only_passed=python_only_passed, + rust_only_passed=rust_only_passed, + both_failed=both_failed, + pattern_type_matches=pattern_type_matches, + avg_token_similarity=avg_token_similarity, + avg_execution_time_python=avg_python_time, + avg_execution_time_rust=avg_rust_time, + results=comparison_results + ) + + def _calculate_token_similarity(self, python_tokens: Dict[str, Any], rust_tokens: Dict[str, Any]) -> float: + """Calculate similarity between token dictionaries (0.0 to 1.0)""" + if not python_tokens and not rust_tokens: + return 1.0 + if not python_tokens or not rust_tokens: + return 0.0 + + # Compare keys + python_keys = set(python_tokens.keys()) + rust_keys = set(rust_tokens.keys()) + common_keys = python_keys & rust_keys + total_keys = python_keys | rust_keys + + if not total_keys: + return 1.0 + + key_similarity = len(common_keys) / len(total_keys) + + # Compare values for common keys + value_matches = 0 + for key in common_keys: + if self._tokens_match(python_tokens[key], rust_tokens[key]): + value_matches += 1 + + value_similarity = value_matches / max(len(common_keys), 1) + + # Weight: 50% key similarity, 50% value similarity + return (key_similarity + value_similarity) / 2 + + def _tokens_match(self, python_token: Any, rust_token: Any) -> bool: + """Check if individual tokens match""" + # Handle different token representations + if isinstance(python_token, dict) and isinstance(rust_token, dict): + # Compare key token fields + if 'name' in python_token and 'name' in rust_token: + return 
python_token['name'] == rust_token['name'] + if 'op' in python_token and 'op' in rust_token: + return python_token['op'] == rust_token['op'] + if 'range' in python_token and 'range' in rust_token: + return python_token['range'] == rust_token['range'] + + # Fallback to direct comparison + return python_token == rust_token + + def generate_report(self, summary: ComparisonSummary, output_file: str): + """Generate a detailed comparison report""" + report = { + 'timestamp': datetime.utcnow().isoformat(), + 'summary': { + 'total_tests': summary.total_tests, + 'both_passed': summary.both_passed, + 'python_only_passed': summary.python_only_passed, + 'rust_only_passed': summary.rust_only_passed, + 'both_failed': summary.both_failed, + 'pattern_type_matches': summary.pattern_type_matches, + 'pattern_type_match_rate': summary.pattern_type_matches / max(summary.total_tests, 1), + 'avg_token_similarity': summary.avg_token_similarity, + 'avg_execution_time_python_ms': summary.avg_execution_time_python, + 'avg_execution_time_rust_ms': summary.avg_execution_time_rust, + 'performance_ratio': summary.avg_execution_time_rust / max(summary.avg_execution_time_python, 0.001) + }, + 'detailed_results': [ + { + 'test_id': r.test_id, + 'python_success': r.python_success, + 'rust_success': r.rust_success, + 'both_passed': r.both_passed, + 'pattern_type_match': r.pattern_type_match, + 'token_similarity': r.token_similarity, + 'execution_time_diff_ms': r.execution_time_diff_ms, + 'issues': r.issues + } + for r in summary.results + ] + } + + with open(output_file, 'w') as f: + json.dump(report, f, indent=2) + +def main(): + if len(sys.argv) < 3: + print("Usage: python result_comparator.py ") + sys.exit(1) + + python_file = sys.argv[1] + rust_file = sys.argv[2] + + comparator = ResultComparator() + + print("Comparing Python and Rust test results...") + print("==========================================") + + try: + summary = comparator.compare_results(python_file, rust_file) + + 
def main():
    """CLI entry point: compare a Python and a Rust test-result file.

    Expects two positional arguments — the Python results JSON and the
    Rust results JSON. Prints a summary to stdout, writes a detailed
    report to comparison_report.json, and exits with status 1 on usage
    errors or comparison failures.
    """
    if len(sys.argv) < 3:
        # BUG FIX: the usage line had lost its two argument placeholders
        # (trailing space and nothing after the script name); restore them
        # so the message actually tells the user what to pass.
        print("Usage: python result_comparator.py <python_results.json> <rust_results.json>")
        sys.exit(1)

    python_file = sys.argv[1]
    rust_file = sys.argv[2]

    comparator = ResultComparator()

    print("Comparing Python and Rust test results...")
    print("==========================================")

    try:
        summary = comparator.compare_results(python_file, rust_file)

        print(f"\nComparison Summary:")
        print(f"Total tests: {summary.total_tests}")
        print(f"Both passed: {summary.both_passed}")
        print(f"Python only passed: {summary.python_only_passed}")
        print(f"Rust only passed: {summary.rust_only_passed}")
        print(f"Both failed: {summary.both_failed}")
        print(f"Pattern type matches: {summary.pattern_type_matches}/{summary.total_tests} ({summary.pattern_type_matches/max(summary.total_tests,1)*100:.1f}%)")
        print(f"Average token similarity: {summary.avg_token_similarity:.2f}")
        print(f"Avg execution time - Python: {summary.avg_execution_time_python:.2f}ms")
        print(f"Avg execution time - Rust: {summary.avg_execution_time_rust:.2f}ms")

        # Show tests with issues
        issues_found = [r for r in summary.results if r.issues]
        if issues_found:
            print(f"\nTests with issues ({len(issues_found)}):")
            for result in issues_found:
                print(f"  {result.test_id}: {', '.join(result.issues)}")

        # Generate detailed report
        output_file = "comparison_report.json"
        comparator.generate_report(summary, output_file)
        print(f"\nDetailed report written to: {output_file}")

    except Exception as e:
        print(f"Error during comparison: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
class PatternTester:
    """Matches PromQL query ASTs against a fixed pattern library and
    classifies each query into one of four string-keyed categories:
    "ONLY_TEMPORAL", "ONLY_SPATIAL", "ONLY_VECTOR", or
    "ONE_TEMPORAL_ONE_SPATIAL".

    Categories are plain strings rather than a shared enum so this test
    harness stays decoupled from the Rust side's type definitions.
    """

    def __init__(self):
        # Build the pattern library once up front; test_query only reads it.
        self.patterns = self._build_patterns()

    def _build_patterns(self) -> Dict[str, List[PromQLPattern]]:
        """Return the category-name -> list-of-PromQLPattern lookup table.

        NOTE(review): PromQLPatternBuilder.function / .aggregation /
        .matrix_selector / .metric / .number are project helpers; the
        collect_as / collect_args_as kwargs appear to name the token
        buckets surfaced in MatchResult.tokens on a successful match —
        confirm against the promql_utilities documentation.
        """
        patterns = {}

        # ONLY_TEMPORAL patterns
        temporal_patterns = [
            # Rate/increase pattern
            PromQLPattern(
                PromQLPatternBuilder.function(
                    ["rate", "increase"],
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                )
            ),
            # Quantile over time pattern (takes an extra numeric quantile argument)
            PromQLPattern(
                PromQLPatternBuilder.function(
                    "quantile_over_time",
                    PromQLPatternBuilder.number(),
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                    collect_args_as="function_args",
                )
            ),
            # Other over_time functions
            PromQLPattern(
                PromQLPatternBuilder.function(
                    [
                        "sum_over_time",
                        "count_over_time",
                        "avg_over_time",
                        "min_over_time",
                        "max_over_time",
                    ],
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                )
            ),
        ]

        # ONLY_SPATIAL patterns
        spatial_patterns = [
            # Aggregation pattern
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.metric(collect_as="metric"),
                    collect_as="aggregation",
                )
            ),
            # Simple metric pattern (for standalone metrics)
            PromQLPattern(PromQLPatternBuilder.metric(collect_as="metric")),
        ]

        # ONE_TEMPORAL_ONE_SPATIAL patterns
        combined_patterns = [
            # Aggregation of quantile_over_time
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.function(
                        "quantile_over_time",
                        PromQLPatternBuilder.number(),
                        PromQLPatternBuilder.matrix_selector(
                            PromQLPatternBuilder.metric(collect_as="metric"),
                            collect_as="range_vector",
                        ),
                        collect_as="function",
                        collect_args_as="function_args",
                    ),
                    collect_as="aggregation",
                )
            ),
            # Aggregation of other temporal functions
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.function(
                        [
                            "sum_over_time",
                            "count_over_time",
                            "avg_over_time",
                            "min_over_time",
                            "max_over_time",
                            "rate",
                            "increase",
                        ],
                        PromQLPatternBuilder.matrix_selector(
                            PromQLPatternBuilder.metric(collect_as="metric"),
                            collect_as="range_vector",
                        ),
                        collect_as="function",
                    ),
                    collect_as="aggregation",
                )
            ),
        ]

        # ONLY_VECTOR mirrors ONLY_SPATIAL but represents plain instant vector selectors.
        # Note the SAME list object is registered under both keys; test_query
        # disambiguates the two categories after a match (see below).
        patterns["ONLY_TEMPORAL"] = temporal_patterns
        patterns["ONLY_SPATIAL"] = spatial_patterns
        patterns["ONLY_VECTOR"] = spatial_patterns
        patterns["ONE_TEMPORAL_ONE_SPATIAL"] = combined_patterns

        return patterns

    def test_query(self, test_case: TestCase) -> TestResult:
        """Parse and classify one query; return a TestResult.

        success is True iff the classified category equals
        test_case.expected_pattern_type. Parse failures short-circuit with
        success=False and an error message. execution_time_ms covers the
        full parse + match wall time for this call.
        """
        start_time = time.time()
        test_id = test_case.id

        try:
            # Parse the query into an AST (promql_parser is an external
            # package; parse is assumed to raise on invalid PromQL).
            ast = promql_parser.parse(test_case.query)
        except Exception as e:
            return TestResult(
                test_id=test_id,
                success=False,
                error_message=f"Failed to parse query: {str(e)}",
                execution_time_ms=(time.time() - start_time) * 1000,
            )

        # Try to match against all patterns
        matched_pattern_type = None
        matched_tokens = None
        matched_raw = None

        # First match wins, scanning categories in dict insertion order
        # (ONLY_TEMPORAL, ONLY_SPATIAL, ONLY_VECTOR, ONE_TEMPORAL_ONE_SPATIAL).
        for pattern_type, pattern_list in self.patterns.items():
            for pattern in pattern_list:
                match_result: MatchResult = pattern.matches(ast)
                if match_result.matches:
                    matched_raw = (pattern_type, match_result)
                    break
            if matched_raw:
                break

        if matched_raw:
            pattern_type, match_result = matched_raw
            # If a plain vector selector matched under the spatial patterns, classify as ONLY_VECTOR.
            # ONLY_SPATIAL and ONLY_VECTOR share the same pattern list and
            # ONLY_SPATIAL is scanned first, so a bare metric always matches
            # there; the absence of an "aggregation" token tells them apart.
            if pattern_type == "ONLY_SPATIAL":
                if "metric" in match_result.tokens and "aggregation" not in match_result.tokens:
                    matched_pattern_type = "ONLY_VECTOR"
                else:
                    matched_pattern_type = "ONLY_SPATIAL"
            else:
                matched_pattern_type = self._pattern_type_to_string(pattern_type)

            matched_tokens = self._serialize_tokens(match_result.tokens)

        execution_time = (time.time() - start_time) * 1000

        # Check if results match expectations
        expected_type = test_case.expected_pattern_type
        success = matched_pattern_type == expected_type

        return TestResult(
            test_id=test_id,
            success=success,
            error_message=(
                None
                if success
                else f"Pattern type mismatch. Expected: {expected_type}, Got: {matched_pattern_type}"
            ),
            actual_pattern_type=matched_pattern_type,
            actual_tokens=matched_tokens,
            execution_time_ms=execution_time,
        )

    def _pattern_type_to_string(self, pattern_type: Any) -> str:
        """Normalize a category key to str (identity for the string keys used here)."""
        # pattern_type is already a string in this decoupled design
        return pattern_type if isinstance(pattern_type, str) else str(pattern_type)

    def _serialize_tokens(self, tokens: Dict) -> Dict:
        """Convert tokens to JSON-serializable format"""
        # Objects carrying a __dict__ (e.g. token objects) are flattened to
        # their attribute dicts; plain values pass through unchanged.
        serialized = {}
        for key, value in tokens.items():
            if hasattr(value, "__dict__"):
                serialized[key] = value.__dict__
            else:
                serialized[key] = value
        return serialized
@dataclass
class TestCase:
    """One PromQL query to classify, together with the expected outcome."""
    id: str
    description: str
    query: str
    expected_pattern_type: str
    expected_tokens: Dict[str, Any]

@dataclass
class PatternBuilderTest:
    """Declarative description of a single pattern-builder invocation."""
    id: str
    description: str
    builder_call: str
    parameters: Dict[str, Any]
    expected_pattern: Dict[str, Any]

@dataclass
class TestResult:
    """Outcome of running one TestCase."""
    test_id: str
    success: bool
    error_message: Optional[str] = None
    actual_pattern_type: Optional[str] = None
    actual_tokens: Optional[Dict[str, Any]] = None
    execution_time_ms: float = 0.0

@dataclass
class TestSuiteResult:
    """Aggregate outcome of a whole run for one language implementation."""
    language: str
    timestamp: str
    total_tests: int
    passed_tests: int
    failed_tests: int
    results: List[TestResult]

class TestData:
    """Loads shared test definitions from JSON and persists run results."""

    def __init__(self, test_cases: List[TestCase], pattern_builder_tests: List[PatternBuilderTest]):
        self.test_cases = test_cases
        self.pattern_builder_tests = pattern_builder_tests

    @classmethod
    def load_from_file(cls, file_path: str) -> 'TestData':
        """Build a TestData from a JSON file containing 'test_cases' and
        'pattern_builder_tests' arrays. Missing keys raise KeyError."""
        with open(file_path, 'r') as f:
            raw = json.load(f)

        cases = []
        for entry in raw['test_cases']:
            cases.append(
                TestCase(
                    id=entry['id'],
                    description=entry['description'],
                    query=entry['query'],
                    expected_pattern_type=entry['expected_pattern_type'],
                    expected_tokens=entry['expected_tokens'],
                )
            )

        builder_tests = []
        for entry in raw['pattern_builder_tests']:
            builder_tests.append(
                PatternBuilderTest(
                    id=entry['id'],
                    description=entry['description'],
                    builder_call=entry['builder_call'],
                    parameters=entry['parameters'],
                    expected_pattern=entry['expected_pattern'],
                )
            )

        return cls(cases, builder_tests)

    def save_results(self, results: List[TestResult], output_file: str):
        """Wrap `results` in a TestSuiteResult and write it to
        `output_file` as indented JSON."""
        passed = sum(1 for r in results if r.success)
        total = len(results)

        suite = TestSuiteResult(
            language="python",
            timestamp=datetime.utcnow().isoformat(),
            total_tests=total,
            passed_tests=passed,
            failed_tests=total - passed,
            results=results,
        )

        with open(output_file, 'w') as f:
            # default= handles the nested TestResult objects inside `results`.
            json.dump(suite.__dict__, f, indent=2, default=self._serialize_result)

    def _serialize_result(self, obj):
        """json.dump fallback: expose __dict__ for dataclass instances,
        stringify anything else."""
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return str(obj)
def main():
    """Run every TestCase from the given JSON file through PatternTester.

    Usage: python test_runner.py <test_data_file>

    Prints a per-test pass/fail line, a final summary, and writes the full
    results to python_test_results.json. Exits with status 1 when the
    argument is missing or the test data cannot be loaded.
    """
    if len(sys.argv) < 2:
        # BUG FIX: the usage message had lost its argument placeholder
        # (trailing space and nothing after the script name); restore it.
        print("Usage: python test_runner.py <test_data_file>")
        sys.exit(1)

    test_data_file = sys.argv[1]

    try:
        test_data = TestData.load_from_file(test_data_file)
    except Exception as e:
        print(f"Failed to load test data: {e}")
        sys.exit(1)

    tester = PatternTester()
    results = []

    print("Running Python PromQL Pattern Tests...")
    print("======================================")

    for test_case in test_data.test_cases:
        print(f"Running test: {test_case.id} - {test_case.description}")
        result = tester.test_query(test_case)

        if result.success:
            print(f"✅ PASSED ({result.execution_time_ms:.2f}ms)")
        else:
            print(f"❌ FAILED ({result.execution_time_ms:.2f}ms): {result.error_message}")

        results.append(result)

    passed = sum(1 for r in results if r.success)
    total = len(results)

    print(f"\nTest Summary:")
    print(f"Total: {total}, Passed: {passed}, Failed: {total - passed}")

    # Save results
    output_file = "python_test_results.json"
    test_data.save_results(results, output_file)
    print(f"Results written to: {output_file}")

if __name__ == "__main__":
    main()
b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.lock @@ -0,0 +1,1187 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + 
"rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = 
[ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_cross_lang_tests" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "promql_utilities", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + 
"chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = 
"socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + 
"powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name 
= "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" 
+version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + 
"windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + 
+[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml new file mode 100644 index 0000000..b472386 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_cross_lang_tests" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "test_runner" +path = "src/main.rs" + +[dependencies] +promql-parser = "0.5.0" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1.0", features = ["full"] } +chrono = "0.4.41" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[dependencies.promql_utilities] +path = "../../../../CommonDependencies/dependencies/rs/promql_utilities" diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs new file mode 100644 index 0000000..fe20d3c --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs @@ -0,0 +1,69 @@ +mod test_data; +mod pattern_tests; + +use pattern_tests::PatternTester; +use test_data::*; +use std::env; +use tracing_subscriber::filter::LevelFilter; + +fn main() -> Result<(), Box> { + // Initialize tracing with debug level + tracing_subscriber::fmt() + .with_max_level(LevelFilter::DEBUG) + .init(); + + let args: Vec = env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + let test_data_file = &args[1]; + let test_data = TestData::load_from_file(test_data_file)?; + + let tester = PatternTester::new(); + let mut results = Vec::new(); + + println!("Running Rust PromQL Pattern Tests..."); + println!("====================================="); + + for test_case in 
&test_data.test_cases { + println!("Running test: {} - {}", test_case.id, test_case.description); + let result = tester.test_query(test_case); + + if result.success { + println!("✅ PASSED ({}ms)", result.execution_time_ms); + } else { + println!("❌ FAILED ({}ms): {}", + result.execution_time_ms, + result.error_message.as_deref().unwrap_or("Unknown error")); + } + + results.push(result); + } + + let passed = results.iter().filter(|r| r.success).count(); + let total = results.len(); + + println!("\nTest Summary:"); + println!("Total: {}, Passed: {}, Failed: {}", total, passed, total - passed); + + // Create test suite result + let suite_result = TestSuiteResult { + language: "rust".to_string(), + timestamp: chrono::Utc::now().to_rfc3339(), + total_tests: total, + passed_tests: passed, + failed_tests: total - passed, + results, + }; + + // Write results to file + let output_file = "rust_test_results.json"; + let json_output = serde_json::to_string_pretty(&suite_result)?; + std::fs::write(output_file, json_output)?; + + println!("Results written to: {}", output_file); + + Ok(()) +} diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs new file mode 100644 index 0000000..fcc210f --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs @@ -0,0 +1,385 @@ +use crate::test_data::*; +use promql_parser::parser as promql; +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +// Decoupled from QueryPatternType: use string category keys +use serde_json::Value; +use std::collections::HashMap; +use std::time::Instant; + +pub struct PatternTester { + patterns: HashMap>, +} + +impl PatternTester { + pub fn new() -> Self { + let mut patterns = HashMap::new(); + + // ONLY_TEMPORAL patterns + let temporal_patterns = vec![ + // Rate pattern + PromQLPattern::new( + Self::build_rate_pattern(), + vec![ + 
"metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + ], + // Some("ONLY_TEMPORAL".to_string()), + ), + // Quantile over time pattern + PromQLPattern::new( + Self::build_quantile_over_time_pattern(), + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + "function_args".to_string(), + ], + // Some("ONLY_TEMPORAL".to_string()), + ), + ]; + + // ONLY_SPATIAL patterns + let spatial_patterns = vec![ + // Sum aggregation pattern + PromQLPattern::new( + Self::build_sum_pattern(), + vec!["metric".to_string(), "aggregation".to_string()], + // Some("ONLY_SPATIAL".to_string()), + ), + // Simple metric pattern + PromQLPattern::new( + Self::build_metric_pattern(), + vec!["metric".to_string()], + // Some("ONLY_SPATIAL".to_string()), + ), + ]; + + // ONE_TEMPORAL_ONE_SPATIAL patterns + let combined_patterns = vec![ + // Sum of rate pattern + PromQLPattern::new( + Self::build_one_temporal_one_spatial_pattern(), + vec![ + "metric".to_string(), + "function".to_string(), + "aggregation".to_string(), + "range_vector".to_string(), + ], + // Some("ONE_TEMPORAL_ONE_SPATIAL".to_string()), + ), + ]; + + // Insert in order from simple to complex to avoid panics + patterns.insert("ONLY_VECTOR".to_string(), spatial_patterns.clone()); + patterns.insert("ONLY_SPATIAL".to_string(), spatial_patterns); + patterns.insert("ONLY_TEMPORAL".to_string(), temporal_patterns); + patterns.insert("ONE_TEMPORAL_ONE_SPATIAL".to_string(), combined_patterns); + + Self { patterns } + } + + pub fn test_query(&self, test_case: &TestCase) -> TestResult { + let start_time = Instant::now(); + let test_id = test_case.id.clone(); + + // Parse the query + let ast = match promql::parse(&test_case.query) { + Ok(ast) => ast, + Err(e) => { + return TestResult { + test_id, + success: false, + error_message: Some(format!("Failed to parse query: {}", e)), + actual_pattern_type: None, + actual_tokens: None, + execution_time_ms: start_time.elapsed().as_secs_f64() * 
1000.0, + }; + } + }; + + // Try to match against all patterns + let mut matched_pattern_type = None; + let mut matched_tokens = None; + + for (pattern_type, pattern_list) in &self.patterns { + for pattern in pattern_list { + let match_result = pattern.matches(&ast); + if match_result.matches { + // If a plain vector selector matched under the spatial patterns, classify as ONLY_VECTOR + let final_type = if pattern_type == "ONLY_SPATIAL" { + if match_result.tokens.contains_key("aggregation") { + pattern_type.clone() + } else if match_result.tokens.contains_key("metric") { + "ONLY_VECTOR".to_string() + } else { + pattern_type.clone() + } + } else { + pattern_type.clone() + }; + + // Debug: show pattern_type and token keys for failing test + // debug removed + matched_pattern_type = Some(final_type); + // Extract only relevant token data to match Python format + let flattened_tokens = Self::flatten_token_data(&match_result.tokens); + matched_tokens = + Some(serde_json::to_value(&flattened_tokens).unwrap_or_default()); + break; + } + } + if matched_pattern_type.is_some() { + break; + } + } + + let execution_time = start_time.elapsed().as_secs_f64() * 1000.0; + + // Check if results match expectations + let expected_type = &test_case.expected_pattern_type; + let success = matched_pattern_type.as_ref() == Some(expected_type); + + TestResult { + test_id, + success, + error_message: if success { + None + } else { + Some(format!( + "Pattern type mismatch. 
Expected: {}, Got: {:?}", + expected_type, matched_pattern_type + )) + }, + actual_pattern_type: matched_pattern_type, + actual_tokens: matched_tokens, + execution_time_ms: execution_time, + } + } + + // No conversion needed anymore; keys are already strings + + fn flatten_token_data( + tokens: &HashMap, + ) -> HashMap { + let mut result = HashMap::new(); + + for (token_name, token_data) in tokens { + // Extract only the relevant data from the token based on what's populated + if let Some(metric) = &token_data.metric { + let mut metric_data = serde_json::Map::new(); + metric_data.insert("name".to_string(), Value::String(metric.name.clone())); + metric_data.insert( + "labels".to_string(), + serde_json::to_value(&metric.labels).unwrap_or(Value::Null), + ); + metric_data.insert( + "at".to_string(), + if let Some(at) = metric.at_modifier { + Value::Number(serde_json::Number::from(at)) + } else { + Value::Null + }, + ); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(metric_data)); + } else if let Some(function) = &token_data.function { + let mut function_data = serde_json::Map::new(); + function_data.insert("name".to_string(), Value::String(function.name.clone())); + let args_values: Vec = function + .args + .iter() + .map(|arg| Value::String(arg.clone())) + .collect(); + function_data.insert("args".to_string(), Value::Array(args_values)); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(function_data)); + } else if let Some(aggregation) = &token_data.aggregation { + let mut aggregation_data = serde_json::Map::new(); + aggregation_data.insert("op".to_string(), Value::String(aggregation.op.clone())); + aggregation_data.insert( + "modifier".to_string(), + if let Some(modifier) = &aggregation.modifier { + serde_json::to_value(modifier).unwrap_or(Value::Null) + } else { + Value::Null + }, + ); + aggregation_data.insert( + "param".to_string(), + if let 
Some(param) = &aggregation.param { + Value::String(param.clone()) + } else { + Value::Null + }, + ); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(aggregation_data)); + } else if let Some(range_vector) = &token_data.range_vector { + let mut range_data = serde_json::Map::new(); + // Convert chrono Duration to human-readable format like Python's "0:05:00" + let total_seconds = range_vector.range.num_seconds() as u64; + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + let range_str = format!("{}:{:02}:{:02}", hours, minutes, seconds); + range_data.insert("range".to_string(), Value::String(range_str)); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(range_data)); + } else if let Some(subquery) = &token_data.subquery { + let mut subquery_data = serde_json::Map::new(); + // Convert chrono Duration to human-readable format like Python's "0:05:00" + let total_seconds = subquery.range.num_seconds() as u64; + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + let range_str = format!("{}:{:02}:{:02}", hours, minutes, seconds); + subquery_data.insert("range".to_string(), Value::String(range_str)); + if let Some(offset) = &subquery.offset { + subquery_data.insert("offset".to_string(), Value::String(offset.clone())); + } + if let Some(step) = &subquery.step { + subquery_data.insert("step".to_string(), Value::String(step.clone())); + } + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(subquery_data)); + } else if let Some(number) = &token_data.number { + let mut number_data = serde_json::Map::new(); + number_data.insert( + "value".to_string(), + Value::Number( + serde_json::Number::from_f64(number.value) + .unwrap_or(serde_json::Number::from(0)), + ), + ); + 
result.insert(token_name.clone(), Value::Object(number_data)); + } + + // Handle special case for function_args (like Python does) + if token_name == "function_args" { + if let Some(function) = &token_data.function { + let args_values: Vec = function + .args + .iter() + .map(|arg| Value::String(arg.clone())) + .collect(); + result.insert(token_name.clone(), Value::Array(args_values)); + } + } + } + + result + } + + fn build_rate_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let args: Vec>> = vec![ms]; + + PromQLPatternBuilder::function(vec!["rate", "increase"], args, Some("function"), None) + } + + fn build_quantile_over_time_pattern() -> Option> { + let num = PromQLPatternBuilder::number(None, None); + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let args: Vec>> = vec![num, ms]; + + PromQLPatternBuilder::function( + vec!["quantile_over_time"], + args, + Some("function"), + Some("function_args"), + ) + } + + fn build_sum_pattern() -> Option> { + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + None, + None, + Some("aggregation"), + ) + } + + fn build_metric_pattern() -> Option> { + PromQLPatternBuilder::metric(None, None, None, Some("metric")) + } + + fn build_one_temporal_one_spatial_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let func_args: Vec>> = vec![ms]; + + let func = PromQLPatternBuilder::function( + vec![ + "quantile_over_time", + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "rate", + "increase", + ], + func_args, + 
Some("function"), + None, + ); + + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + func, + None, + None, + None, + Some("aggregation"), + ) + } + + fn build_sum_rate_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let func_args: Vec>> = vec![ms]; + + let func = PromQLPatternBuilder::function( + vec!["rate", "increase"], + func_args, + Some("function"), + None, + ); + + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "min", "max"], + func, + None, + None, + None, + Some("aggregation"), + ) + } +} + +impl Default for PatternTester { + fn default() -> Self { + Self::new() + } +} diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs new file mode 100644 index 0000000..dc86bf0 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs @@ -0,0 +1,87 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestData { + pub test_cases: Vec, + pub pattern_builder_tests: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestCase { + pub id: String, + pub description: String, + pub query: String, + pub expected_pattern_type: String, + pub expected_tokens: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ExpectedToken { + Metric(MetricToken), + Function(FunctionToken), + Aggregation(AggregationToken), + RangeVector(RangeToken), + FunctionArgs(Vec), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetricToken { + pub name: String, + pub labels: HashMap, + pub at_modifier: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FunctionToken { + pub name: String, 
+} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationToken { + pub op: String, + pub modifier: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RangeToken { + pub range: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PatternBuilderTest { + pub id: String, + pub description: String, + pub builder_call: String, + pub parameters: serde_json::Value, + pub expected_pattern: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TestResult { + pub test_id: String, + pub success: bool, + pub error_message: Option, + pub actual_pattern_type: Option, + pub actual_tokens: Option, + pub execution_time_ms: f64, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TestSuiteResult { + pub language: String, + pub timestamp: String, + pub total_tests: usize, + pub passed_tests: usize, + pub failed_tests: usize, + pub results: Vec, +} + +impl TestData { + pub fn load_from_file(path: &str) -> Result> { + let content = std::fs::read_to_string(path)?; + let test_data: TestData = serde_json::from_str(&content)?; + Ok(test_data) + } +} diff --git a/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py b/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py new file mode 100755 index 0000000..43f5df2 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 + +import os +import sys +import subprocess +import json +from pathlib import Path +from datetime import datetime + + +class MasterTestRunner: + def __init__(self, base_dir: str): + self.base_dir = Path(base_dir).resolve() + self.test_data_file = self.base_dir / "test_data" / "promql_queries.json" + self.python_dir = self.base_dir / "python_tests" + self.rust_dir = self.base_dir / "rust_tests" + self.comparison_dir = self.base_dir / "comparison_tests" + + def run_all_tests(self): + """Run the 
complete test suite: Python, Rust, and comparison""" + + print("🚀 Starting Cross-Language PromQL Pattern Testing") + print("=" * 60) + + if not self.test_data_file.exists(): + print(f"❌ Test data file not found: {self.test_data_file}") + return False + + # Run Python tests + print("\n📍 Step 1: Running Python tests...") + python_success = self._run_python_tests() + + # Run Rust tests + print("\n📍 Step 2: Running Rust tests...") + rust_success = self._run_rust_tests() + + # Compare results + if python_success and rust_success: + print("\n📍 Step 3: Comparing results...") + self._compare_results() + else: + print("\n⚠️ Skipping comparison due to test failures") + + print(f"\n✅ Test suite completed at {datetime.now()}") + return python_success and rust_success + + def _run_python_tests(self) -> bool: + """Run Python test suite""" + try: + os.chdir(self.python_dir) + + cmd = [sys.executable, "test_runner.py", str(self.test_data_file)] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Python test output:") + print(result.stdout) + if result.stderr: + print("Python test errors:") + print(result.stderr) + + return result.returncode == 0 + + except Exception as e: + print(f"❌ Error running Python tests: {e}") + return False + finally: + os.chdir(self.base_dir) + + def _run_rust_tests(self) -> bool: + """Run Rust test suite""" + try: + os.chdir(self.rust_dir) + + # Build the Rust project first + print("Building Rust test runner...") + build_result = subprocess.run( + ["cargo", "build", "--release"], capture_output=True, text=True + ) + + if build_result.returncode != 0: + print("❌ Rust build failed:") + print(build_result.stderr) + return False + + # Run the tests + cmd = ["cargo", "run", "--release", "--", str(self.test_data_file)] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Rust test output:") + print(result.stdout) + if result.stderr: + 
print("Rust test errors:") + print(result.stderr) + + return result.returncode == 0 + + except Exception as e: + print(f"❌ Error running Rust tests: {e}") + return False + finally: + os.chdir(self.base_dir) + + def _compare_results(self): + """Compare Python and Rust test results""" + try: + python_results = self.python_dir / "python_test_results.json" + rust_results = self.rust_dir / "rust_test_results.json" + + if not python_results.exists(): + print("❌ Python results file not found") + return + + if not rust_results.exists(): + print("❌ Rust results file not found") + return + + os.chdir(self.comparison_dir) + + cmd = [ + sys.executable, + "result_comparator.py", + str(python_results), + str(rust_results), + ] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Comparison output:") + print(result.stdout) + if result.stderr: + print("Comparison errors:") + print(result.stderr) + + except Exception as e: + print(f"❌ Error comparing results: {e}") + finally: + os.chdir(self.base_dir) + + def generate_test_summary(self): + """Generate a comprehensive test summary""" + summary = { + "timestamp": datetime.utcnow().isoformat(), + "test_data_file": str(self.test_data_file), + "files_generated": [], + } + + # Collect generated files + for results_file in [ + self.python_dir / "python_test_results.json", + self.rust_dir / "rust_test_results.json", + self.comparison_dir / "comparison_report.json", + ]: + if results_file.exists(): + summary["files_generated"].append(str(results_file)) + + summary_file = self.base_dir / "test_summary.json" + with open(summary_file, "w") as f: + json.dump(summary, f, indent=2) + + print(f"📊 Test summary written to: {summary_file}") + + +def main(): + script_dir = Path(__file__).parent.parent + runner = MasterTestRunner(str(script_dir)) + + success = runner.run_all_tests() + runner.generate_test_summary() + + if success: + print("\n🎉 All tests completed successfully!") + sys.exit(0) 
+ else: + print("\n💥 Some tests failed. Check the output above.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/Cargo.lock b/CommonDependencies/tests/compare_patterns/Cargo.lock new file mode 100644 index 0000000..920e610 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/Cargo.lock @@ -0,0 +1,795 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" + 
+[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "compare_patterns_runner" +version = "0.1.0" +dependencies = [ + "promql_utilities", + "serde", + "serde_json", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" 
+dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +dependencies = [ + "equivalent", + "hashbrown", +] + 
+[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + 
"regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" 
+version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = 
"windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/compare_patterns/Cargo.toml b/CommonDependencies/tests/compare_patterns/Cargo.toml new file mode 100644 index 0000000..8d04a6f --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "compare_patterns_runner" +version = "0.1.0" +edition = "2021" + +[dependencies] +promql_utilities = { path = 
"../../dependencies/rs/promql_utilities" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/CommonDependencies/tests/compare_patterns/README.md b/CommonDependencies/tests/compare_patterns/README.md new file mode 100644 index 0000000..2b16ff1 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/README.md @@ -0,0 +1,18 @@ +Compare patterns generated by Python and Rust implementations. + +Steps +1. Generate Python patterns: + + python3 tests/compare_patterns/python_generate_patterns.py + +2. Build and run the Rust generator. From repository root: + + cargo run --manifest-path dependencies/rs/promql_utilities/Cargo.toml --bin tests/compare_patterns/rust_generate_patterns.rs + + (Alternatively, compile the small program with `rustc`.) + +3. Compare: + + python3 tests/compare_patterns/compare_serialized_patterns.py + +The comparator returns exit code 0 when patterns are equivalent. diff --git a/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py b/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py new file mode 100644 index 0000000..761962f --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py @@ -0,0 +1,56 @@ +"""Compare serialized pattern JSON files from Python and Rust generators. + +Exits with code 0 if equivalent, 1 otherwise. 
+""" + +import json +import os +import sys + + +def load(path): + with open(path, "r") as f: + return json.load(f) + + +def normalize(value): + """Normalize pattern structures for comparison: sort keys in dicts and recursively apply.""" + if isinstance(value, dict): + return {k: normalize(value[k]) for k in sorted(value.keys())} + if isinstance(value, list): + return [normalize(v) for v in value] + return value + + +def main(): + base = os.path.dirname(__file__) + out_dir = os.path.join(base, "out") + py_path = os.path.join(out_dir, "python_patterns.json") + rs_path = os.path.join(out_dir, "rust_patterns.json") + + if not os.path.exists(py_path) or not os.path.exists(rs_path): + print( + "Missing generated pattern files. Run python_generate_patterns.py and rust generator." + ) + sys.exit(2) + + py = load(py_path) + rs = load(rs_path) + + py_n = normalize(py) + rs_n = normalize(rs) + + if py_n == rs_n: + print("Patterns match") + sys.exit(0) + else: + print("Patterns differ") + print("--- Python patterns ---") + print(json.dumps(py_n, indent=2)) + print("--- Rust patterns ---") + print(json.dumps(rs_n, indent=2)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/python_generate_patterns.py b/CommonDependencies/tests/compare_patterns/python_generate_patterns.py new file mode 100644 index 0000000..3ca7bea --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/python_generate_patterns.py @@ -0,0 +1,110 @@ +"""Generate JSON-serialized patterns from Python builder. 
+ +Writes to tests/compare_patterns/out/python_patterns.json +""" + +import json +import os +import sys + +root = os.path.dirname(__file__) +sys.path.append( + os.path.abspath( + os.path.join(root, "../../CommonDependencies/dependencies/py/promql_utilities") + ) +) + +from promql_utilities.ast_matching.PromQLPatternBuilder import PromQLPatternBuilder + + +def build_all(): + patterns = {} + + temporal = [ + PromQLPatternBuilder.function( + ["rate", "increase"], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + ), + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + ] + + spatial = [ + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="aggregation", + ), + PromQLPatternBuilder.metric(collect_as="metric"), + ] + + combined = [ + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ), + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "rate", + "increase", + ], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + ), + 
collect_as="aggregation", + ), + ] + + patterns["ONLY_TEMPORAL"] = temporal + patterns["ONLY_SPATIAL"] = spatial + patterns["ONE_TEMPORAL_ONE_SPATIAL"] = combined + + return patterns + + +def main(): + out_dir = os.path.join(os.path.dirname(__file__), "out") + os.makedirs(out_dir, exist_ok=True) + patterns = build_all() + out_path = os.path.join(out_dir, "python_patterns.json") + with open(out_path, "w") as f: + # sort by keys + sorted_patterns = {k: patterns[k] for k in sorted(patterns.keys())} + json.dump(sorted_patterns, f, indent=2) + print("Wrote", out_path) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/src/main.rs b/CommonDependencies/tests/compare_patterns/src/main.rs new file mode 100644 index 0000000..6c429f1 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/src/main.rs @@ -0,0 +1,156 @@ +use promql_utilities::ast_matching::promql_pattern::TokenData; +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +use serde_json::json; +use std::collections::HashMap; +use std::fs; + +fn tokendata_to_json(_t: &TokenData) -> serde_json::Value { + // We only need the pattern ASTs themselves; tokens are runtime and can be skipped. 
+ json!(null) +} + +fn main() { + let mut out: HashMap> = HashMap::new(); + + // ONLY_TEMPORAL patterns + let mut only_temporal_patterns = Vec::new(); + + // Pattern 1: rate/increase functions + let ms1 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let func_args1: Vec>> = vec![ms1]; + let pattern_1 = PromQLPatternBuilder::function(vec!["rate", "increase"], func_args1, Some("function"), None); + let pattern1 = PromQLPattern::new( + pattern_1, + vec!["metric".to_string(), "function".to_string(), "range_vector".to_string()], + ); + if let Some(ast) = pattern1.ast_pattern { + only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: quantile_over_time function + let ms2 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let func_args2: Vec>> = vec![ + PromQLPatternBuilder::number(None, None), + ms2, + ]; + let pattern_2 = PromQLPatternBuilder::function(vec!["quantile_over_time"], func_args2, Some("function"), Some("function_args")); + let pattern2 = PromQLPattern::new( + pattern_2, + vec!["metric".to_string(), "function".to_string(), "range_vector".to_string(), "function_args".to_string()], + ); + if let Some(ast) = pattern2.ast_pattern { + only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONLY_TEMPORAL".to_string(), only_temporal_patterns); + + // ONLY_SPATIAL patterns + let mut only_spatial_patterns = Vec::new(); + + // Pattern 1: aggregation functions + let pattern_3 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + None, + None, + Some("aggregation") + ); + let pattern3 = PromQLPattern::new( + pattern_3, + vec!["metric".to_string(), 
"aggregation".to_string()], + ); + if let Some(ast) = pattern3.ast_pattern { + only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: basic metric + let pattern_4 = PromQLPatternBuilder::metric(None, None, None, Some("metric")); + let pattern4 = PromQLPattern::new( + pattern_4, + vec!["metric".to_string()], + ); + if let Some(ast) = pattern4.ast_pattern { + only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONLY_SPATIAL".to_string(), only_spatial_patterns); + + // ONE_TEMPORAL_ONE_SPATIAL patterns + let mut one_temporal_one_spatial_patterns = Vec::new(); + + // Pattern 1: aggregation of quantile_over_time + let ms3 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let quantile_func_args: Vec>> = vec![ + PromQLPatternBuilder::number(None, None), + ms3, + ]; + let quantile_func = PromQLPatternBuilder::function(vec!["quantile_over_time"], quantile_func_args, Some("function"), Some("function_args")); + let pattern_5 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + quantile_func, + None, + None, + None, + Some("aggregation") + ); + let pattern5 = PromQLPattern::new( + pattern_5, + vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "function_args".to_string(), "aggregation".to_string()], + ); + if let Some(ast) = pattern5.ast_pattern { + one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: aggregation of various temporal functions + let ms4 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let temporal_func_args: Vec>> = vec![ms4]; + let temporal_func = PromQLPatternBuilder::function( + vec!["sum_over_time", "count_over_time", "avg_over_time", 
"min_over_time", "max_over_time", "rate", "increase"], + temporal_func_args, + Some("function"), + None + ); + let pattern_6 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + temporal_func, + None, + None, + None, + Some("aggregation") + ); + let pattern6 = PromQLPattern::new( + pattern_6, + vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "aggregation".to_string()], + ); + if let Some(ast) = pattern6.ast_pattern { + one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONE_TEMPORAL_ONE_SPATIAL".to_string(), one_temporal_one_spatial_patterns); + + let out_dir = std::path::Path::new("./out"); + std::fs::create_dir_all(out_dir).unwrap(); + let out_path = out_dir.join("rust_patterns.json"); + // sort by keys + let sorted: HashMap<_, _> = out.into_iter().collect(); + let s = serde_json::to_string_pretty(&sorted).unwrap(); + fs::write(&out_path, s).unwrap(); + println!("Wrote {}", out_path.display()); +} diff --git a/CommonDependencies/tests/rust_pattern_matching/Cargo.lock b/CommonDependencies/tests/rust_pattern_matching/Cargo.lock new file mode 100644 index 0000000..a8c86c6 --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/Cargo.lock @@ -0,0 +1,1204 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.2.0", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.5.3" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + 
"js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92119844f513ffa41556430369ab02c295a3578af21cf945caa3e9e0c2481ac3" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" 
+version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_cross_lang_tests" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "promql_utilities", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.224" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.224" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.224" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031" 
+dependencies = [ + "deranged", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" + +[[package]] +name = "time-macros" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.0", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" 
+version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link 0.1.3", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/rust_pattern_matching/Cargo.toml b/CommonDependencies/tests/rust_pattern_matching/Cargo.toml new file mode 100644 index 0000000..6712b6b --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_cross_lang_tests" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "test_runner" +path = "src/main.rs" + +[dependencies] +promql-parser = "0.5.0" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1.0", features = ["full"] } +chrono = "0.4.41" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[dependencies.promql_utilities] +path = "../../../CommonDependencies/dependencies/rs/promql_utilities" diff --git a/CommonDependencies/tests/rust_pattern_matching/src/main.rs b/CommonDependencies/tests/rust_pattern_matching/src/main.rs new file mode 100644 index 0000000..3a540af --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/src/main.rs @@ -0,0 +1,168 @@ +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +use promql_utilities::query_logics::enums::QueryPatternType; +use serde_json::Value; +use std::collections::HashMap; + +// Helper functions (these would be closures or separate methods) +fn temporal_pattern( + pattern_type: &str, + blocks: &HashMap>>, +) -> PromQLPattern { + 
PromQLPattern::new( + blocks[pattern_type].clone(), + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + ], + ) +} + +fn spatial_pattern( + pattern_type: &str, + blocks: &HashMap>>, +) -> PromQLPattern { + PromQLPattern::new( + blocks[pattern_type].clone(), + vec!["metric".to_string(), "aggregation".to_string()], + ) +} + +fn spatial_of_temporal_pattern(temporal_block: &Option>) -> PromQLPattern { + let pattern = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + temporal_block.clone(), + None, + None, + None, + Some("aggregation"), + ); + PromQLPattern::new( + pattern, + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + "aggregation".to_string(), + ], + ) +} + +fn main() { + let mut temporal_pattern_blocks = HashMap::new(); + temporal_pattern_blocks.insert( + "quantile".to_string(), + PromQLPatternBuilder::function( + vec!["quantile_over_time"], + vec![ + PromQLPatternBuilder::number(None, Some("quantile_param")), + PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ), + ], + Some("function"), + Some("function_args"), + ), + ); + + temporal_pattern_blocks.insert( + "generic".to_string(), + PromQLPatternBuilder::function( + vec![ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "increase", + "rate", + ], + vec![PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + )], + Some("function"), + Some("function_args"), + ), + ); + + // Create spatial pattern blocks + let mut spatial_pattern_blocks = HashMap::new(); + spatial_pattern_blocks.insert( + "generic".to_string(), + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, 
+ None, + None, + Some("aggregation"), + ), + ); + + // Create controller patterns + let mut controller_patterns = HashMap::new(); + controller_patterns.insert( + QueryPatternType::OnlyTemporal, + vec![ + temporal_pattern("quantile", &temporal_pattern_blocks), + temporal_pattern("generic", &temporal_pattern_blocks), + ], + ); + controller_patterns.insert( + QueryPatternType::OnlySpatial, + vec![spatial_pattern("generic", &spatial_pattern_blocks)], + ); + controller_patterns.insert( + QueryPatternType::OneTemporalOneSpatial, + vec![ + spatial_of_temporal_pattern(&temporal_pattern_blocks["quantile"]), + spatial_of_temporal_pattern(&temporal_pattern_blocks["generic"]), + ], + ); + + let queries = vec![ + // "sum_over_time(fake_metric_total[1m])", + // "count_over_time(fake_metric_total[1m])", + // "quantile_over_time(0.95, fake_metric_total[1m])", + // "sum by (instance, job) (fake_metric_total)", + // "count without (instance) (fake_metric_total)", + // "quantile by (instance) (0.95, fake_metric_total)", + // "sum by (instance, job) (rate(fake_metric_total[1m]))", + "sum by (instance, job) (sum_over_time(fake_metric_total[1m]))", + "sum by (instance, job) (count_over_time(fake_metric_total[1m]))", + ]; + + for query in queries { + let ast = match promql_parser::parser::parse(&query) { + Ok(parsed) => parsed, + Err(e) => { + eprintln!("Failed to parse query '{}': {}", query, e); + continue; + } + }; + + let mut found_match = None; + for (pattern_type, patterns) in &controller_patterns { + for pattern in patterns { + // println!( + // "Trying pattern type: {:?} for query: {}", + // pattern_type, query + // ); + let match_result = pattern.matches(&ast); + if match_result.matches { + println!("Query: {}; Pattern: {:?}", query, pattern_type); + println!("Match result: {:?}", match_result); + found_match = Some((*pattern_type, match_result)); + break; + } + } + if found_match.is_some() { + break; + } + } + } +} diff --git a/Controller/.gitignore b/Controller/.gitignore new 
file mode 100644 index 0000000..35758b0 --- /dev/null +++ b/Controller/.gitignore @@ -0,0 +1,3 @@ +**/*.pyc +**/__pycache__ +.DS_Store diff --git a/Controller/Dockerfile new file mode 100644 index 0000000..7b66ff0 --- /dev/null +++ b/Controller/Dockerfile @@ -0,0 +1,24 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="Main Controller for SketchDB" + +# Set working directory +WORKDIR /app + +# Copy requirements first for better layer caching +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY classes/ ./classes/ +COPY utils/ ./utils/ +COPY main_controller.py . + +# Create directories for input/output +RUN mkdir -p /app/input /app/output + +# Set the entry point +ENTRYPOINT ["python", "main_controller.py"] \ No newline at end of file diff --git a/Controller/LICENSE b/Controller/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/Controller/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Controller/README.md b/Controller/README.md new file mode 100644 index 0000000..8be2f8b --- /dev/null +++ b/Controller/README.md @@ -0,0 +1,124 @@ +# Controller + +The Controller is ASAP's auto-configuration service that determines optimal sketch parameters based on query workload and SLAs. + +## Purpose + +Given a workload of PromQL queries, the Controller: +1. Analyzes each query to determine which sketch algorithm to use +2. Computes sketch parameters (size, accuracy) based on SLAs +3. Generates `streaming_config.yaml` for ArroyoSketch +4. Generates `inference_config.yaml` for QueryEngine + +This automation eliminates manual configuration and ensures sketches meet performance targets. + +## How It Works + +### Input: controller-config.yaml + +The user provides a configuration file describing: +- **Queries** to accelerate +- **Metrics** metadata (labels, cardinality estimates) +- **SLAs** (accuracy, latency targets) (**CURRENTLY IGNORED**) + +**Example:** +```yaml +query_groups: + - id: 1 + queries: + - "quantile by (job) (0.99, http_request_duration_seconds)" + - "sum by (job) (rate(http_requests_total[5m]))" + client_options: + repetitions: 10 + starting_delay: 60 + controller_options: + accuracy_sla: 0.99 # 99% accuracy + latency_sla: 1.0 # 1 second max latency + +metrics: + - metric: "http_request_duration_seconds" + labels: ["job", "instance", "method", "status"] + cardinality: + job: 10 + instance: 100 + method: 5 + status: 4 + - metric: "http_requests_total" + labels: ["job", "instance", "method", "status"] +``` + +### Process: Analyze and Configure + +1. **Parse queries** (`utils/parse_query.py`) + - Extract query type (quantile, sum, avg, etc.) 
+ - Identify aggregation labels + - Determine time range + +2. **Select sketch algorithm** (`utils/logics.py::decide_sketch_type()`) + - Quantile queries → DDSketch or KLL + - Sum/count queries → Simple aggregation + - Consider query patterns and SLAs + +3. **Compute sketch parameters** (`utils/logics.py`) + - Calculate sketch size based on accuracy SLA + - Determine merge strategy for aggregations + - Set up windowing parameters + +4. **Generate configs** + - `streaming_config.yaml` → Describes which sketches to build + - `inference_config.yaml` → Describes how to query sketches + +### Output Files + +**streaming_config.yaml** (for ArroyoSketch): +```yaml +sketches: + - metric: "http_request_duration_seconds" + sketch_type: "ddsketch" + parameters: + alpha: 0.01 # 1% relative error + max_num_bins: 2048 + aggregation: + - "job" + window: "1h" +``` + +**inference_config.yaml** (for QueryEngine): +```yaml +sketches: + - metric: "http_request_duration_seconds" + sketch_type: "ddsketch" + labels: ["job"] + kafka_topic: "sketches" +``` + +## Key Files + +**TODO** + +## Configuration Schema + +### controller-config.yaml + +```yaml +query_groups: + - id: # Unique group ID + queries: # List of PromQL queries + - "" + client_options: # Query execution options + repetitions: # How many times to run + starting_delay: # Delay before first run (seconds) + repetition_delay: # Delay between runs (seconds) + query_time_offset: # Time offset for queries (seconds) + controller_options: + accuracy_sla: # 0.0-1.0 (default: 0.99) + latency_sla: # Seconds (default: 1.0) + sketch_type: # Optional: force specific sketch + custom_sketch_params: # Optional: override params + +metrics: + - metric: "" # Prometheus metric name + labels: [] # List of label names + cardinality: # Optional: estimated cardinalities + : +``` diff --git a/Controller/classes/SingleQueryConfig.py b/Controller/classes/SingleQueryConfig.py new file mode 100644 index 0000000..5638f10 --- /dev/null +++ 
b/Controller/classes/SingleQueryConfig.py @@ -0,0 +1,544 @@ +import copy +from loguru import logger + +import promql_parser +from typing import Optional, Tuple, List + +from promql_utilities.ast_matching.PromQLPattern import PromQLPattern, MatchResult +from promql_utilities.ast_matching.PromQLPatternBuilder import PromQLPatternBuilder +from promql_utilities.query_logics.enums import ( + QueryPatternType, + QueryTreatmentType, + CleanupPolicy, +) +from promql_utilities.query_logics.logics import ( + get_is_collapsable, + map_statistic_to_precompute_operator, +) +from promql_utilities.query_logics.parsing import ( + get_metric_and_spatial_filter, + get_statistics_to_compute, +) +from promql_utilities.query_logics.parsing import get_spatial_aggregation_output_labels +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) +from utils import logics + +# import utils.promql + +from promql_utilities.streaming_config.MetricConfig import MetricConfig + + +class SingleQueryConfig: + def __init__( + self, + config: dict, + metric_config: MetricConfig, + prometheus_scrape_interval: int, + streaming_engine: str, + sketch_parameters: dict, + ): + self.config = config + self.query = config["query"] + self.query_ast = promql_parser.parse(self.query) + self.t_repeat = int(config["t_repeat"]) + self.prometheus_scrape_interval = prometheus_scrape_interval + self.__dict__.update(config["options"]) + # self.accuracy_sla = float(config["accuracy_sla"]) + # self.latency_sla = float(config["latency_sla"]) + self.metric_config = metric_config + self.streaming_engine = streaming_engine + self.sketch_parameters = sketch_parameters + self.range_duration = config["range_duration"] + self.step = config["step"] + + self.patterns = { + QueryPatternType.ONLY_TEMPORAL: [ + PromQLPattern( + PromQLPatternBuilder.function( + "quantile_over_time", + 
PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ) + ), + PromQLPattern( + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + # "stddev_over_time", + # "stdvar_over_time", + "increase", + "rate", + ], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ) + ), + ], + # TODO: add topk/bottomk + QueryPatternType.ONLY_SPATIAL: [ + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + "topk", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="aggregation", + ) + ) + ], + # TODO: need some way of specifying pattern using an existing pattern + QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: [ + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ) + ), + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + # "stddev_over_time", + # "stdvar_over_time", + "increase", + "rate", + ], + PromQLPatternBuilder.matrix_selector( + 
PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ) + ), + ], + } + + self.query_pattern_type = None + self.query_pattern_match = None + self.query_treatment_type = None + + self.process_query() + + def process_query(self): + query_pattern_type, match = self.match_query_pattern() + + if query_pattern_type and match: + self.query_pattern_type = query_pattern_type + self.query_pattern_match = match + self.query_treatment_type = self.get_query_treatment_type() + logger.debug("Query treatment type: {}", self.query_treatment_type) + else: + # self.logger.warning("Query pattern not supported: %s", self.query) + logger.warning("Query pattern not supported: {}", self.query) + + def should_be_performant(self) -> bool: + if self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + # Check quantile_over_time, rate, increase + # Calculate number of data points per key + function_name = self.query_pattern_match.tokens["function"]["name"] + if function_name in ["rate", "increase", "quantile_over_time"]: + num_data_points_per_tumbling_window = ( + self.t_repeat / self.prometheus_scrape_interval + ) + range_duration = int( + self.query_pattern_match.tokens["range_vector"][ + "range" + ].total_seconds() + ) + if num_data_points_per_tumbling_window < 60: + logger.info( + "[Performance Check Failed] num_data_points_per_tumbling_window {} < 60", + num_data_points_per_tumbling_window, + ) + return False + # bound time for merging for quantile_over_time + if function_name == "quantile_over_time": + if range_duration / self.t_repeat > 15: + logger.info( + "[Performance Check Failed] range_duration / t_repeat {} > 15", + range_duration / self.t_repeat, + ) + return False + return True + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + return True + elif self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: + # TODO: might need 
to add checks here + return True + else: + return True + + def is_supported(self) -> bool: + return ( + self.query_pattern_type is not None and self.query_pattern_match is not None + ) + + def match_query_pattern( + self, + ) -> Tuple[Optional[QueryPatternType], Optional[MatchResult]]: + for pattern_type, patterns in self.patterns.items(): + for pattern in patterns: + match = pattern.matches(self.query_ast, debug=False) + if match: + logger.debug("Matched pattern: {}", pattern_type) + return pattern_type, match + return None, None + + def get_query_treatment_type(self): + assert self.query_pattern_type and self.query_pattern_match + + if ( + self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + if self.query_pattern_match.tokens["function"]["name"] in [ + "quantile_over_time", + "sum_over_time", + "count_over_time", + "avg_over_time", + ]: + return QueryTreatmentType.APPROXIMATE + else: + return QueryTreatmentType.EXACT + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + if self.query_pattern_match.tokens["aggregation"]["op"] in [ + "quantile", + "sum", + "count", + "avg", + "topk", + ]: + return QueryTreatmentType.APPROXIMATE + else: + return QueryTreatmentType.EXACT + else: + raise ValueError("Invalid query pattern type") + + def get_streaming_aggregation_configs( + self, + ) -> Tuple[List[StreamingAggregationConfig], int]: + assert ( + self.query_pattern_type + and self.query_pattern_match + and self.query_treatment_type + ) + + template_config = StreamingAggregationConfig() + template_config.aggregationId = -1 + # template_config.metric = self.query_pattern_match.tokens["metric"]["name"] + + # setting spatial filter + # if self.query_pattern_match.tokens["metric"]["labels"].matchers: + # template_config.spatialFilter = ( + # self.query_pattern_match.tokens["metric"]["ast"] + # .prettify() + # .split("{")[1] + # .split("}")[0] + # ) + # template_config.metric = 
template_config.metric.split("{")[0] + # else: + # template_config.spatialFilter = "" + + template_config.metric, template_config.spatialFilter = ( + get_metric_and_spatial_filter(self.query_pattern_match) + ) + + statistics_to_compute = get_statistics_to_compute( + self.query_pattern_type, self.query_pattern_match + ) + + # if ( + # self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL + # or self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + # ): + # statistic_to_compute = self.query_pattern_match.tokens["function"][ + # "name" + # ].split("_")[0] + # template_config.tumblingWindowSize = self.t_repeat + # elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + # statistic_to_compute = self.query_pattern_match.tokens["aggregation"]["op"] + # template_config.tumblingWindowSize = self.prometheus_scrape_interval + # else: + # raise ValueError("Invalid query pattern type") + + configs = [] + + for statistic_to_compute in statistics_to_compute: + + aggregation_type, aggregation_sub_type = ( + map_statistic_to_precompute_operator( + statistic_to_compute, self.query_treatment_type + ) + ) + + # NEW: Set window parameters (auto-decides sliding vs tumbling based on query type) + # Issue #236: Sliding windows for ONLY_TEMPORAL queries (except DeltaSetAggregator) + # Issue #329: For range queries, use min(t_repeat, step) as effective repeat interval + logics.set_window_parameters( + self.query_pattern_type, + self.query_pattern_match, + self.t_repeat, + self.prometheus_scrape_interval, + aggregation_type, + template_config, + self.step, + ) + + # for aggregation_type, aggregation_sub_type in list_of_precompute_operators: + + all_labels = self.metric_config.config[template_config.metric] + + if self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + template_config.labels["rollup"] = KeyByLabelNames([]) + + logics.set_subpopulation_labels( + statistic_to_compute, aggregation_type, all_labels, template_config + ) + + # if 
logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["grouping"] = KeyByLabelNames([]) + # template_config.labels["aggregated"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # else: + # template_config.labels["grouping"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # template_config.labels["aggregated"] = KeyByLabelNames([]) + + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + # aggregation_modifier = self.query_pattern_match.tokens["aggregation"][ + # "modifier" + # ] + # aggregation_modifier_labels = None + # if aggregation_modifier.type == aggregation_modifier.type.By: + # aggregation_modifier_labels = KeyByLabelNames( + # aggregation_modifier.labels + # ) + # elif aggregation_modifier.type == aggregation_modifier.type.Without: + # aggregation_modifier_labels = self.metric_config.config[ + # template_config.metric + # ] - KeyByLabelNames(aggregation_modifier.labels) + # else: + # raise ValueError("Invalid aggregation modifier") + + spatial_aggregation_output_labels = ( + get_spatial_aggregation_output_labels( + self.query_pattern_match, all_labels + ) + ) + + template_config.labels["rollup"] = ( + all_labels - spatial_aggregation_output_labels + ) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + spatial_aggregation_output_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["aggregated"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + # template_config.labels["grouping"] = KeyByLabelNames([]) + # else: + # template_config.labels["aggregated"] = KeyByLabelNames([]) + # template_config.labels["grouping"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + + elif self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: + 
collapsable = get_is_collapsable( + self.query_pattern_match.tokens["function"]["name"], + self.query_pattern_match.tokens["aggregation"]["op"], + ) + + if not collapsable: + template_config.labels["rollup"] = KeyByLabelNames([]) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + all_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["grouping"] = KeyByLabelNames([]) + # template_config.labels["aggregated"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # else: + # template_config.labels["grouping"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # template_config.labels["aggregated"] = KeyByLabelNames([]) + else: + # aggregation_modifier = self.query_pattern_match.tokens[ + # "aggregation" + # ]["modifier"] + # aggregation_modifier_labels = None + # if aggregation_modifier.type == aggregation_modifier.type.By: + # aggregation_modifier_labels = KeyByLabelNames( + # aggregation_modifier.labels + # ) + # elif aggregation_modifier.type == aggregation_modifier.type.Without: + # aggregation_modifier_labels = self.metric_config.config[ + # template_config.metric + # ] - KeyByLabelNames(aggregation_modifier.labels) + # else: + # raise ValueError("Invalid aggregation modifier") + + spatial_aggregation_output_labels = ( + get_spatial_aggregation_output_labels( + self.query_pattern_match, all_labels + ) + ) + + template_config.labels["rollup"] = ( + all_labels - spatial_aggregation_output_labels + ) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + spatial_aggregation_output_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["aggregated"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + # 
template_config.labels["grouping"] = KeyByLabelNames([]) + # else: + # template_config.labels["aggregated"] = KeyByLabelNames([]) + # template_config.labels["grouping"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + + config = copy.deepcopy(template_config) + config.aggregationType = aggregation_type + config.aggregationSubType = aggregation_sub_type + config.parameters = logics.get_precompute_operator_parameters( + aggregation_type, + aggregation_sub_type, + self.query_pattern_match, + self.sketch_parameters, + ) + + # TODO: remove this hardcoding once promql_utilities.query_logics has updated logic + # https://github.com/SketchDB/Utilities/issues/44 + if aggregation_type in ["CountMinSketch", "HydraKLL"]: + # add another precompute operator for DeltaSetAggregator + delta_set_config = copy.deepcopy(template_config) + if ( + self.streaming_engine == "flink" + or self.streaming_engine == "arroyo" + ): + delta_set_config.aggregationType = "DeltaSetAggregator" + else: + raise ValueError( + f"Unsupported streaming engine: {self.streaming_engine}" + ) + delta_set_config.aggregationSubType = "" + delta_set_config.parameters = logics.get_precompute_operator_parameters( + delta_set_config.aggregationType, + delta_set_config.aggregationSubType, + self.query_pattern_match, + self.sketch_parameters, + ) + configs.append(delta_set_config) + configs.append(config) + + # Calculate cleanup parameter based on cleanup policy and window type + # This must be done AFTER set_window_parameters() has been called + cleanup_policy = self.config["cleanup_policy"] + if cleanup_policy == CleanupPolicy.NO_CLEANUP: + logger.info("Cleanup policy is NO_CLEANUP - cleanup_param will be None") + cleanup_param = None + else: + cleanup_param = logics.get_cleanup_param( + cleanup_policy=cleanup_policy, + query_pattern_type=self.query_pattern_type, + query_pattern_match=self.query_pattern_match, + t_repeat=self.t_repeat, + window_type=template_config.windowType, + 
range_duration=self.range_duration, + step=self.step, + ) + + return configs, cleanup_param diff --git a/Controller/classes/WorkloadConfig.py b/Controller/classes/WorkloadConfig.py new file mode 100644 index 0000000..c77b3fe --- /dev/null +++ b/Controller/classes/WorkloadConfig.py @@ -0,0 +1,17 @@ +from typing import List + +from classes.SingleQueryConfig import SingleQueryConfig + + +class WorkloadConfig: + def __init__(self, singe_query_configs: List[SingleQueryConfig]): + pass + + def remove_common_subexpressions(self): + pass + + def get_streaming_config(self): + pass + + def get_estimation_config(self): + pass diff --git a/Controller/controller-cli-compose.yml.j2 b/Controller/controller-cli-compose.yml.j2 new file mode 100644 index 0000000..87d0a18 --- /dev/null +++ b/Controller/controller-cli-compose.yml.j2 @@ -0,0 +1,19 @@ +# Docker compose Jinja2 template to be rendered and used by asap-cli + +services: + controller: + image: sketchdb-controller:latest # Change to 'asap' prefix + container_name: asap-controller + networks: + - asap-network + volumes: + - {{ input_config_path }}:/app/input/config.yaml:ro + - {{ output_dir }}:/app/outputs + command: [ + "--input_config", "/app/input/config.yaml", + "--output_dir", "/app/outputs", + "--prometheus_scrape_interval", "{{ prometheus_scrape_interval }}", + "--streaming_engine", "{{ streaming_engine }}"{% if punting %}, + "--enable-punting"{% endif %} + ] + restart: "no" diff --git a/Controller/docker-compose.yml.j2 b/Controller/docker-compose.yml.j2 new file mode 100644 index 0000000..d36368f --- /dev/null +++ b/Controller/docker-compose.yml.j2 @@ -0,0 +1,15 @@ +services: + controller: + image: sketchdb-controller:latest + container_name: {{ container_name }} + volumes: + - {{ input_config_path }}:/app/input/config.yaml:ro + - {{ output_dir }}:/app/output + command: [ + "--input_config", "/app/input/config.yaml", + "--output_dir", "/app/output", + "--prometheus_scrape_interval", "{{ prometheus_scrape_interval }}", + 
"--streaming_engine", "{{ streaming_engine }}"{% if punting %}, + "--enable-punting"{% endif %} + ] + restart: "no" diff --git a/Controller/installation/install.sh b/Controller/installation/install.sh new file mode 100755 index 0000000..7fe93cd --- /dev/null +++ b/Controller/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Controller Docker image..." +cd "$PARENT_DIR" +docker build . -f Dockerfile -t sketchdb-controller:latest + +echo "Controller Docker image built successfully: sketchdb-controller:latest" diff --git a/Controller/main_controller.py b/Controller/main_controller.py new file mode 100644 index 0000000..471697b --- /dev/null +++ b/Controller/main_controller.py @@ -0,0 +1,173 @@ +import os +import yaml +import argparse +from loguru import logger + +from classes.SingleQueryConfig import SingleQueryConfig +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.query_logics.enums import CleanupPolicy + + +def read_config(config_path) -> dict: + config_yaml = None + with open(config_path, "r") as f: + config_yaml = yaml.safe_load(f) + return config_yaml + + +def validate_config(config_yaml): + # NOTE: only allow unique query strings for now + query_strings = set() + for query_group_yaml in config_yaml["query_groups"]: + for query_string in query_group_yaml["queries"]: + if query_string in query_strings: + raise ValueError(f"Duplicate query string: {query_string}") + query_strings.add(query_string) + + +def main(args): + input_config_yaml = read_config(args.input_config) + + validate_config(input_config_yaml) + + metric_config = MetricConfig.from_list(input_config_yaml["metrics"]) + + # Read cleanup policy configuration (default to READ_BASED if not specified) + cleanup_policy_str = input_config_yaml.get("aggregate_cleanup", {}).get( + "policy", "read_based" + ) + try: + cleanup_policy = 
CleanupPolicy(cleanup_policy_str) + except ValueError: + valid_policies = [p.value for p in CleanupPolicy] + raise ValueError( + f"Invalid cleanup policy: '{cleanup_policy_str}'. " + f"Valid options: {valid_policies}" + ) + logger.info("Cleanup policy: {}", cleanup_policy.value) + + # Read sketch parameters configuration (use None to apply defaults in logics.py) + sketch_parameters = input_config_yaml.get("sketch_parameters", None) + if sketch_parameters: + logger.info("Using custom sketch parameters: {}", sketch_parameters) + else: + logger.info("Using default sketch parameters") + + streaming_aggregation_configs_map = {} + query_aggregation_config_keys_map = {} + + for query_group_yaml in input_config_yaml["query_groups"]: + for query_string in query_group_yaml["queries"]: + single_query_config_yaml = { + "query": query_string, + "t_repeat": query_group_yaml["repetition_delay"], + "options": query_group_yaml["controller_options"], + "cleanup_policy": cleanup_policy, + "range_duration": args.range_duration, + "step": args.step, + } + + logger.debug("Processing query {}", query_string) + + single_query_config = SingleQueryConfig( + single_query_config_yaml, + metric_config, + args.prometheus_scrape_interval, + args.streaming_engine, + sketch_parameters, + ) + + should_process_query = single_query_config.is_supported() + if args.enable_punting: + should_process_query = ( + should_process_query and single_query_config.should_be_performant() + ) + + if should_process_query: + query_aggregation_config_keys_map[single_query_config.query] = [] + current_configs, num_aggregates_to_retain = ( + single_query_config.get_streaming_aggregation_configs() + ) + + for current_config in current_configs: + key = current_config.get_identifying_key() + query_aggregation_config_keys_map[single_query_config.query].append( + (key, num_aggregates_to_retain) + ) + if key not in streaming_aggregation_configs_map: + streaming_aggregation_configs_map[key] = current_config + else: + 
logger.warning("Unsupported query") + + for idx, k in enumerate(streaming_aggregation_configs_map.keys()): + streaming_aggregation_configs_map[k].aggregationId = idx + 1 + + streaming_config = { + "aggregations": [ + config.to_dict(metric_config, "promql") + for config in streaming_aggregation_configs_map.values() + ], + "metrics": metric_config.config, + } + inference_config = { + "cleanup_policy": {"name": cleanup_policy.value}, + "queries": [], + "metrics": metric_config.config, + } + for query, streaming_config_keys in query_aggregation_config_keys_map.items(): + inference_config["queries"].append({"query": query, "aggregations": []}) + for streaming_config_key in streaming_config_keys: + aggregation_entry = { + "aggregation_id": streaming_aggregation_configs_map[ + streaming_config_key[0] + ].aggregationId, + } + # Add the appropriate parameter based on cleanup policy + cleanup_value = streaming_config_key[1] + if ( + cleanup_policy == CleanupPolicy.CIRCULAR_BUFFER + and cleanup_value is not None + ): + aggregation_entry["num_aggregates_to_retain"] = cleanup_value + elif ( + cleanup_policy == CleanupPolicy.READ_BASED and cleanup_value is not None + ): + aggregation_entry["read_count_threshold"] = cleanup_value + # For NO_CLEANUP, we don't add any parameter + inference_config["queries"][-1]["aggregations"].append(aggregation_entry) + + os.makedirs(args.output_dir, exist_ok=True) + with open(f"{args.output_dir}/streaming_config.yaml", "w") as f: + f.write(yaml.dump(streaming_config)) + + with open(f"{args.output_dir}/inference_config.yaml", "w") as f: + f.write(yaml.dump(inference_config)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_config", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--prometheus_scrape_interval", type=int, required=True) + parser.add_argument( + "--streaming_engine", type=str, choices=["flink", "arroyo"], required=True + ) 
+ parser.add_argument( + "--enable-punting", + action="store_true", + help="Enable query punting based on performance heuristics", + ) + parser.add_argument( + "--range-duration", + type=int, + default=0, + help="Range query duration (end - start) in seconds. 0 for instant queries.", + ) + parser.add_argument( + "--step", + type=int, + default=0, + help="Range query step in seconds. Required if range-duration > 0.", + ) + args = parser.parse_args() + main(args) diff --git a/Controller/requirements.txt b/Controller/requirements.txt new file mode 100644 index 0000000..ee973aa --- /dev/null +++ b/Controller/requirements.txt @@ -0,0 +1,3 @@ +loguru==0.7.3 +promql_parser==0.5.0 +PyYAML==6.0.2 diff --git a/Controller/utils/logics.py b/Controller/utils/logics.py new file mode 100644 index 0000000..31bb761 --- /dev/null +++ b/Controller/utils/logics.py @@ -0,0 +1,396 @@ +import copy +import math +from loguru import logger + +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames +from promql_utilities.query_logics.enums import QueryPatternType, CleanupPolicy +from promql_utilities.ast_matching.PromQLPattern import MatchResult +from promql_utilities.query_logics.logics import ( + does_precompute_operator_support_subpopulations, +) + +CMS_WITH_HEAP_MULT = 4 + +# Default sketch parameters for backward compatibility +DEFAULT_SKETCH_PARAMETERS = { + "CountMinSketch": {"depth": 3, "width": 1024}, + "CountMinSketchWithHeap": {"depth": 3, "width": 1024, "heap_multiplier": 4}, + "DatasketchesKLL": {"K": 20}, + "HydraKLL": {"row_num": 3, "col_num": 1024, "k": 20}, +} + + +def get_effective_repeat(t_repeat: int, step: int) -> int: + """ + Calculate effective repeat interval for range queries. + + For range queries (step > 0), use the smaller of t_repeat and step to ensure + we produce aggregates frequently enough to support the query step size. + For instant queries (step = 0), use t_repeat. 
+ """ + return min(t_repeat, step) if step > 0 else t_repeat + + +# TODO: +# We only show the logic of `get_precompute_operator_parameters` here. +# Semantics for topk query will be added in later PRs. +def get_precompute_operator_parameters( + aggregation_type: str, + aggregation_sub_type: str, + query_pattern_match: MatchResult, + sketch_parameters: dict, +) -> dict: + # Allow partial overrides: use provided parameters, fall back to defaults per sketch type + if sketch_parameters is None: + sketch_parameters = {} + + if aggregation_type in [ + "Increase", + "MinMax", + "Sum", + "MultipleIncrease", + "MultipleMinMax", + "MultipleSum", + "DeltaSetAggregator", + "SetAggregator", + ]: + return {} + elif aggregation_type == "CountMinSketch": + params = sketch_parameters.get( + "CountMinSketch", DEFAULT_SKETCH_PARAMETERS["CountMinSketch"] + ) + return {"depth": params["depth"], "width": params["width"]} + elif aggregation_type == "CountMinSketchWithHeap": + if aggregation_sub_type == "topk": + if "aggregation" not in query_pattern_match.tokens: + raise ValueError( + f"{aggregation_sub_type} query missing aggregator in the match tokens" + ) + if "param" not in query_pattern_match.tokens["aggregation"]: + raise ValueError( + f"{aggregation_sub_type} query missing required 'k' parameter" + ) + k = int(query_pattern_match.tokens["aggregation"]["param"].val) + params = sketch_parameters.get( + "CountMinSketchWithHeap", + DEFAULT_SKETCH_PARAMETERS["CountMinSketchWithHeap"], + ) + heap_mult = params.get("heap_multiplier", CMS_WITH_HEAP_MULT) + return { + "depth": params["depth"], + "width": params["width"], + "heapsize": k * heap_mult, + } + else: + raise ValueError( + f"Aggregation sub-type {aggregation_sub_type} for CountMinSketchWithHeap not supported" + ) + elif aggregation_type == "DatasketchesKLL": + params = sketch_parameters.get( + "DatasketchesKLL", DEFAULT_SKETCH_PARAMETERS["DatasketchesKLL"] + ) + return {"K": params["K"]} + elif aggregation_type == "HydraKLL": + 
params = sketch_parameters.get( + "HydraKLL", DEFAULT_SKETCH_PARAMETERS["HydraKLL"] + ) + return { + "row_num": params["row_num"], + "col_num": params["col_num"], + "k": params["k"], + } + # elif aggregation_type == "UnivMon": + # return {"depth": 3, "width": 2048, "levels": 16} + else: + raise NotImplementedError(f"Aggregation type {aggregation_type} not supported") + + +def get_cleanup_param( + cleanup_policy: CleanupPolicy, + query_pattern_type, + query_pattern_match, + t_repeat: int, + window_type: str, + range_duration: int, + step: int, +) -> int: + """ + Calculate cleanup parameter based on cleanup policy and range query params. + + Sliding windows (both policies): range_duration / step + 1 + Tumbling circular_buffer: (T_lookback + range_duration) / min(T_repeat, step) + Tumbling read_based: (T_lookback / min(T_repeat, step)) * (range_duration / step + 1) + + For ONLY_SPATIAL queries, T_lookback = T_repeat. + For instant queries, range_duration = 0 and effective_repeat = T_repeat. + + Args: + cleanup_policy: CleanupPolicy.CIRCULAR_BUFFER or CleanupPolicy.READ_BASED + query_pattern_type: QueryPatternType enum + query_pattern_match: MatchResult with query tokens + t_repeat: Query repeat interval in seconds + window_type: "sliding" or "tumbling" + range_duration: end - start in seconds (0 for instant queries) + step: Range query step in seconds (required if range_duration > 0) + + Raises: + ValueError: If exactly one of range_duration or step is zero + """ + # Validation: range_duration and step must both be zero (instant) or both non-zero (range) + if (range_duration == 0) != (step == 0): + raise ValueError( + f"range_duration and step must both be 0 (instant query) or both > 0 (range query). 
" + f"Got range_duration={range_duration}, step={step}" + ) + + is_range_query = step > 0 + + # For ONLY_SPATIAL, T_lookback = T_repeat + if query_pattern_type == QueryPatternType.ONLY_SPATIAL: + t_lookback = t_repeat + else: + t_lookback = int( + query_pattern_match.tokens["range_vector"]["range"].total_seconds() + ) + + # For sliding windows: range_duration / step + 1 (same for both policies) + if window_type == "sliding": + if is_range_query: + result = range_duration // step + 1 + else: + result = 1 # instant query + logger.debug( + f"Sliding window mode: cleanup_param = {result} " + f"(range_duration={range_duration}s, step={step}s)" + ) + return result + + # Tumbling window calculations + effective_repeat = get_effective_repeat(t_repeat, step) + + # We use ceiling division because even if the time span doesn't fully fill + # a bucket, we still need that bucket to cover the partial data. + # E.g., if T_lookback=10s and effective_repeat=100s, we still need 1 bucket. + if cleanup_policy == CleanupPolicy.CIRCULAR_BUFFER: + # ceil((T_lookback + range_duration) / effective_repeat) + result = math.ceil((t_lookback + range_duration) / effective_repeat) + elif cleanup_policy == CleanupPolicy.READ_BASED: + # ceil(T_lookback / effective_repeat) * (range_duration / step + 1) + lookback_buckets = math.ceil(t_lookback / effective_repeat) + if is_range_query: + num_steps = range_duration // step + 1 + else: + num_steps = 1 # instant query + result = lookback_buckets * num_steps + else: + raise ValueError(f"Invalid cleanup policy: {cleanup_policy}") + + logger.debug( + f"Tumbling window mode ({cleanup_policy.value}): cleanup_param = {result} " + f"(t_lookback={t_lookback}s, t_repeat={t_repeat}s, " + f"range_duration={range_duration}s, step={step}s)" + ) + return result + + +def should_use_sliding_window(query_pattern_type, aggregation_type): + """ + Decide if sliding windows should be used based on query type and aggregation type. 
+ + For Issue #236: Use sliding windows for ALL ONLY_TEMPORAL queries except DeltaSetAggregator. + This eliminates merging overhead in QueryEngine at the cost of more computation in Arroyo. + + Args: + query_pattern_type: ONLY_TEMPORAL, ONLY_SPATIAL, or ONE_TEMPORAL_ONE_SPATIAL + aggregation_type: Type of aggregation (e.g., 'DatasketchesKLL', 'Sum', etc.) + + Returns: + bool: True if sliding windows should be used (currently always False: sliding windows are hard-disabled below until the Arroyo crash is fixed) + """ + # NOTE: returning False since sliding window pipelines are causing arroyo to crash + return False + # Only use sliding for ONLY_TEMPORAL queries (not ONE_TEMPORAL_ONE_SPATIAL or ONLY_SPATIAL) + if query_pattern_type != QueryPatternType.ONLY_TEMPORAL: + logger.debug( + f"Query pattern {query_pattern_type} not eligible for sliding windows " + f"(only ONLY_TEMPORAL supported)" + ) + return False + + # Explicitly exclude DeltaSetAggregator (paired with CMS but needs tumbling) + if aggregation_type == "DeltaSetAggregator": + logger.debug("DeltaSetAggregator excluded from sliding windows") + return False + + # All other ONLY_TEMPORAL aggregations use sliding windows + logger.info( + f"Aggregation type '{aggregation_type}' with {query_pattern_type} -> SLIDING windows" + ) + return True + + +def set_window_parameters( + query_pattern_type, + query_pattern_match, + t_repeat, + prometheus_scrape_interval, + aggregation_type, + template_config, + step: int, +): + """ + Set window parameters for streaming aggregation config. + Auto-decides between sliding and tumbling windows based on query type and aggregation cost. + + For ONLY_TEMPORAL queries with expensive aggregations (KLL, CMS): + - Uses SLIDING windows: windowSize = range duration, slideInterval = effective_repeat + - This reduces QueryEngine latency by avoiding merges (Arroyo does more work upfront) + + For other queries: + - Uses TUMBLING windows: windowSize = slideInterval = effective_repeat + - This is the original behavior + + For range queries (step > 0), effective_repeat = min(t_repeat, step).
+ For instant queries (step = 0), effective_repeat = t_repeat. + + Args: + query_pattern_type: Pattern type (ONLY_TEMPORAL, ONLY_SPATIAL, ONE_TEMPORAL_ONE_SPATIAL) + query_pattern_match: Matched PromQL pattern containing query metadata + t_repeat: Query repeat interval in seconds + prometheus_scrape_interval: Scrape interval in seconds + aggregation_type: Type of aggregation operator + template_config: StreamingAggregationConfig to update + step: Range query step in seconds (0 for instant queries) + """ + # For range queries, use min(t_repeat, step) as the effective repeat interval + effective_repeat = get_effective_repeat(t_repeat, step) + + # Decide if we should use sliding windows + use_sliding_window = should_use_sliding_window(query_pattern_type, aggregation_type) + + if use_sliding_window: + # SLIDING WINDOW for ONLY_TEMPORAL queries with expensive aggregations + logger.info( + f"Configuring SLIDING WINDOW for {query_pattern_type} " + f"with {aggregation_type}" + ) + + if query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + # Window size = range duration (e.g., 15m = 900s) + range_seconds = int( + query_pattern_match.tokens["range_vector"]["range"].total_seconds() + ) + + # Check if this is actually a tumbling window (windowSize == slideInterval) + if range_seconds == effective_repeat: + logger.info( + f"Detected windowSize == slideInterval ({range_seconds}s). " + f"Using tumbling window instead of sliding for efficiency." 
+ ) + template_config.windowSize = effective_repeat + template_config.slideInterval = effective_repeat + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = effective_repeat + else: + # True sliding window + template_config.windowSize = range_seconds + template_config.slideInterval = effective_repeat + template_config.windowType = "sliding" + + logger.info( + f"Sliding window params: windowSize={range_seconds}s, " + f"slideInterval={effective_repeat}s " + f"(each window has {range_seconds} seconds of data, slides every {effective_repeat}s)" + ) + + # Set deprecated field for backward compatibility + template_config.tumblingWindowSize = effective_repeat + else: + # This should never be reached due to should_use_sliding_window() check + assert False, ( + f"should_use_sliding_window returned True for {query_pattern_type}, " + f"but sliding windows only supported for ONLY_TEMPORAL" + ) + else: + # TUMBLING WINDOW (existing logic) + logger.info( + f"Configuring TUMBLING WINDOW for {query_pattern_type} " + f"with {aggregation_type}" + ) + _set_tumbling_window_parameters( + query_pattern_type, + effective_repeat, + prometheus_scrape_interval, + template_config, + ) + + +def _set_tumbling_window_parameters( + query_pattern_type, effective_repeat, prometheus_scrape_interval, template_config +): + """ + Set tumbling window parameters. 
+ + Args: + query_pattern_type: Pattern type (ONLY_TEMPORAL, ONLY_SPATIAL, ONE_TEMPORAL_ONE_SPATIAL) + effective_repeat: Effective repeat interval (min(t_repeat, step) for range queries) + prometheus_scrape_interval: Scrape interval in seconds + template_config: StreamingAggregationConfig to update + """ + if ( + query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + template_config.windowSize = effective_repeat + template_config.slideInterval = effective_repeat + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = effective_repeat + + logger.debug( + f"Tumbling window params: windowSize={effective_repeat}s, slideInterval={effective_repeat}s" + ) + elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: + template_config.windowSize = prometheus_scrape_interval + template_config.slideInterval = prometheus_scrape_interval + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = prometheus_scrape_interval + + logger.debug( + f"Tumbling window params: windowSize={prometheus_scrape_interval}s, " + f"slideInterval={prometheus_scrape_interval}s" + ) + else: + raise ValueError("Invalid query pattern type") + + +# COMMENTED OUT - Original function kept for rollback +# Issue #236: Replaced with set_window_parameters() to support sliding windows +# +# def set_tumbling_window_size( +# query_pattern_type, t_repeat, prometheus_scrape_interval, template_config +# ): +# if ( +# query_pattern_type == QueryPatternType.ONLY_TEMPORAL +# or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL +# ): +# template_config.tumblingWindowSize = t_repeat +# elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: +# template_config.tumblingWindowSize = prometheus_scrape_interval +# else: +# raise ValueError("Invalid query pattern type") + + +def set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + subpopulation_labels: KeyByLabelNames, + 
template_config, +): + if does_precompute_operator_support_subpopulations( + statistic_to_compute, aggregation_type + ): + template_config.labels["grouping"] = KeyByLabelNames([]) + template_config.labels["aggregated"] = copy.deepcopy(subpopulation_labels) + else: + template_config.labels["grouping"] = copy.deepcopy(subpopulation_labels) + template_config.labels["aggregated"] = KeyByLabelNames([]) diff --git a/Controller/utils/test_logics.py b/Controller/utils/test_logics.py new file mode 100644 index 0000000..b44c889 --- /dev/null +++ b/Controller/utils/test_logics.py @@ -0,0 +1,268 @@ +"""Unit tests for logics.py cleanup parameter calculations.""" + +import pytest +from datetime import timedelta +from unittest.mock import MagicMock + +from promql_utilities.query_logics.enums import QueryPatternType, CleanupPolicy +from logics import get_cleanup_param + + +def create_mock_match(range_seconds: int) -> MagicMock: + """Create a mock match result with the given range duration.""" + mock = MagicMock() + mock.tokens = {"range_vector": {"range": timedelta(seconds=range_seconds)}} + return mock + + +class TestGetCleanupParamValidation: + """Tests for validation logic in get_cleanup_param.""" + + def test_range_duration_without_step_raises_error(self): + """range_duration > 0 with step = 0 is invalid.""" + mock_match = create_mock_match(900) + with pytest.raises(ValueError, match="must both be 0.*or both > 0"): + get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=0, + ) + + def test_step_without_range_duration_raises_error(self): + """step > 0 with range_duration = 0 is invalid.""" + mock_match = create_mock_match(900) + with pytest.raises(ValueError, match="must both be 0.*or both > 0"): + get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + 
query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=60, + ) + + def test_instant_query_both_zero_is_valid(self): + """Instant queries: both range_duration=0 and step=0 is valid.""" + mock_match = create_mock_match(900) + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 # ceil(900 / 30) = 30 + + +class TestSlidingWindowCleanupParam: + """Tests for sliding window cleanup parameter calculations.""" + + def test_sliding_instant_query(self): + """Sliding window instant query returns 1.""" + mock_match = create_mock_match(900) + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=0, + step=0, + ) + assert result == 1 + + def test_sliding_range_query(self): + """Sliding window: range_duration / step + 1.""" + mock_match = create_mock_match(900) + # range_duration=3600, step=60 -> 3600/60 + 1 = 61 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + assert result == 61 + + def test_sliding_same_for_both_policies(self): + """Sliding windows use same formula for both policies.""" + mock_match = create_mock_match(900) + result_cb = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + result_rb = get_cleanup_param( + 
cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + assert result_cb == result_rb == 61 + + +class TestTumblingCircularBufferCleanupParam: + """Tests for tumbling window + circular_buffer cleanup parameter.""" + + def test_instant_query(self): + """Instant query: T_lookback / T_repeat.""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, T_repeat=30 -> 900/30 = 30 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 + + def test_range_query(self): + """Range query: (T_lookback + range_duration) / min(T_repeat, step).""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, range_duration=3600, T_repeat=30, step=60 + # effective_repeat = min(30, 60) = 30 + # (900 + 3600) / 30 = 150 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 150 + + def test_step_smaller_than_t_repeat(self): + """When step < T_repeat, use step as effective_repeat.""" + mock_match = create_mock_match(900) + # T_lookback=900, range_duration=3600, T_repeat=60, step=30 + # effective_repeat = min(60, 30) = 30 + # (900 + 3600) / 30 = 150 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=60, + window_type="tumbling", + range_duration=3600, + step=30, + ) + assert result == 150 + + +class TestTumblingReadBasedCleanupParam: + """Tests for tumbling window + read_based cleanup 
parameter.""" + + def test_instant_query(self): + """Instant query: (T_lookback / T_repeat) * 1.""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, T_repeat=30 -> (900/30) * 1 = 30 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 + + def test_range_query(self): + """Range query: (T_lookback / min(T_repeat, step)) * (range_duration / step + 1).""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, range_duration=3600, T_repeat=30, step=60 + # effective_repeat = min(30, 60) = 30 + # lookback_buckets = 900 / 30 = 30 + # num_steps = 3600 / 60 + 1 = 61 + # result = 30 * 61 = 1830 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 1830 + + +class TestOnlySpatialQueries: + """Tests for ONLY_SPATIAL queries (T_lookback = T_repeat).""" + + def test_only_spatial_instant_query(self): + """ONLY_SPATIAL uses T_lookback = T_repeat.""" + mock_match = MagicMock() # No range_vector token needed + # T_lookback = T_repeat = 30 + # circular_buffer instant: T_lookback / T_repeat = 30/30 = 1 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_SPATIAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 1 + + def test_only_spatial_range_query(self): + """ONLY_SPATIAL range query uses T_lookback = T_repeat.""" + mock_match = MagicMock() + # T_lookback = T_repeat = 30, range_duration=3600, step=60 + # effective_repeat = min(30, 60) = 30 + # circular_buffer: (30 + 3600) / 30 = 121 + result = 
get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_SPATIAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 121 + + +class TestMinimumResult: + """Tests that result is always at least 1.""" + + def test_minimum_result_is_one(self): + """Result should never be less than 1.""" + mock_match = create_mock_match(10) # Very small lookback + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=100, # Larger than lookback + window_type="tumbling", + range_duration=0, + step=0, + ) + # 10 / 100 = 0, but should be at least 1 + assert result == 1 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ExecutionUtilities/.gitignore b/ExecutionUtilities/.gitignore new file mode 100644 index 0000000..533dc14 --- /dev/null +++ b/ExecutionUtilities/.gitignore @@ -0,0 +1,9 @@ +**/__pycache__ +**/*.pyc +**/*.swp +.DS_Store + +**/target + +clickhouse-benchmark-pipeline/benchmark_results/ +**/data/ diff --git a/ExecutionUtilities/LICENSE b/ExecutionUtilities/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/ExecutionUtilities/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md b/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md new file mode 100644 index 0000000..1680e91 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md @@ -0,0 +1,82 @@ +# ClickHouse Benchmark Pipeline + +Benchmark ClickHouse with 43 ClickBench queries using Kafka data ingestion. + +## Prerequisites + +Install and run Kafka and ClickHouse natively (no Docker): + +```bash +# Install Kafka (one-time) +cd /path/to/Utilities/installation/kafka +./install.sh /path/to/Utilities/installation/kafka + +# Install ClickHouse (one-time) +cd /path/to/Utilities/installation/clickhouse +./install.sh /path/to/Utilities/installation/clickhouse +``` + +## Usage + +```bash +# 1. Start Kafka (in a terminal) +cd /path/to/Utilities/installation/kafka +./run.sh kafka/ + +# 2. Create Kafka topic +kafka/bin/kafka-topics.sh --create --topic hits --bootstrap-server localhost:9092 + +# 3. Start ClickHouse (in another terminal) +cd /path/to/Utilities/installation/clickhouse +./run.sh /path/to/Utilities/installation/clickhouse + +# 4. Initialize ClickHouse tables +./scripts/init_clickhouse.sh + +# 5. Generate data (choose one mode) +DATA_MODE=clickbench TOTAL_RECORDS=100000 ./scripts/generate_data.sh +DATA_MODE=fake TOTAL_RECORDS=100000 ./scripts/generate_data.sh + +# 6. Check data +./scripts/check_data.sh + +# 7. 
Run benchmark +./scripts/run_benchmark.sh +``` + +## Data Modes + +| Mode | Description | +|------|-------------| +| `fake` | Synthetic data | +| `clickbench` | Real ClickBench dataset (~100M rows) | + +## Configuration + +Edit `config.env` to change defaults. Environment variables override config values. + +# Elasticsearch Benchmark Pipeline + +## Prerequisites + +Follow instructions to install Elasticsearch: +```bash +cd /path/to/Utilities/installation/elastic +``` + +## Configuration + +Edit `config.env` to update the `ES_API_KEY` field after installing Elasticsearch locally. + +## Usage + +```bash +# 1. Load H2O CSV data into Elastic +./scripts/init_elastic.sh + +# 2. Check data +./scripts/check_elastic_data.sh + +# 3. Run benchmark +./scripts/run_benchmark.sh h2o_elastic +``` \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py new file mode 100644 index 0000000..0c774b4 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +ClickBench Data Downloader + +Downloads the official ClickBench dataset (hits.json.gz). + +Usage: + python download_clickbench_data.py --output-dir ./data + + # Specify output file directly: + python download_clickbench_data.py --output-file /path/to/hits.json.gz +""" + +import argparse +import os +import sys +import urllib.request + + +def load_config(): + """Load configuration from config.env file.
All values must be defined there.""" + config = {} + script_dir = os.path.dirname(os.path.abspath(__file__)) + project_root = os.path.dirname(script_dir) + + # Load config (environment variables take precedence) + config_file = os.path.join(project_root, "config.env") + if os.path.exists(config_file): + with open(config_file) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#") and "=" in line: + key, value = line.split("=", 1) + if key not in os.environ: + config[key] = value + + # Store project root for path resolution + config["_PROJECT_ROOT"] = project_root + return config + + +_config = load_config() + + +def get_config(key): + """Get config value from environment or config.env. Raises error if not found.""" + if key in os.environ: + return os.environ[key] + if key in _config: + return _config[key] + raise ValueError(f"Configuration '{key}' not found. Please set it in config.env") + + +# Get values from config (no hardcoded fallbacks) +CLICKBENCH_URL = get_config("CLICKBENCH_URL") +CLICKBENCH_FILENAME = get_config("CLICKBENCH_FILENAME") +CLICKBENCH_DATA_DIR = get_config("CLICKBENCH_DATA_DIR") + + +def download_clickbench_data(output_path: str) -> str: + """Download ClickBench dataset if not already present.""" + if os.path.exists(output_path): + print(f"Using existing file: {output_path}") + return output_path + + print(f"Downloading ClickBench dataset from {CLICKBENCH_URL}") + print("This is a large file (~14GB compressed, ~100M rows). 
Please wait...") + + request = urllib.request.Request( + CLICKBENCH_URL, + headers={"User-Agent": "Mozilla/5.0 (compatible; ClickBench-Importer/1.0)"}, + ) + + try: + with urllib.request.urlopen(request) as response: + total_size = int(response.headers.get("Content-Length", 0)) + downloaded = 0 + last_percent = -1 + block_size = 8192 * 128 # 1MB blocks + + with open(output_path, "wb") as out_file: + while True: + block = response.read(block_size) + if not block: + break + out_file.write(block) + downloaded += len(block) + + if total_size > 0: + percent = downloaded * 100 // total_size + if percent != last_percent: + last_percent = percent + downloaded_mb = downloaded / (1024 * 1024) + total_mb = total_size / (1024 * 1024) + sys.stdout.write( + f"\rProgress: {percent}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)" + ) + sys.stdout.flush() + + print("\nDownload complete!") + return output_path + + except urllib.error.HTTPError as e: + print(f"\nDownload failed: HTTP {e.code} - {e.reason}") + print("You can manually download the file and use --output-file option:") + print(f" wget {CLICKBENCH_URL}") + print(f" curl -L -o {CLICKBENCH_FILENAME} {CLICKBENCH_URL}") + raise + + +def main(): + # Compute default output dir from config + default_output_dir = os.path.join(_config["_PROJECT_ROOT"], CLICKBENCH_DATA_DIR) + + parser = argparse.ArgumentParser( + description="Download ClickBench dataset", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--output-file", + help=f"Path to save {CLICKBENCH_FILENAME} (overrides --output-dir)", + ) + parser.add_argument( + "--output-dir", + default=default_output_dir, + help=f"Directory to download data to (default from config: {CLICKBENCH_DATA_DIR})", + ) + + args = parser.parse_args() + + if args.output_file: + output_path = args.output_file + os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True) + else: + os.makedirs(args.output_dir, exist_ok=True) + output_path = 
os.path.join(args.output_dir, CLICKBENCH_FILENAME) + + download_clickbench_data(output_path) + + +if __name__ == "__main__": + main() diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py new file mode 100644 index 0000000..b438728 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +import sys +import os +import argparse + +# Check for gdown dependency +try: + import gdown +except ImportError: + # This should be handled by the shell script, but just in case: + print("Error: 'gdown' is missing. Installing it now...") + import subprocess + + subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) + import gdown + +# H2O GroupBy Dataset ID +FILE_ID = "15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" +DEFAULT_FILENAME = "G1_1e7_1e2_0_0.csv" + + +def main(): + parser = argparse.ArgumentParser(description="Download H2O Benchmark Data") + parser.add_argument( + "--output-dir", required=True, help="Directory to save the file" + ) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + output_path = os.path.join(args.output_dir, DEFAULT_FILENAME) + + # Simple check to avoid redownloading if it looks valid (>100MB) + if os.path.exists(output_path) and os.path.getsize(output_path) > 100 * 1024 * 1024: + print(f"File {output_path} already exists. 
Skipping download.") + return + + print(f"Downloading H2O dataset (ID: {FILE_ID}) using gdown...") + + # gdown automatically handles the 'virus scan' warning and cookies + url = f"https://drive.google.com/uc?id={FILE_ID}" + gdown.download(url, output_path, quiet=False) + + +if __name__ == "__main__": + main() diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt new file mode 100644 index 0000000..42edc69 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt @@ -0,0 +1 @@ +confluent-kafka>=2.0.0 diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql new file mode 100644 index 0000000..e482da5 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql @@ -0,0 +1,132 @@ +-- ClickBench Benchmark Queries +-- Source: https://benchmark.clickhouse.com/ +-- These 43 queries test various aspects of analytical query performance + +-- Q1: Simple count +SELECT COUNT(*) FROM hits; + +-- Q2: Count with filter +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; + +-- Q3: Aggregation functions +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; + +-- Q4: Average of large integers +SELECT AVG(UserID) FROM hits; + +-- Q5: Count distinct users +SELECT COUNT(DISTINCT UserID) FROM hits; + +-- Q6: Count distinct search phrases +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; + +-- Q7: Min/Max dates +SELECT MIN(EventDate), MAX(EventDate) FROM hits; + +-- Q8: Group by with filter and order +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; + +-- Q9: Count distinct with group by +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; + +-- Q10: Multiple aggregations 
with group by +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; + +-- Q11: Filter on string column with group by +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; + +-- Q12: Multiple string columns group by +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; + +-- Q13: Search phrase analysis +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q14: Search phrase with distinct users +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; + +-- Q15: Search engine and phrase analysis +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q16: Top users by activity +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q17: User search behavior +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q18: User search behavior (no order) +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; + +-- Q19: Time-based user analysis +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q20: Point query on specific user +SELECT UserID FROM hits WHERE UserID = 435090932899640449; + +-- Q21: URL pattern matching +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; + +-- Q22: URL pattern with search phrase +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND 
SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q23: Complex pattern matching +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q24: Full row retrieval with pattern +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; + +-- Q25: String column ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; + +-- Q26: Alphabetical ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; + +-- Q27: Compound ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; + +-- Q28: URL length analysis +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +-- Q29: Referer domain extraction with regex +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +-- Q30: Wide aggregation (sum of 90 derived columns) +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), 
SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; + +-- Q31: Search with IP grouping +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q32: Watch 
and IP grouping with search filter +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q33: Watch and IP grouping +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q34: URL popularity +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; + +-- Q35: URL popularity with constant column +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; + +-- Q36: IP arithmetic +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; + +-- Q37: Counter-specific page views +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; + +-- Q38: Counter-specific title analysis +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; + +-- Q39: Link analysis with offset +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +-- Q40: Traffic source analysis +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst 
ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +-- Q41: URL hash analysis +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; + +-- Q42: Window dimensions analysis +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; + +-- Q43: Time series analysis +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql new file mode 100644 index 0000000..cc06844 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql @@ -0,0 +1,344 @@ +-- ClickHouse initialization script for ClickBench pipeline +-- Schema matches actual ClickBench JSON data types + +-- Create the main hits table with MergeTree engine (ClickBench compatible) +CREATE TABLE IF NOT EXISTS hits +( + WatchID Int64, + JavaEnable UInt8, + Title String, + GoodEvent Int16, + EventTime DateTime, + EventDate Date, + CounterID UInt32, + ClientIP Int32, + RegionID UInt32, + UserID Int64, + CounterClass Int8, + OS UInt8, + UserAgent UInt8, + URL String, + Referer String, + IsRefresh UInt8, + RefererCategoryID UInt16, + RefererRegionID UInt32, + URLCategoryID 
UInt16, + URLRegionID UInt32, + ResolutionWidth UInt16, + ResolutionHeight UInt16, + ResolutionDepth UInt8, + FlashMajor UInt8, + FlashMinor UInt8, + FlashMinor2 String, + NetMajor UInt8, + NetMinor UInt8, + UserAgentMajor UInt16, + UserAgentMinor String, + CookieEnable UInt8, + JavascriptEnable UInt8, + IsMobile UInt8, + MobilePhone UInt8, + MobilePhoneModel String, + Params String, + IPNetworkID UInt32, + TraficSourceID Int8, + SearchEngineID UInt16, + SearchPhrase String, + AdvEngineID UInt8, + IsArtifical UInt8, + WindowClientWidth UInt16, + WindowClientHeight UInt16, + ClientTimeZone Int16, + ClientEventTime DateTime, + SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, + SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, + PageCharset String, + CodeVersion UInt32, + IsLink UInt8, + IsDownload UInt8, + IsNotBounce UInt8, + FUniqID Int64, + OriginalURL String, + HID UInt32, + IsOldCounter UInt8, + IsEvent UInt8, + IsParameter UInt8, + DontCountHits UInt8, + WithHash UInt8, + HitColor String, + LocalEventTime DateTime, + Age UInt8, + Sex UInt8, + Income UInt8, + Interests UInt16, + Robotness UInt8, + RemoteIP Int32, + WindowName Int32, + OpenerName Int32, + HistoryLength Int16, + BrowserLanguage String, + BrowserCountry String, + SocialNetwork String, + SocialAction String, + HTTPError UInt16, + SendTiming UInt32, + DNSTiming UInt32, + ConnectTiming UInt32, + ResponseStartTiming UInt32, + ResponseEndTiming UInt32, + FetchTiming UInt32, + SocialSourceNetworkID UInt8, + SocialSourcePage String, + ParamPrice Int64, + ParamOrderID String, + ParamCurrency String, + ParamCurrencyID UInt16, + OpenstatServiceName String, + OpenstatCampaignID String, + OpenstatAdID String, + OpenstatSourceID String, + UTMSource String, + UTMMedium String, + UTMCampaign String, + UTMContent String, + UTMTerm String, + FromTag String, + HasGCLID UInt8, + RefererHash Int64, + URLHash Int64, + CLID UInt32 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY 
(CounterID, EventDate, intHash32(UserID), EventTime, WatchID) +SETTINGS index_granularity = 8192; + +-- Create Kafka engine table for consuming from Kafka +CREATE TABLE IF NOT EXISTS hits_kafka +( + WatchID Int64, + JavaEnable UInt8, + Title String, + GoodEvent Int16, + EventTime DateTime, + EventDate Date, + CounterID UInt32, + ClientIP Int32, + RegionID UInt32, + UserID Int64, + CounterClass Int8, + OS UInt8, + UserAgent UInt8, + URL String, + Referer String, + IsRefresh UInt8, + RefererCategoryID UInt16, + RefererRegionID UInt32, + URLCategoryID UInt16, + URLRegionID UInt32, + ResolutionWidth UInt16, + ResolutionHeight UInt16, + ResolutionDepth UInt8, + FlashMajor UInt8, + FlashMinor UInt8, + FlashMinor2 String, + NetMajor UInt8, + NetMinor UInt8, + UserAgentMajor UInt16, + UserAgentMinor String, + CookieEnable UInt8, + JavascriptEnable UInt8, + IsMobile UInt8, + MobilePhone UInt8, + MobilePhoneModel String, + Params String, + IPNetworkID UInt32, + TraficSourceID Int8, + SearchEngineID UInt16, + SearchPhrase String, + AdvEngineID UInt8, + IsArtifical UInt8, + WindowClientWidth UInt16, + WindowClientHeight UInt16, + ClientTimeZone Int16, + ClientEventTime DateTime, + SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, + SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, + PageCharset String, + CodeVersion UInt32, + IsLink UInt8, + IsDownload UInt8, + IsNotBounce UInt8, + FUniqID Int64, + OriginalURL String, + HID UInt32, + IsOldCounter UInt8, + IsEvent UInt8, + IsParameter UInt8, + DontCountHits UInt8, + WithHash UInt8, + HitColor String, + LocalEventTime DateTime, + Age UInt8, + Sex UInt8, + Income UInt8, + Interests UInt16, + Robotness UInt8, + RemoteIP Int32, + WindowName Int32, + OpenerName Int32, + HistoryLength Int16, + BrowserLanguage String, + BrowserCountry String, + SocialNetwork String, + SocialAction String, + HTTPError UInt16, + SendTiming UInt32, + DNSTiming UInt32, + ConnectTiming UInt32, + ResponseStartTiming UInt32, + ResponseEndTiming 
UInt32, + FetchTiming UInt32, + SocialSourceNetworkID UInt8, + SocialSourcePage String, + ParamPrice Int64, + ParamOrderID String, + ParamCurrency String, + ParamCurrencyID UInt16, + OpenstatServiceName String, + OpenstatCampaignID String, + OpenstatAdID String, + OpenstatSourceID String, + UTMSource String, + UTMMedium String, + UTMCampaign String, + UTMContent String, + UTMTerm String, + FromTag String, + HasGCLID UInt8, + RefererHash Int64, + URLHash Int64, + CLID UInt32 +) +ENGINE = Kafka +SETTINGS + kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'hits', + kafka_group_name = 'clickhouse_hits_consumer', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 1, + kafka_max_block_size = 65536; + +-- Create materialized view to move data from Kafka to MergeTree +CREATE MATERIALIZED VIEW IF NOT EXISTS hits_mv TO hits AS +SELECT + WatchID, + JavaEnable, + Title, + GoodEvent, + EventTime, + EventDate, + CounterID, + ClientIP, + RegionID, + UserID, + CounterClass, + OS, + UserAgent, + URL, + Referer, + IsRefresh, + RefererCategoryID, + RefererRegionID, + URLCategoryID, + URLRegionID, + ResolutionWidth, + ResolutionHeight, + ResolutionDepth, + FlashMajor, + FlashMinor, + FlashMinor2, + NetMajor, + NetMinor, + UserAgentMajor, + UserAgentMinor, + CookieEnable, + JavascriptEnable, + IsMobile, + MobilePhone, + MobilePhoneModel, + Params, + IPNetworkID, + TraficSourceID, + SearchEngineID, + SearchPhrase, + AdvEngineID, + IsArtifical, + WindowClientWidth, + WindowClientHeight, + ClientTimeZone, + ClientEventTime, + SilverlightVersion1, + SilverlightVersion2, + SilverlightVersion3, + SilverlightVersion4, + PageCharset, + CodeVersion, + IsLink, + IsDownload, + IsNotBounce, + FUniqID, + OriginalURL, + HID, + IsOldCounter, + IsEvent, + IsParameter, + DontCountHits, + WithHash, + HitColor, + LocalEventTime, + Age, + Sex, + Income, + Interests, + Robotness, + RemoteIP, + WindowName, + OpenerName, + HistoryLength, + BrowserLanguage, + BrowserCountry, + SocialNetwork, + 
SocialAction, + HTTPError, + SendTiming, + DNSTiming, + ConnectTiming, + ResponseStartTiming, + ResponseEndTiming, + FetchTiming, + SocialSourceNetworkID, + SocialSourcePage, + ParamPrice, + ParamOrderID, + ParamCurrency, + ParamCurrencyID, + OpenstatServiceName, + OpenstatCampaignID, + OpenstatAdID, + OpenstatSourceID, + UTMSource, + UTMMedium, + UTMCampaign, + UTMContent, + UTMTerm, + FromTag, + HasGCLID, + RefererHash, + URLHash, + CLID +FROM hits_kafka; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql new file mode 100644 index 0000000..aa69de4 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql @@ -0,0 +1,35 @@ +CREATE TABLE IF NOT EXISTS h2o_groupby_queue +( + timestamp BIGINT, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = Kafka +SETTINGS kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'h2o_groupby', + kafka_group_name = 'clickhouse_h2o', + kafka_format = 'JSONEachRow'; + +CREATE TABLE IF NOT EXISTS h2o_groupby +( + timestamp BIGINT, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = MergeTree +ORDER BY (id1, id2, id3, id4); + +CREATE MATERIALIZED VIEW IF NOT EXISTS h2o_groupby_mv TO h2o_groupby AS +SELECT * FROM h2o_groupby_queue; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env b/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env new file mode 100644 index 0000000..0ca2c41 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env @@ -0,0 +1,40 @@ +# ClickHouse Benchmark Pipeline Configuration +# Override any value by setting the environment variable before running scripts. 
+ +PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Kafka +KAFKA_BROKER=localhost:9092 # Native Kafka +KAFKA_TOPIC=hits +KAFKA_HOME="${PROJECT_DIR}/../../Utilities/installation/kafka/kafka" + +# ClickHouse +CLICKHOUSE_HOST=localhost +CLICKHOUSE_HTTP_PORT=8123 + +# Data Generation +DATA_BATCH_SIZE=50000 +DATA_FREQUENCY_SECONDS=0 +FAKE_NUM_USERS=10000 +FAKE_NUM_COUNTERS=100 + +# ClickBench Data +CLICKBENCH_URL=https://datasets.clickhouse.com/hits_compatible/hits.json.gz +CLICKBENCH_FILENAME=hits.json.gz +CLICKBENCH_DATA_DIR=clickbench_importer/data + +# Benchmark +BENCHMARK_QUERIES_FILE=benchmark_queries.sql +BENCHMARK_RESULTS_DIR=benchmark_results + +# H2O Benchmark +H2O_DATA_URL="https://drive.google.com/uc?export=download&id=15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" +H2O_FILENAME="G1_1e7_1e2_0_0.csv" +H2O_KAFKA_TOPIC="h2o_groupby" + +# Elasticsearch Configuration +ES_HOST=localhost +ES_PORT=9200 +ES_INDEX_NAME=h2o_benchmark +ES_BULK_SIZE=10000 +ES_API_KEY=your-api-key \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock new file mode 100644 index 0000000..f5e3c48 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock @@ -0,0 +1,2478 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" + +[[package]] +name = 
"async-compression" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "data_exporter" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "csv", + "elasticsearch", + "flate2", + "futures", + "indicatif", + "rand", + "rand_distr", + "rdkafka", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "elasticsearch" +version = "8.5.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d9bd57d914cc66ce878f098f63ed7b5d5b64c30644a5adb950b008f874a6c6" +dependencies = [ + "base64 0.11.0", + "bytes", + "dyn-clone", + "lazy_static", + "percent-encoding", + "reqwest", + "rustc_version", + "serde", + "serde_json", + "serde_with", + "url", + "void", +] + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + 
"serde_core", +] + +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = 
"libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = 
"native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.10.0", + 
"cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.114", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" 
+dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "async-compression", + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + 
+[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.10.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + 
+[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" 
+version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom 0.4.1", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + 
"once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.114", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.15.5", + "indexmap", + "semver 1.0.27", +] + +[[package]] +name = "web-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] 
+name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + 
"windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = 
"0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.114", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.114", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.10.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver 1.0.27", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + 
+[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "zmij" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml new file mode 100644 index 0000000..5107663 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "data_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4", features = ["derive", "env"] } +csv = "1.3" +flate2 = "1" +rand = { version = "0.8", features = ["small_rng"] } +rand_distr = "0.4" +rdkafka = { version = "0.36", features = ["cmake-build"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +chrono = "0.4" +futures = "0.3" +indicatif = "0.17" + +# Elasticsearch support +elasticsearch = "8.5.0-alpha.1" + +[profile.release] +opt-level = 3 +lto = true +codegen-units = 1 \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs new file mode 100644 index 0000000..58c63f7 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs @@ -0,0 +1,1075 
//! data_exporter — generates synthetic ClickBench-style data or replays
//! ClickBench/H2O datasets, shipping records to Kafka or Elasticsearch
//! (see the `Mode` enum later in this file).

use clap::{Parser, ValueEnum};
use elasticsearch::{http::transport::Transport, BulkParts, Elasticsearch};
use flate2::read::GzDecoder;
use futures::future::join_all;
use indicatif::{ProgressBar, ProgressStyle};
use rand::rngs::SmallRng;
use rand::SeedableRng;
use rand_distr::{Distribution, Uniform};
use rdkafka::config::ClientConfig;
use rdkafka::producer::{FutureProducer, FutureRecord};
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::time::Duration;
use tokio::time::sleep;

// Fixed seed for the SmallRng used by the generators, so synthetic runs are
// reproducible. NOTE(review): the seeding call site is outside this chunk —
// confirm it actually uses SmallRng::seed_from_u64(RNG_SEED).
const RNG_SEED: u64 = 42;

// Sample data for realistic values.
// Pools below are picked from uniformly by `random_choice`; repeated empty
// strings deliberately skew the distribution toward "no value".
const SEARCH_PHRASES: &[&str] = &[
    "", "", "", "", "", // Most are empty
    "clickhouse benchmark",
    "analytics database",
    "olap performance",
    "data warehouse",
    "columnar storage",
    "real-time analytics",
    "big data processing",
];

const URLS: &[&str] = &[
    "https://example.com/",
    "https://example.com/products",
    "https://example.com/about",
    "https://google.com/search?q=test",
    "https://example.com/blog/analytics",
    "https://news.example.com/article/123",
];

const REFERERS: &[&str] = &[
    "",
    "https://google.com/",
    "https://bing.com/",
    "https://duckduckgo.com/",
    "https://example.com/",
];

const MOBILE_PHONE_MODELS: &[&str] = &[
    "", "", "", // Most are empty (desktop)
    "iPhone",
    "Samsung Galaxy",
    "Pixel",
    "OnePlus",
];

const TITLES: &[&str] = &[
    "Home Page",
    "Product Catalog",
    "About Us",
    "Contact",
    "Blog - Analytics Tips",
    "Google Search Results",
    "Dashboard",
];

const BROWSER_LANGUAGES: &[&str] = &["en", "de", "fr", "es", "ru", "zh", "jp"];
const BROWSER_COUNTRIES: &[&str] = &["US", "DE", "FR", "GB", "RU", "CN", "JP"];

/// ClickBench-compatible hits record
/// Schema from: https://github.com/ClickHouse/ClickBench
///
/// Field names are mapped via `#[serde(rename = ...)]` to the exact ClickBench
/// column names (including the upstream spellings "TraficSourceID" and
/// "IsArtifical" — do not "fix" them, they must match the target table).
#[derive(Serialize)]
struct HitsRecord {
    #[serde(rename = "WatchID")]
    watch_id: u64,
    #[serde(rename = "JavaEnable")]
    java_enable: u8,
    #[serde(rename = "Title")]
    title: String,
    #[serde(rename = "GoodEvent")]
    good_event: i16,
    // Event timestamps are pre-formatted strings ("%Y-%m-%d %H:%M:%S" /
    // "%Y-%m-%d"), not numeric epochs — see generate_hits_record.
    #[serde(rename = "EventTime")]
    event_time: String,
    #[serde(rename = "EventDate")]
    event_date: String,
    #[serde(rename = "CounterID")]
    counter_id: u32,
    #[serde(rename = "ClientIP")]
    client_ip: u32,
    #[serde(rename = "RegionID")]
    region_id: u32,
    #[serde(rename = "UserID")]
    user_id: u64,
    #[serde(rename = "CounterClass")]
    counter_class: i8,
    #[serde(rename = "OS")]
    os: u8,
    #[serde(rename = "UserAgent")]
    user_agent: u8,
    #[serde(rename = "URL")]
    url: String,
    #[serde(rename = "Referer")]
    referer: String,
    #[serde(rename = "IsRefresh")]
    is_refresh: u8,
    #[serde(rename = "RefererCategoryID")]
    referer_category_id: u16,
    #[serde(rename = "RefererRegionID")]
    referer_region_id: u32,
    #[serde(rename = "URLCategoryID")]
    url_category_id: u16,
    #[serde(rename = "URLRegionID")]
    url_region_id: u32,
    // Client display / runtime capabilities.
    #[serde(rename = "ResolutionWidth")]
    resolution_width: u16,
    #[serde(rename = "ResolutionHeight")]
    resolution_height: u16,
    #[serde(rename = "ResolutionDepth")]
    resolution_depth: u8,
    #[serde(rename = "FlashMajor")]
    flash_major: u8,
    #[serde(rename = "FlashMinor")]
    flash_minor: u8,
    #[serde(rename = "FlashMinor2")]
    flash_minor2: String,
    #[serde(rename = "NetMajor")]
    net_major: u8,
    #[serde(rename = "NetMinor")]
    net_minor: u8,
    #[serde(rename = "UserAgentMajor")]
    user_agent_major: u16,
    #[serde(rename = "UserAgentMinor")]
    user_agent_minor: String,
    #[serde(rename = "CookieEnable")]
    cookie_enable: u8,
    #[serde(rename = "JavascriptEnable")]
    javascript_enable: u8,
    #[serde(rename = "IsMobile")]
    is_mobile: u8,
    #[serde(rename = "MobilePhone")]
    mobile_phone: u8,
    #[serde(rename = "MobilePhoneModel")]
    mobile_phone_model: String,
    #[serde(rename = "Params")]
    params: String,
    #[serde(rename = "IPNetworkID")]
    ip_network_id: u32,
    // "Trafic" spelling is intentional (upstream ClickBench column name).
    #[serde(rename = "TraficSourceID")]
    trafic_source_id: i8,
    #[serde(rename = "SearchEngineID")]
    search_engine_id: u16,
    #[serde(rename = "SearchPhrase")]
    search_phrase: String,
    #[serde(rename = "AdvEngineID")]
    adv_engine_id: u8,
    // "Artifical" spelling is intentional (upstream ClickBench column name).
    #[serde(rename = "IsArtifical")]
    is_artifical: u8,
    #[serde(rename = "WindowClientWidth")]
    window_client_width: u16,
    #[serde(rename = "WindowClientHeight")]
    window_client_height: u16,
    #[serde(rename = "ClientTimeZone")]
    client_time_zone: i16,
    #[serde(rename = "ClientEventTime")]
    client_event_time: String,
    #[serde(rename = "SilverlightVersion1")]
    silverlight_version1: u8,
    #[serde(rename = "SilverlightVersion2")]
    silverlight_version2: u8,
    #[serde(rename = "SilverlightVersion3")]
    silverlight_version3: u32,
    #[serde(rename = "SilverlightVersion4")]
    silverlight_version4: u16,
    #[serde(rename = "PageCharset")]
    page_charset: String,
    #[serde(rename = "CodeVersion")]
    code_version: u32,
    #[serde(rename = "IsLink")]
    is_link: u8,
    #[serde(rename = "IsDownload")]
    is_download: u8,
    #[serde(rename = "IsNotBounce")]
    is_not_bounce: u8,
    #[serde(rename = "FUniqID")]
    f_uniq_id: u64,
    #[serde(rename = "OriginalURL")]
    original_url: String,
    #[serde(rename = "HID")]
    hid: u32,
    #[serde(rename = "IsOldCounter")]
    is_old_counter: u8,
    #[serde(rename = "IsEvent")]
    is_event: u8,
    #[serde(rename = "IsParameter")]
    is_parameter: u8,
    #[serde(rename = "DontCountHits")]
    dont_count_hits: u8,
    #[serde(rename = "WithHash")]
    with_hash: u8,
    #[serde(rename = "HitColor")]
    hit_color: String,
    #[serde(rename = "LocalEventTime")]
    local_event_time: String,
    // Demographics.
    #[serde(rename = "Age")]
    age: u8,
    #[serde(rename = "Sex")]
    sex: u8,
    #[serde(rename = "Income")]
    income: u8,
    #[serde(rename = "Interests")]
    interests: u16,
    #[serde(rename = "Robotness")]
    robotness: u8,
    #[serde(rename = "RemoteIP")]
    remote_ip: u32,
    #[serde(rename = "WindowName")]
    window_name: i32,
    #[serde(rename = "OpenerName")]
    opener_name: i32,
    #[serde(rename = "HistoryLength")]
    history_length: i16,
    #[serde(rename = "BrowserLanguage")]
    browser_language: String,
    #[serde(rename = "BrowserCountry")]
    browser_country: String,
    #[serde(rename = "SocialNetwork")]
    social_network: String,
    #[serde(rename = "SocialAction")]
    social_action: String,
    #[serde(rename = "HTTPError")]
    http_error: u16,
    // Page-load timing metrics (milliseconds in ClickBench — TODO confirm
    // downstream consumers agree on the unit; this file only fills them with
    // bounded random values).
    #[serde(rename = "SendTiming")]
    send_timing: u32,
    #[serde(rename = "DNSTiming")]
    dns_timing: u32,
    #[serde(rename = "ConnectTiming")]
    connect_timing: u32,
    #[serde(rename = "ResponseStartTiming")]
    response_start_timing: u32,
    #[serde(rename = "ResponseEndTiming")]
    response_end_timing: u32,
    #[serde(rename = "FetchTiming")]
    fetch_timing: u32,
    #[serde(rename = "SocialSourceNetworkID")]
    social_source_network_id: u8,
    #[serde(rename = "SocialSourcePage")]
    social_source_page: String,
    // E-commerce / campaign attribution fields (left empty/zero by the
    // generator in this file).
    #[serde(rename = "ParamPrice")]
    param_price: i64,
    #[serde(rename = "ParamOrderID")]
    param_order_id: String,
    #[serde(rename = "ParamCurrency")]
    param_currency: String,
    #[serde(rename = "ParamCurrencyID")]
    param_currency_id: u16,
    #[serde(rename = "OpenstatServiceName")]
    openstat_service_name: String,
    #[serde(rename = "OpenstatCampaignID")]
    openstat_campaign_id: String,
    #[serde(rename = "OpenstatAdID")]
    openstat_ad_id: String,
    #[serde(rename = "OpenstatSourceID")]
    openstat_source_id: String,
    #[serde(rename = "UTMSource")]
    utm_source: String,
    #[serde(rename = "UTMMedium")]
    utm_medium: String,
    #[serde(rename = "UTMCampaign")]
    utm_campaign: String,
    #[serde(rename = "UTMContent")]
    utm_content: String,
    #[serde(rename = "UTMTerm")]
    utm_term: String,
    #[serde(rename = "FromTag")]
    from_tag: String,
    #[serde(rename = "HasGCLID")]
    has_gclid: u8,
    #[serde(rename = "RefererHash")]
    referer_hash: u64,
    #[serde(rename = "URLHash")]
    url_hash: u64,
    #[serde(rename = "CLID")]
    clid: u32,
}

// H2O Row Structure
#[derive(Debug, Deserialize, Serialize)]
struct H2oRow {
    // Row shape of the H2O db-benchmark "groupby" dataset: three string ids,
    // three integer ids, and three measure columns.
    id1: String,
    id2: String,
    id3: String,
    id4: i32,
    id5: i32,
    id6: i32,
    v1: i32,
    v2: i32,
    v3: f64,
}

// H2O Elasticsearch Document (with timestamp)
// Same columns as H2oRow plus a `timestamp` field required for time-based
// indexing in Elasticsearch. NOTE(review): the i64 timestamp is presumably an
// epoch value (ms or s); the code that populates it is outside this chunk —
// confirm the unit against the index mapping.
#[derive(Debug, Serialize)]
struct H2oEsDoc {
    timestamp: i64,
    id1: String,
    id2: String,
    id3: String,
    id4: i32,
    id5: i32,
    id6: i32,
    v1: i32,
    v2: i32,
    v3: f64,
}

/// Picks one element of `items` uniformly at random.
///
/// NOTE(review): panics if `items` is empty (`Uniform::new` requires
/// low < high). All call sites in this file pass non-empty const slices or the
/// id pools handed to `generate_hits_record`.
fn random_choice<'a, T>(items: &'a [T], rng: &mut SmallRng) -> &'a T {
    let dist = Uniform::new(0, items.len());
    &items[dist.sample(rng)]
}

/// Builds one synthetic ClickBench `HitsRecord`.
///
/// * `rng` — seeded RNG; the draw order below is fixed, so a given seed yields
///   a reproducible record stream.
/// * `watch_id_counter` — incremented first, so WatchIDs are sequential and
///   unique per generator.
/// * `user_ids` / `counter_ids` — pre-built id pools; UserID/CounterID are
///   sampled from them so values repeat realistically across records.
///
/// EventTime/EventDate are the *current* wall-clock time formatted as strings,
/// not historical ClickBench timestamps.
fn generate_hits_record(
    rng: &mut SmallRng,
    watch_id_counter: &mut u64,
    user_ids: &[u64],
    counter_ids: &[u32],
) -> HitsRecord {
    let now = chrono::Utc::now();
    let event_time = now.format("%Y-%m-%d %H:%M:%S").to_string();
    let event_date = now.format("%Y-%m-%d").to_string();

    // Half-open ranges [low, high) — e.g. dist_bool yields 0 or 1.
    let dist_bool = Uniform::new(0u8, 2);
    let dist_u8 = Uniform::new(0u8, 100);
    let dist_u16 = Uniform::new(0u16, 10000);
    let dist_u32 = Uniform::new(0u32, 100000);
    let dist_u64 = Uniform::new(0u64, 1000000);
    let dist_region = Uniform::new(1u32, 250);
    let dist_resolution_w = Uniform::new(320u16, 2560);
    let dist_resolution_h = Uniform::new(240u16, 1440);
    let dist_age = Uniform::new(0u8, 100);
    let dist_timing = Uniform::new(0u32, 5000);

    // Pre-increment: the first record gets WatchID 1 (for a counter starting
    // at 0), and ids are never reused.
    *watch_id_counter += 1;
    let watch_id = *watch_id_counter;

    let user_id = *random_choice(user_ids, rng);
    let counter_id = *random_choice(counter_ids, rng);
    let url = random_choice(URLS, rng).to_string();
    let referer = random_choice(REFERERS, rng).to_string();
    let search_phrase = random_choice(SEARCH_PHRASES, rng).to_string();
    let mobile_phone_model = random_choice(MOBILE_PHONE_MODELS, rng).to_string();
    // Empty model string means "desktop" in the sample pool above.
    let is_mobile = if mobile_phone_model.is_empty() { 0 } else { 1 };
    let title = random_choice(TITLES, rng).to_string();

    // Simple deterministic Java-style (x*31 + b) string hashes, wrapping on
    // overflow — stand-ins for the real URLHash/RefererHash columns.
    let url_hash = url.bytes().fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));
    let referer_hash = referer.bytes().fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));

    HitsRecord {
        watch_id,
        java_enable: dist_bool.sample(rng),
        title,
        good_event: 1,
        event_time: event_time.clone(),
        event_date,
        counter_id,
        client_ip: dist_u32.sample(rng),
        region_id: dist_region.sample(rng),
        user_id,
        counter_class: (dist_bool.sample(rng) as i8),
        // `% n` narrows the broad distributions to each column's plausible
        // cardinality (e.g. ~20 OS codes, ~50 user agents).
        os: dist_u8.sample(rng) % 20,
        user_agent: dist_u8.sample(rng) % 50,
        url,
        referer,
        is_refresh: dist_bool.sample(rng),
        referer_category_id: dist_u16.sample(rng) % 20,
        referer_region_id: dist_region.sample(rng),
        url_category_id: dist_u16.sample(rng) % 20,
        url_region_id: dist_region.sample(rng),
        resolution_width: dist_resolution_w.sample(rng),
        resolution_height: dist_resolution_h.sample(rng),
        resolution_depth: 24,
        flash_major: dist_u8.sample(rng) % 20,
        flash_minor: dist_u8.sample(rng) % 10,
        flash_minor2: String::new(),
        net_major: dist_u8.sample(rng) % 5,
        net_minor: dist_u8.sample(rng) % 10,
        user_agent_major: dist_u16.sample(rng) % 100,
        user_agent_minor: String::new(),
        cookie_enable: 1,
        javascript_enable: 1,
        is_mobile,
        mobile_phone: if is_mobile == 1 { dist_u8.sample(rng) % 10 } else { 0 },
        mobile_phone_model,
        params: String::new(),
        ip_network_id: dist_u32.sample(rng),
        // Yields values in -1..=8 (-1 is a valid "direct traffic"-style code).
        trafic_source_id: ((dist_u8.sample(rng) % 10) as i8) - 1,
        search_engine_id: dist_u16.sample(rng) % 30,
        search_phrase,
        adv_engine_id: dist_u8.sample(rng) % 30,
        is_artifical: 0,
        window_client_width: dist_resolution_w.sample(rng),
        window_client_height: dist_resolution_h.sample(rng),
        // UTC offset in hours, -12..=11.
        client_time_zone: ((dist_u8.sample(rng) % 24) as i16) - 12,
        client_event_time: event_time.clone(),
        silverlight_version1: 0,
        silverlight_version2: 0,
        silverlight_version3: 0,
        silverlight_version4: 0,
        page_charset: "UTF-8".to_string(),
        code_version: dist_u32.sample(rng) % 1000,
        is_link: dist_bool.sample(rng),
        is_download: dist_bool.sample(rng),
        is_not_bounce: dist_bool.sample(rng),
        f_uniq_id: dist_u64.sample(rng),
        original_url: String::new(),
        hid: dist_u32.sample(rng),
        is_old_counter: 0,
        is_event: dist_bool.sample(rng),
        is_parameter: 0,
        dont_count_hits: dist_bool.sample(rng),
        with_hash: 0,
        hit_color: "E".to_string(),
        local_event_time: event_time,
        age: dist_age.sample(rng),
        sex: dist_bool.sample(rng),
        income: dist_u8.sample(rng) % 5,
        interests: dist_u16.sample(rng),
        robotness: dist_bool.sample(rng),
        remote_ip: dist_u32.sample(rng),
        window_name: 0,
        opener_name: 0,
        history_length: (dist_u8.sample(rng) % 20) as i16,
        browser_language: random_choice(BROWSER_LANGUAGES, rng).to_string(),
        browser_country: random_choice(BROWSER_COUNTRIES, rng).to_string(),
        social_network: String::new(),
        social_action: String::new(),
        http_error: 0,
        send_timing: dist_timing.sample(rng),
        dns_timing: dist_timing.sample(rng) % 100,
        connect_timing: dist_timing.sample(rng) % 200,
        response_start_timing: dist_timing.sample(rng),
        response_end_timing: dist_timing.sample(rng),
        fetch_timing: dist_timing.sample(rng),
        social_source_network_id: 0,
        social_source_page: String::new(),
        param_price: 0,
        param_order_id: String::new(),
        param_currency: String::new(),
        param_currency_id: 0,
        openstat_service_name: String::new(),
        openstat_campaign_id: String::new(),
        openstat_ad_id: String::new(),
        openstat_source_id: String::new(),
        utm_source: String::new(),
        utm_medium: String::new(),
        utm_campaign: String::new(),
        utm_content: String::new(),
        utm_term: String::new(),
        from_tag: String::new(),
        has_gclid: 0,
        referer_hash,
        url_hash,
        clid: 0,
    }
}

// Export mode selected on the command line (clap ValueEnum: variants map to
// kebab-case flags, e.g. `h2o-elasticsearch`).
#[derive(Clone, ValueEnum, Debug)]
enum Mode {
    /// Generate synthetic fake data and send to Kafka
    Fake,
    /// Read ClickBench JSON data from file and send to Kafka
    Clickbench,
    /// Read H2O CSV data from file and send to Kafka
    H2o,
    /// Read H2O CSV data from file and send to Elasticsearch
    H2oElasticsearch,
}

// CLI argument definition (continues past the end of this chunk).
#[derive(Parser)]
#[command(name = "data_exporter")]
#[command(about
= "ClickBench-compatible data exporter to Kafka (fake, clickbench, or h2o data)")] +struct Args { + #[arg(long, value_enum, env = "DATA_MODE", help = "Data source mode (fake, clickbench, or h2o)")] + mode: Mode, + + #[arg(long, env = "KAFKA_BROKER", help = "Kafka broker address")] + kafka_broker: Option, + + #[arg(long, env = "KAFKA_TOPIC", help = "Kafka topic name")] + kafka_topic: Option, + + #[arg(long, env = "DATA_BATCH_SIZE", help = "Number of records per batch")] + batch_size: usize, + + #[arg(long, env = "DATA_FREQUENCY_SECONDS", help = "Seconds between batches (fake mode only)")] + frequency: u64, + + #[arg(long, env = "FAKE_NUM_USERS", help = "Number of unique users (fake mode only)")] + num_users: usize, + + #[arg(long, env = "FAKE_NUM_COUNTERS", help = "Number of unique counters (fake mode only)")] + num_counters: usize, + + #[arg(long, env = "DEBUG_PRINT", default_value = "false", help = "Print records to console")] + debug_print: bool, + + #[arg(long, env = "TOTAL_RECORDS", help = "Total records to generate/send (0 = infinite/all)")] + total_records: Option, + + #[arg(long, env = "CLICKBENCH_FILE", help = "Path to hits.json or hits.json.gz (clickbench mode)")] + input_file: Option, + + #[arg(long, env = "INPUT_FILE", help = "Path to input file (h2o/general usage)")] + general_input_file: Option, + + #[arg( + long, + env = "ELASTIC_HOST", + default_value = "localhost", + help = "Elasticsearch host" + )] + elastic_host: String, + + #[arg( + long, + env = "ELASTIC_PORT", + default_value = "9200", + help = "Elasticsearch port" + )] + elastic_port: u16, + + #[arg( + long, + env = "ELASTIC_INDEX_NAME", + default_value = "h2o_benchmark", + help = "Elasticsearch index name" + )] + elastic_index: String, + + #[arg( + long, + env = "ELASTIC_API_KEY", + help = "Elasticsearch API key (optional)" + )] + elastic_api_key: Option, +} + +async fn run_fake_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + let mut rng = 
SmallRng::seed_from_u64(RNG_SEED); + let user_dist = Uniform::new(1u64, u64::MAX / 2); + let counter_dist = Uniform::new(1u32, 1000); + + let user_ids: Vec = (0..args.num_users) + .map(|_| user_dist.sample(&mut rng)) + .collect(); + + let mut counter_ids: Vec = (0..args.num_counters) + .map(|_| counter_dist.sample(&mut rng)) + .collect(); + counter_ids.push(62); // Required for ClickBench queries 37-43 + + println!( + "Generated {} unique users and {} unique counters", + user_ids.len(), + counter_ids.len() + ); + println!( + "Generating {} records per batch every {} second(s)", + args.batch_size, args.frequency + ); + + let mut watch_id_counter: u64 = 0; + let mut total_sent: u64 = 0; + + loop { + for _ in 0..args.batch_size { + let record = generate_hits_record(&mut rng, &mut watch_id_counter, &user_ids, &counter_ids); + let record_str = serde_json::to_string(&record)?; + + if args.debug_print { + println!("{}", record_str); + } + + let delivery_status = producer + .send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(&record_str) + .key(&watch_id_counter.to_string()), + Duration::from_secs(0), + ) + .await; + + if let Err((err, _)) = delivery_status { + eprintln!("Failed to send message to Kafka: {}", err); + } + + total_sent += 1; + + if let Some(limit) = args.total_records { + if limit > 0 && total_sent >= limit { + println!("Reached target of {} records. Exiting.", limit); + return Ok(()); + } + } + } + + println!("Sent batch. 
Total records: {}", total_sent); + + if args.total_records.map_or(true, |l| l == 0 || total_sent < l) { + sleep(Duration::from_secs(args.frequency)).await; + } + } +} + +async fn run_clickbench_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + let input_file = args.input_file.as_deref() + .ok_or("--input-file is required for clickbench mode")?; + + println!("Reading ClickBench data from: {}", input_file); + + let file = File::open(input_file)?; + let reader: Box = if input_file.ends_with(".gz") { + Box::new(BufReader::new(GzDecoder::new(file))) + } else { + Box::new(BufReader::new(file)) + }; + + let mut total_sent: u64 = 0; + let total_limit = args.total_records.unwrap_or(0); + let mut batch: Vec<(String, String)> = Vec::with_capacity(args.batch_size); // (key, payload) + + // ClickBench dataset has ~100M rows + let total_rows = if total_limit > 0 { total_limit } else { 99_997_497 }; + let pb = ProgressBar::new(total_rows); + pb.set_style(ProgressStyle::default_bar() + .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({percent}%) {per_sec} ETA: {eta}")? 
+ .progress_chars("#>-")); + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + eprintln!("Warning: Error reading line: {}", e); + continue; + } + }; + + let line = line.trim(); + if line.is_empty() { + continue; + } + + let key = (total_sent + batch.len() as u64).to_string(); + batch.push((key, line.to_string())); + + if batch.len() >= args.batch_size { + let futures: Vec<_> = batch + .iter() + .map(|(key, payload)| { + producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(payload) + .key(key), + Duration::from_secs(5), + ) + }) + .collect(); + + let results = join_all(futures).await; + for result in results { + if let Err((err, _)) = result { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + + total_sent += batch.len() as u64; + pb.set_position(total_sent); + batch.clear(); + + if total_limit > 0 && total_sent >= total_limit { + break; + } + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + } + } + + // Send remaining records + if !batch.is_empty() && (total_limit == 0 || total_sent < total_limit) { + let futures: Vec<_> = batch + .iter() + .map(|(key, payload)| { + producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(payload) + .key(key), + Duration::from_secs(5), + ) + }) + .collect(); + + let results = join_all(futures).await; + for result in results { + if let Err((err, _)) = result { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + total_sent += batch.len() as u64; + pb.set_position(total_sent); + } + + pb.finish_with_message(format!("Done! Sent {} records", total_sent)); + Ok(()) +} + +async fn run_h2o_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + // 1. 
Handle Input File Selection safely + let file_path = if let Some(path) = &args.general_input_file { + path.clone() + } else if let Some(path_str) = &args.input_file { + PathBuf::from(path_str) + } else { + panic!("Input file required for H2O mode (use --input-file or --clickbench-file)"); + }; + + println!("Reading H2O data from: {:?}", file_path); + let file = File::open(file_path)?; + let reader = BufReader::new(file); + + let mut batch = Vec::with_capacity(args.batch_size); + let mut total_sent = 0; + + let pb = ProgressBar::new_spinner(); + pb.set_style(ProgressStyle::default_spinner().template("{spinner:.green} [{elapsed_precise}] {msg}")?); + + for line in reader.lines() { + let line = line?; + if line.is_empty() || line.starts_with("id1") { continue; } // Skip header + + let cols: Vec<&str> = line.split(',').collect(); + if cols.len() < 9 { continue; } + + let row = H2oRow { + id1: cols[0].to_string(), + id2: cols[1].to_string(), + id3: cols[2].to_string(), + id4: cols[3].parse().unwrap_or(0), + id5: cols[4].parse().unwrap_or(0), + id6: cols[5].parse().unwrap_or(0), + v1: cols[6].parse().unwrap_or(0), + v2: cols[7].parse().unwrap_or(0), + v3: cols[8].parse().unwrap_or(0.0), + }; + + let payload = serde_json::to_string(&row)?; + batch.push(payload); + + // SEND BATCH + if batch.len() >= args.batch_size { + let mut futures = Vec::with_capacity(batch.len()); + + for payload in batch.iter() { + let future = producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()).payload(payload).key(""), + Duration::from_secs(5), + ); + futures.push(future); + } + + // Wait for all sends to complete + join_all(futures).await; + + // NOW it is safe to clear the batch, as futures are done + batch.clear(); + + total_sent += args.batch_size; + pb.set_message(format!("Sent {} records", total_sent)); + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + } + } + + // SEND REMAINING + if !batch.is_empty() { + let mut futures = 
Vec::with_capacity(batch.len()); + let count = batch.len(); + + for payload in batch.iter() { + let future = producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()).payload(payload).key(""), + Duration::from_secs(5), + ); + futures.push(future); + } + + join_all(futures).await; + batch.clear(); + + total_sent += count; + } + + pb.finish_with_message(format!("Done! Sent {} H2O records", total_sent)); + Ok(()) +} + +async fn run_h2o_elasticsearch_mode(args: &Args) -> Result<(), Box> { + let file_path = if let Some(path) = &args.general_input_file { + path.clone() + } else if let Some(path_str) = &args.input_file { + PathBuf::from(path_str) + } else { + return Err("Input file required for H2O Elasticsearch mode".into()); + }; + + println!("Reading H2O data from: {:?}", file_path); + + // Connect to Elasticsearch with optional API key + let elastic_url = format!("http://{}:{}", args.elastic_host, args.elastic_port); + + let transport = if let Some(api_key) = &args.elastic_api_key { + println!("Using API key authentication"); + use elasticsearch::http::headers::HeaderMap; + use elasticsearch::http::headers::HeaderValue; + use elasticsearch::http::transport::TransportBuilder; + + let mut headers = HeaderMap::new(); + headers.insert( + "Authorization", + HeaderValue::from_str(&format!("ApiKey {}", api_key))?, + ); + + TransportBuilder::new( + elasticsearch::http::transport::SingleNodeConnectionPool::new(elastic_url.parse()?), + ) + .headers(headers) + .build()? + } else { + println!("No API key provided, connecting without authentication"); + Transport::single_node(&elastic_url)? + }; + + let client = Elasticsearch::new(transport); + + println!("Connected to Elasticsearch at {}", elastic_url); + + // Check if index exists, create if not + let index_exists = client + .indices() + .exists(elasticsearch::indices::IndicesExistsParts::Index(&[ + &args.elastic_index + ])) + .send() + .await? 
+ .status_code() + .is_success(); + + if !index_exists { + println!("Creating index: {}", args.elastic_index); + let create_response = client + .indices() + .create(elasticsearch::indices::IndicesCreateParts::Index( + &args.elastic_index, + )) + .body(json!({ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "30s" + }, + "mappings": { + "properties": { + "timestamp": {"type": "date", "format": "epoch_millis"}, + "id1": {"type": "keyword"}, + "id2": {"type": "keyword"}, + "id3": {"type": "keyword"}, + "id4": {"type": "long"}, + "id5": {"type": "long"}, + "id6": {"type": "long"}, + "v1": {"type": "long"}, + "v2": {"type": "long"}, + "v3": {"type": "double"} + } + } + })) + .send() + .await?; + + if !create_response.status_code().is_success() { + let error_text = create_response.text().await?; + eprintln!("Failed to create index. Error response: {}", error_text); + return Err("Failed to create index".into()); + } + println!("Index created successfully"); + } else { + println!( + "Index {} already exists, skipping creation", + args.elastic_index + ); + } + + let file = File::open(file_path)?; + let reader = BufReader::new(file); + + let mut batch = Vec::with_capacity(args.batch_size); + let mut total_sent = 0u64; + let mut row_num = 0i64; + + let base_timestamp = + chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00Z")?.timestamp_millis(); + + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::default_spinner().template("{spinner:.green} [{elapsed_precise}] {msg}")?, + ); + + for line in reader.lines() { + let line = line?; + if line.is_empty() || line.starts_with("id1") { + continue; + } + + let cols: Vec<&str> = line.split(',').collect(); + if cols.len() < 9 { + continue; + } + + // Create document with timestamp + let doc = H2oEsDoc { + timestamp: base_timestamp + (row_num * 1000), // Increment by 1 second per row + id1: cols[0].to_string(), + id2: cols[1].to_string(), + id3: cols[2].to_string(), + 
id4: cols[3].parse().unwrap_or(0), + id5: cols[4].parse().unwrap_or(0), + id6: cols[5].parse().unwrap_or(0), + v1: cols[6].parse().unwrap_or(0), + v2: cols[7].parse().unwrap_or(0), + v3: cols[8].parse().unwrap_or(0.0), + }; + + batch.push(serde_json::to_value(&doc)?); + row_num += 1; + + // Send batch when full + if batch.len() >= args.batch_size { + let mut body: Vec = Vec::with_capacity(batch.len() * 2); + + for doc in &batch { + body.push(serde_json::to_string(&json!({"index": {}}))?); + body.push(serde_json::to_string(&doc)?); + } + + let response = client + .bulk(BulkParts::Index(&args.elastic_index)) + .body(body) + .send() + .await?; + + if !response.status_code().is_success() { + eprintln!("Bulk indexing error: {:?}", response.text().await?); + } + + total_sent += batch.len() as u64; + batch.clear(); + + pb.set_message(format!("Indexed {} documents", total_sent)); + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + + // Check limit + if let Some(limit) = args.total_records { + if limit > 0 && total_sent >= limit { + break; + } + } + } + } + + // Send remaining documents + if !batch.is_empty() { + let mut body: Vec = Vec::with_capacity(batch.len() * 2); + + for doc in &batch { + body.push(serde_json::to_string(&json!({"index": {}}))?); + body.push(serde_json::to_string(&doc)?); + } + + let response = client + .bulk(BulkParts::Index(&args.elastic_index)) + .body(body) + .send() + .await?; + + if !response.status_code().is_success() { + eprintln!("Bulk indexing error: {:?}", response.text().await?); + } + + total_sent += batch.len() as u64; + } + + // Refresh index + println!("Refreshing index..."); + client + .indices() + .refresh(elasticsearch::indices::IndicesRefreshParts::Index(&[ + &args.elastic_index + ])) + .send() + .await?; + + pb.finish_with_message(format!( + "Done! 
Indexed {} H2O documents to Elasticsearch", + total_sent + )); + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let args = Args::parse(); + + match args.mode { + Mode::Fake | Mode::Clickbench | Mode::H2o => { + // Kafka modes - require broker and topic + let kafka_broker = args + .kafka_broker + .as_ref() + .ok_or("--kafka-broker required for Kafka modes")?; + let kafka_topic = args + .kafka_topic + .as_ref() + .ok_or("--kafka-topic required for Kafka modes")?; + + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("message.timeout.ms", "30000") + .set("queue.buffering.max.messages", "100000") + .set("batch.num.messages", "1000") + .create() + .expect("Failed to create Kafka producer"); + + println!( + "Connected to Kafka broker: {}, topic: {}", + kafka_broker, kafka_topic + ); + + match args.mode { + Mode::Fake => { + println!("Mode: fake (generating synthetic data)"); + run_fake_mode(&args, &producer).await + } + Mode::Clickbench => { + println!("Mode: clickbench (reading from file)"); + run_clickbench_mode(&args, &producer).await + } + Mode::H2o => { + println!("Mode: h2o (reading from file)"); + run_h2o_mode(&args, &producer).await + } + Mode::H2oElasticsearch => { + panic!("Invalid mode after setting up Kafka broker and topic"); + } + } + } + Mode::H2oElasticsearch => { + println!("Mode: h2o-elasticsearch (direct to Elasticsearch)"); + run_h2o_elasticsearch_mode(&args).await + } + } +} diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql new file mode 100644 index 0000000..90e493a --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql @@ -0,0 +1,29 @@ +-- Q1: Sum v1 by id1 +SELECT id1, sum(v1) AS v1 FROM "h2o_benchmark" GROUP BY id1 ORDER BY id1; + +-- Q2: Sum v1 by id1:id2 +SELECT id1, id2, sum(v1) AS v1 FROM "h2o_benchmark" GROUP BY id1, id2 
ORDER BY id1, id2; + +-- Q3: Sum v1 mean v3 by id3 +SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM "h2o_benchmark" GROUP BY id3 ORDER BY id3; + +-- Q4: Mean v1:v3 by id4 +SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM "h2o_benchmark" GROUP BY id4 ORDER BY id4; + +-- Q5: Sum v1:v3 by id6 +SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM "h2o_benchmark" GROUP BY id6 ORDER BY id6; + +-- Q6: Median v3 sd v3 by id4 id5 +SELECT id4, id5, PERCENTILE(v3, 50) AS median_v3, STDDEV_SAMP(v3) AS sd_v3 FROM "h2o_benchmark" GROUP BY id4, id5 ORDER BY id4, id5; + +-- Q7: Max v1 - min v2 by id3 +SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM "h2o_benchmark" GROUP BY id3 ORDER BY id3; + +-- Q8: Largest two v3 by id6 (Elasticsearch SQL doesn't support LIMIT BY) +SELECT id6, v3 FROM "h2o_benchmark" ORDER BY v3 DESC LIMIT 20; + +-- Q9: Count rows +SELECT id2, id4, COUNT(*) as count FROM "h2o_benchmark" GROUP BY id2, id4 ORDER BY id2, id4; + +-- Q10: Sum v3 count by id1:id6 +SELECT id1, id6, sum(v3) AS v3, count(*) AS count FROM "h2o_benchmark" GROUP BY id1, id6 ORDER BY id1, id6; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql new file mode 100644 index 0000000..1d29987 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql @@ -0,0 +1,29 @@ +-- Q1: Sum v1 by id1 +SELECT id1, sum(v1) AS v1 FROM h2o_groupby GROUP BY id1 ORDER BY id1; + +-- Q2: Sum v1 by id1:id2 +SELECT id1, id2, sum(v1) AS v1 FROM h2o_groupby GROUP BY id1, id2 ORDER BY id1, id2; + +-- Q3: Sum v1 mean v3 by id3 +SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM h2o_groupby GROUP BY id3 ORDER BY id3; + +-- Q4: Mean v1:v3 by id4 +SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM h2o_groupby GROUP BY id4 ORDER BY id4; + +-- Q5: Sum v1:v3 by id6 +SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM h2o_groupby GROUP BY id6 ORDER BY 
id6; + +-- Q6: Median v3 sd v3 by id4 id5 +SELECT id4, id5, median(v3) AS median_v3, stddevSamp(v3) AS sd_v3 FROM h2o_groupby GROUP BY id4, id5 ORDER BY id4, id5; + +-- Q7: Max v1 - min v2 by id3 +SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM h2o_groupby GROUP BY id3 ORDER BY id3; + +-- Q8: Largest two v3 by id6 +SELECT id6, v3 FROM h2o_groupby ORDER BY v3 DESC LIMIT 2 BY id6; + +-- Q9: Regression v1 v2 by id2 id4 (Approximation using corr for benchmark simplicity or skip) +SELECT id2, id4, corr(v1, v2) FROM h2o_groupby GROUP BY id2, id4 ORDER BY id2, id4; + +-- Q10: Sum v3 count by id1:id6 +SELECT id1, id6, sum(v3) AS v3, count(*) AS count FROM h2o_groupby GROUP BY id1, id6 ORDER BY id1, id6; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh new file mode 100755 index 0000000..c8a2498 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Check data ingestion status + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." 
+ +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +echo "=== ClickHouse Data Status ===" +echo "" + +# Count total rows +echo "Total rows in hits table:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT count() FROM hits FORMAT Pretty" +echo "" + +# Check recent data +echo "Most recent records:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT EventTime, CounterID, UserID, URL FROM hits ORDER BY EventTime DESC LIMIT 5 FORMAT Pretty" +echo "" + +# Table size +echo "Table size:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT formatReadableSize(sum(bytes)) as size, count() as parts FROM system.parts WHERE table = 'hits' AND active FORMAT Pretty" +echo "" + +# Kafka consumer lag (if available) +echo "Kafka consumer status:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT * FROM system.kafka_consumers FORMAT Pretty" 2>/dev/null || echo "No Kafka consumer info available" + +echo "=== H2O Data Status ===" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT count() as h2o_rows FROM h2o_groupby FORMAT Pretty" +echo "" diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh new file mode 100755 index 0000000..06a740d --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Check Elasticsearch data ingestion status for H2O benchmark + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +AUTH=(-H "Authorization: ApiKey ${ES_API_KEY}") +BASE="http://${ES_HOST}:${ES_PORT}" + +echo "=== Elasticsearch Data Status ===" +echo "" + +# Check connection +if ! 
curl -s "${AUTH[@]}" "${BASE}/" > /dev/null; then + echo "Error: Cannot connect to Elasticsearch at ${ES_HOST}:${ES_PORT}" + exit 1 +fi + +# Total document count +echo "Total documents in ${ES_INDEX_NAME} index:" +curl -s "${AUTH[@]}" "${BASE}/${ES_INDEX_NAME}/_count" | \ + python3 -c "import sys, json; print(json.load(sys.stdin)['count'])" +echo "" + +# Index stats +echo "Index statistics:" +curl -s "${AUTH[@]}" "${BASE}/${ES_INDEX_NAME}/_stats" | \ + python3 -c " +import sys, json +stats = json.load(sys.stdin) +idx_stats = stats['indices']['${ES_INDEX_NAME}']['primaries'] +print(f\" Total size: {idx_stats['store']['size_in_bytes'] / (1024*1024*1024):.2f} GB\") +print(f\" Document count: {idx_stats['docs']['count']:,}\") +print(f\" Deleted docs: {idx_stats['docs']['deleted']:,}\") +" +echo "" + +# Sample documents +echo "Sample documents (first 5 by timestamp):" +curl -s "${AUTH[@]}" -X POST "${BASE}/${ES_INDEX_NAME}/_search" \ + -H 'Content-Type: application/json' \ + -d '{ + "size": 5, + "sort": [{"timestamp": "asc"}], + "_source": ["timestamp", "id1", "id2", "id3", "v1", "v2", "v3"] + }' | \ + python3 -c " +import sys, json +from datetime import datetime +results = json.load(sys.stdin) +for hit in results['hits']['hits']: + doc = hit['_source'] + ts = datetime.fromtimestamp(doc['timestamp'] / 1000).strftime('%Y-%m-%d %H:%M:%S') + print(f\" {ts} | id1={doc['id1']} id2={doc['id2']} id3={doc['id3']} | v1={doc['v1']} v2={doc['v2']} v3={doc['v3']}\") +" +echo "" + +# Aggregation test +echo "Sample aggregation (count by id1):" +curl -s "${AUTH[@]}" -X POST "${BASE}/${ES_INDEX_NAME}/_search" \ + -H 'Content-Type: application/json' \ + -d '{ + "size": 0, + "aggs": { + "by_id1": { + "terms": {"field": "id1", "size": 5} + } + } + }' | \ + python3 -c " +import sys, json +results = json.load(sys.stdin) +for bucket in results['aggregations']['by_id1']['buckets']: + print(f\" {bucket['key']}: {bucket['doc_count']:,} documents\") +" +echo "" \ No newline at end of file diff 
--git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh new file mode 100755 index 0000000..df145e2 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# Data generation script with multiple data source modes +# +# Usage: +# DATA_MODE=fake ./scripts/generate_data.sh +# DATA_MODE=clickbench ./scripts/generate_data.sh +# DATA_MODE=h2o ./scripts/generate_data.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +# Define Directories +IMPORTER_DIR="${PROJECT_DIR}/benchmark_importer" +DATA_DIR="${IMPORTER_DIR}/data" + +# Hardcoded Kafka Path +KAFKA_HOME="${PROJECT_DIR}/../../Utilities/installation/kafka/kafka" +KAFKA_BIN="${KAFKA_HOME}/bin" + +# Check Mode +if [ -z "${DATA_MODE}" ]; then + echo "Error: DATA_MODE is required" + echo "Usage: DATA_MODE=fake|clickbench|h2o $0" + exit 1 +fi + +echo "Data generation mode: ${DATA_MODE}" +echo "Kafka broker: ${KAFKA_BROKER}" + +# 1. Build Rust Binary (Common for all modes) +echo "Building data_exporter..." +cd "${PROJECT_DIR}/data_exporter" +if [ ! -f target/release/data_exporter ]; then + cargo build --release +fi +cd "${PROJECT_DIR}" + +case "${DATA_MODE}" in + fake) + # Synthetic Data Generation + # Note: We don't strictly need to create the topic here as the producer will auto-create it, + # but if you wanted to enforce partitions, you would use ${KAFKA_BIN}/kafka-topics.sh here. 
+ + EXTRA_ARGS=() + if [ -n "${TOTAL_RECORDS}" ] && [ "${TOTAL_RECORDS}" -gt 0 ]; then + EXTRA_ARGS+=(--total-records "${TOTAL_RECORDS}") + fi + + ./data_exporter/target/release/data_exporter \ + --mode fake \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${KAFKA_TOPIC}" \ + "${EXTRA_ARGS[@]}" + ;; + + clickbench) + # Real ClickBench Data + echo "Ensuring ClickBench data exists..." + + # Call the Python downloader + python3 "${IMPORTER_DIR}/download_data.py" \ + --output-dir "${DATA_DIR}" + + FILE_NAME="hits.json" + # Check for .gz if .json doesn't exist + if [ ! -f "${DATA_DIR}/${FILE_NAME}" ] && [ -f "${DATA_DIR}/hits.json.gz" ]; then + FILE_NAME="hits.json.gz" + fi + + echo "Ingesting ${FILE_NAME} to Kafka..." + ./data_exporter/target/release/data_exporter \ + --mode clickbench \ + --clickbench-file "${DATA_DIR}/${FILE_NAME}" \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${KAFKA_TOPIC}" + ;; + + h2o) + # H2O Benchmark Data + H2O_TOPIC=${H2O_KAFKA_TOPIC:-h2o_groupby} + H2O_FILE="G1_1e7_1e2_0_0.csv" + + # 1. Ensure Topic Exists using hardcoded path + echo "Ensuring topic '${H2O_TOPIC}' exists..." + + if [ -x "${KAFKA_BIN}/kafka-topics.sh" ]; then + "${KAFKA_BIN}/kafka-topics.sh" --create --if-not-exists \ + --topic "${H2O_TOPIC}" \ + --bootstrap-server "${KAFKA_BROKER}" \ + --partitions 1 --replication-factor 1 2>/dev/null || true + else + echo "Warning: ${KAFKA_BIN}/kafka-topics.sh not found. Skipping explicit topic creation." + fi + + # 2. Download Data + echo "Ensuring H2O data exists..." + + # Check and install gdown if missing (required for H2O download) + if ! python3 -c "import gdown" 2>/dev/null; then + echo "Installing python dependency: gdown..." + pip install gdown + fi + + python3 "${IMPORTER_DIR}/download_h2o_data.py" \ + --output-dir "${DATA_DIR}" + + # 3. Ingest Data + echo "Ingesting ${H2O_FILE} to Kafka topic ${H2O_TOPIC}..." 
+ ./data_exporter/target/release/data_exporter \ + --mode h2o \ + --input-file "${DATA_DIR}/${H2O_FILE}" \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${H2O_TOPIC}" + ;; + + *) + echo "Error: Unknown DATA_MODE '${DATA_MODE}'" + exit 1 + ;; +esac \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh new file mode 100755 index 0000000..e6fe485 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +# --- CONFIGURATION --- +CH_INSTALL_DIR="${PROJECT_DIR}/../../Utilities/installation/clickhouse/clickhouse" +CLICKHOUSE_BIN="${CH_INSTALL_DIR}/clickhouse" + +# Set Defaults if config variables are missing +CH_HOST="${CLICKHOUSE_HOST:-127.0.0.1}" +CH_PORT="${CLICKHOUSE_PORT:-9000}" + +# Check binary +if [ ! -f "${CLICKHOUSE_BIN}" ]; then + echo "Error: ClickHouse binary not found at ${CLICKHOUSE_BIN}" + exit 1 +fi + +# Select Mode +if [ "${DATA_MODE}" == "fake" ] || [ "${DATA_MODE}" == "clickbench" ]; then + echo "Mode: ClickBench (Real or Fake)" + SQL_FILE="${PROJECT_DIR}/clickhouse/schema.sql" +elif [ "${DATA_MODE}" == "h2o" ]; then + echo "Mode: H2O Benchmark" + SQL_FILE="${PROJECT_DIR}/clickhouse/h2o_init.sql" +else + echo "Error: Unknown DATA_MODE '${DATA_MODE}'" + exit 1 +fi + +echo "Initializing ClickHouse tables using ${SQL_FILE}..." +echo "Connecting to ${CH_HOST}:${CH_PORT}..." + +# Execute SQL (Using --flag=value syntax to prevent parsing errors) +"${CLICKHOUSE_BIN}" client \ + --host="${CH_HOST}" \ + --port="${CH_PORT}" \ + --multiquery < "${SQL_FILE}" + +echo "ClickHouse initialization complete." 
\ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh new file mode 100755 index 0000000..5867228 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# Load H2O data directly into Elasticsearch using Rust data_exporter + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +H2O_FILE="${PROJECT_DIR}/${H2O_DATA_DIR}/${H2O_FILENAME}" + +echo "=== H2O → Elasticsearch Direct Import (Rust) ===" +echo "" + +# Check if H2O data file exists +if [ ! -f "${H2O_FILE}" ]; then + echo "H2O data file not found at ${H2O_FILE}" + echo "Downloading H2O dataset..." + cd "${PROJECT_DIR}/benchmark_importer" + python3 download_h2o_data.py --output-dir "${PROJECT_DIR}/${H2O_DATA_DIR}" + cd "${SCRIPT_DIR}" + + if [ ! -f "${H2O_FILE}" ]; then + echo "Error: Failed to download H2O data" + exit 1 + fi +fi + +FILE_SIZE=$(du -h "${H2O_FILE}" | cut -f1) +echo "Found H2O data file: ${H2O_FILE} (${FILE_SIZE})" +echo "" + +# Check Elasticsearch connection +echo "Checking Elasticsearch connection at ${ES_HOST}:${ES_PORT}..." +if ! curl -s "http://${ES_HOST}:${ES_PORT}/" > /dev/null; then + echo "Error: Cannot connect to Elasticsearch at ${ES_HOST}:${ES_PORT}" + echo "Please ensure Elasticsearch is running" + exit 1 +fi + +ES_VERSION=$(curl -s "http://${ES_HOST}:${ES_PORT}/" | grep -o '"number" : "[^"]*"' | head -1 | sed 's/.*: "\(.*\)"/\1/') +echo "Connected to Elasticsearch version: ${ES_VERSION}" +echo "" + +# Build the data_exporter binary if needed +cd "${PROJECT_DIR}/data_exporter" +if [ ! -f target/release/data_exporter ]; then + echo "Building data_exporter..." + cargo build --release +fi + +echo "" +echo "Importing H2O data into Elasticsearch..." 
+echo "This may take several minutes..." +echo "" + +EXTRA_ARGS=() +if [ -n "${TOTAL_RECORDS}" ] && [ "${TOTAL_RECORDS}" -gt 0 ]; then + EXTRA_ARGS+=(--total-records "${TOTAL_RECORDS}") +fi + +./target/release/data_exporter \ + --mode h2o-elasticsearch \ + --input-file "${H2O_FILE}" \ + --elastic-host "${ES_HOST}" \ + --elastic-port "${ES_PORT}" \ + --elastic-index "${ES_INDEX_NAME}" \ + --elastic-api-key "${ES_API_KEY}" \ + --batch-size "${ES_BULK_SIZE}" \ + "${EXTRA_ARGS[@]}" + +if [ $? -eq 0 ]; then + echo "" + echo "✓ Import complete!" + echo "" + echo "Index: ${ES_INDEX_NAME}" +else + echo "" + echo "Error: Failed to import data into Elasticsearch" + exit 1 +fi diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh new file mode 100755 index 0000000..f3c431f --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# ClickBench Benchmark Runner +# Reads queries from benchmark_queries.sql and reports timing + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +OUTPUT_DIR="${PROJECT_DIR}/${BENCHMARK_RESULTS_DIR}" + +mkdir -p "$OUTPUT_DIR" + +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +if [ "$1" == "h2o" ]; then + QUERIES_FILE="${PROJECT_DIR}/h2o_queries.sql" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_h2o_${TIMESTAMP}.csv" + DATABASE_TYPE="clickhouse" + echo "Running H2O Benchmark on ClickHouse..." +elif [ "$1" == "h2o_elastic" ]; then + QUERIES_FILE="${PROJECT_DIR}/h2o_elastic_queries.sql" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_h2o_elastic_${TIMESTAMP}.csv" + DATABASE_TYPE="elasticsearch" + echo "Running H2O Benchmark on Elasticsearch..." 
+elif [ "$1" == "clickbench" ]; then + QUERIES_FILE="${PROJECT_DIR}/${BENCHMARK_QUERIES_FILE}" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_clickbench_${TIMESTAMP}.csv" + DATABASE_TYPE="clickhouse" + echo "Running ClickBench Benchmark..." +else + echo "Error: invalid benchmark name '$1'" + exit 1 +fi + +echo "query_num,query_time_ms,rows_read,bytes_read" > "$RESULTS_FILE" + +run_clickhouse_query() { + local query="$1" + local query_num="$2" + + echo "Running Q${query_num}..." + + RESULT=$(curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "$query FORMAT JSON" 2>/dev/null || echo '{}') + + ELAPSED=$(echo "$RESULT" | grep -o '"elapsed": *[0-9.]*' | head -1 | sed 's/.*: *//') + ROWS_READ=$(echo "$RESULT" | grep -o '"rows_read": *[0-9]*' | head -1 | sed 's/.*: *//') + BYTES_READ=$(echo "$RESULT" | grep -o '"bytes_read": *[0-9]*' | head -1 | sed 's/.*: *//') + + if [ -n "$ELAPSED" ]; then + TIME_MS=$(echo "$ELAPSED * 1000" | bc 2>/dev/null || echo "0") + else + TIME_MS="0" + fi + + echo "${query_num},${TIME_MS:-0},${ROWS_READ:-0},${BYTES_READ:-0}" >> "$RESULTS_FILE" + echo " Q${query_num}: ${TIME_MS:-0}ms (${ROWS_READ:-0} rows, ${BYTES_READ:-0} bytes)" +} + +run_elasticsearch_sql_query() { + local sql_query="$1" + local query_num="$2" + + echo "Running Q${query_num}..." 
+ + START=$(date +%s%N) + + RESULT=$(jq -n --arg q "$sql_query" '{"query": $q}' | \ + curl -s -X POST "http://${ES_HOST}:${ES_PORT}/_sql?format=json" \ + -H "Authorization: ApiKey ${ES_API_KEY}" \ + -H "Content-Type: application/json" \ + -d @-) + + END=$(date +%s%N) + + TIME_MS=$(( (END - START) / 1000000 )) + + ROWS_READ=$(echo "$RESULT" | python3 -c " +import sys, json +try: + d = json.load(sys.stdin) + print(len(d.get('rows', []))) +except: + print(0) +") + + echo "${query_num},${TIME_MS},${ROWS_READ},0" >> "$RESULTS_FILE" + echo " Q${query_num}: ${TIME_MS}ms (${ROWS_READ} rows)" +} + +echo "Reading queries from: $QUERIES_FILE" +echo "Results will be saved to: $RESULTS_FILE" +echo "" + +# Read queries from file (skip comments and empty lines) +QUERY_NUM=0 +while IFS= read -r line; do + # Skip empty lines and comments + [[ -z "$line" || "$line" =~ ^[[:space:]]*-- ]] && continue + + # Remove trailing semicolon and whitespace + query="${line%;}" + query="${query%"${query##*[![:space:]]}"}" + + QUERY_NUM=$((QUERY_NUM + 1)) + + if [ "$DATABASE_TYPE" == "elasticsearch" ]; then + run_elasticsearch_sql_query "$query" "$QUERY_NUM" + else + run_clickhouse_query "$query" "$QUERY_NUM" + fi +done < "$QUERIES_FILE" + +echo "" +echo "Benchmark complete!" 
+echo "Results saved to: $RESULTS_FILE" + +echo "" +echo "=== Summary ===" +TOTAL_TIME=$(awk -F',' 'NR>1 {sum+=$2} END {print sum}' "$RESULTS_FILE") +echo "Total queries: $QUERY_NUM" +echo "Total query time: ${TOTAL_TIME}ms" +if [ "$QUERY_NUM" -gt 0 ]; then + AVG_TIME=$((TOTAL_TIME / QUERY_NUM)) + echo "Average query time: ${AVG_TIME}ms" +fi diff --git a/ExecutionUtilities/csv_to_prometheus/__init__.py b/ExecutionUtilities/csv_to_prometheus/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py b/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py new file mode 100644 index 0000000..4abfb71 --- /dev/null +++ b/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py @@ -0,0 +1,394 @@ +import argparse +from http.server import HTTPServer, BaseHTTPRequestHandler +import csv + +import logging +from typing import Dict, Optional, List +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class MetricsHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + + metrics = self.server.get_metrics() # type: ignore + self.wfile.write(metrics.encode("utf-8")) + + +# class CSVMetricsExporter: +# def __init__(self, csv_path, timestamp_column, metric_column): +# self.df = pd.read_csv(csv_path) +# print("CSV loaded") +# self.timestamp_column = timestamp_column +# self.metric_column = metric_column +# self.label_columns = [ +# col +# for col in self.df.columns +# if col not in [timestamp_column, metric_column] +# ] + +# # Convert timestamp strings to Unix timestamps (milliseconds) +# self.df[timestamp_column] = ( +# pd.to_datetime(self.df[timestamp_column], unit="ns").astype(int) // 10**6 +# ) +# self.current_index = 0 + +# # Preprocess data by grouping rows by timestamp +# self.metrics_by_timestamp = {} +# self.lengths_by_timestamp = {} +# # grouped = 
self.df.groupby(timestamp_column) +# # print("Grouping done") +# # # TODO: this is horribly slow, fix +# # for group_idx, (timestamp, group) in enumerate(grouped): +# # self.metrics_by_timestamp[timestamp] = self._format_metrics(group) +# # self.lengths_by_timestamp[timestamp] = len(group) +# # # if group_idx % 500 == 0: +# # print(f"Group {group_idx} done") +# self.metrics_by_timestamp = ( +# self.df.groupby(timestamp_column).apply(self._format_metrics).to_dict() +# ) +# self.lengths_by_timestamp = self.df[timestamp_column].value_counts().to_dict() +# print("Metrics done") + +# def _format_metrics(self, group): +# output = [] +# timestamp = int(group.iloc[0][self.timestamp_column]) # type: ignore +# metric_name = f"csv_{self.metric_column}" +# # Add TYPE header only for first occurrence +# output.append(f"# TYPE {metric_name} gauge") +# for _, row in group.iterrows(): +# value = row[self.metric_column] +# labels = ",".join( +# [f'{label}="{row[label]}"' for label in self.label_columns] +# ) +# output.append(f"{metric_name}{{{labels}}} {value} {timestamp}") +# return "\n".join(output) + +# def get_metrics(self): +# if self.current_index >= len(self.df): +# return "" + +# current_timestamp = self.df.iloc[self.current_index][self.timestamp_column] +# metrics = self.metrics_by_timestamp[current_timestamp] +# self.current_index += self.lengths_by_timestamp[current_timestamp] +# return metrics + + +# class ChunkedCSVReader: +# def __init__(self, csv_path: str): +# self.csv_path = csv_path +# self.file = None +# self.reader = None +# self.fieldnames = None + +# def __enter__(self): +# self.file = open(self.csv_path, "r") +# self.reader = csv.DictReader(self.file) +# self.fieldnames = self.reader.fieldnames +# return self + +# def __exit__(self, exc_type, exc_val, exc_tb): +# if self.file: +# self.file.close() + + +class CSVMetricsExporterNoPandas: + def __init__(self, csv_path: str, timestamp_column: str, metric_column: str): + self.csv_path = csv_path + 
self.timestamp_column = timestamp_column + self.metric_column = metric_column + self.label_columns: List[str] = [] + self.current_timestamp: Optional[int] = None + self.current_chunk: List[Dict] = [] + self.file_size = Path(csv_path).stat().st_size + self.bytes_processed = 0 + + # Open file and initialize reader + self.file = open(csv_path, "r") + self.reader = csv.DictReader(self.file) + assert self.reader.fieldnames is not None + + # Initialize label columns + self.label_columns = [ + col + for col in self.reader.fieldnames + if col not in [timestamp_column, metric_column] + ] + logger.info(f"Initialized with {len(self.label_columns)} label columns") + + def __del__(self): + """Cleanup when object is destroyed""" + try: + if hasattr(self, "file") and self.file: + self.file.close() + except Exception as e: + logger.error(f"Error closing file: {str(e)}") + + def reset_file(self): + """Reset file to beginning and reinitialize reader""" + try: + self.file.seek(0) + self.reader = csv.DictReader(self.file) + self.bytes_processed = 0 + self.current_timestamp = None + self.current_chunk = [] + logger.info("File reset to beginning") + except Exception as e: + logger.error(f"Error resetting file: {str(e)}") + # If there's an error, try to reopen the file + self.file = open(self.csv_path, "r") + self.reader = csv.DictReader(self.file) + self.bytes_processed = 0 + + def read_next_chunk(self) -> Optional[List[Dict]]: + """Read the next chunk of rows with the same millisecond timestamp""" + if not self.current_chunk: + try: + # Read until we find a new timestamp + for row in self.reader: + self.bytes_processed += 1 + timestamp = int( + int(row[self.timestamp_column]) // 10**6 + ) # ns to ms + + if self.current_timestamp is None: + self.current_timestamp = timestamp + self.current_chunk.append(row) + elif timestamp == self.current_timestamp: + self.current_chunk.append(row) + else: + # Found a new timestamp, save it for next time + self.current_timestamp = timestamp + 
self.current_chunk.append(row) + break + + progress = (self.bytes_processed / self.file_size) * 100 + logger.info(f"Progress: {progress:.2f}% processed") + + if self.current_chunk: + return self.current_chunk + else: + # End of file reached + self.reset_file() + return None + + except Exception as e: + logger.error(f"Error reading chunk: {str(e)}") + # Try to recover by resetting the file + self.reset_file() + return None + + return self.current_chunk + + def _format_metrics(self, group: List[Dict], timestamp: int) -> str: + """Format metrics in Prometheus exposition format""" + output = [] + metric_name = f"csv_{self.metric_column}" + output.append(f"# TYPE {metric_name} gauge") + + for row in group[:10]: + value = row[self.metric_column] + labels = ",".join( + [f'{label}="{row[label]}"' for label in self.label_columns] + ) + output.append(f"{metric_name}{{{labels}}} {value} {timestamp}") + + return "\n".join(output) + + def get_metrics(self) -> str: + """Get metrics for the current timestamp chunk""" + chunk = self.read_next_chunk() + if not chunk: + return "" + + metrics = self._format_metrics(chunk, self.current_timestamp or 0) + # Clear the chunk after processing + self.current_chunk = [] + return metrics + + +def run_server( + port: int, csv_path: str, timestamp_column: str, metric_column: str +) -> None: + exporter = CSVMetricsExporterNoPandas(csv_path, timestamp_column, metric_column) + + class MetricsServer(HTTPServer): + def get_metrics(self): + return exporter.get_metrics() + + server = MetricsServer(("", port), MetricsHandler) + logger.info(f"Ready to serve metrics on port {port}") + server.serve_forever() + + +# logging.basicConfig(level=logging.INFO) +# logger = logging.getLogger(__name__) + + +# class TimeBasedCSVExporter: +# def __init__( +# self, csv_path: str, metric_column, timestamp_column, update_interval: int = 60 +# ): +# self.csv_path = csv_path +# self.update_interval = update_interval +# self.metrics: Dict[str, Gauge] = {} +# 
self.current_ms: Optional[int] = None +# self.csv_iterator = None +# self.metric_column = metric_column +# self.timestamp_column = timestamp_column + +# def ns_to_ms(self, ns: int) -> int: +# """Convert nanosecond timestamp to milliseconds""" +# return ns // 1_000_000 + +# def get_next_batch(self) -> Optional[pd.DataFrame]: +# """Read rows until timestamp changes by 1ms""" +# if self.csv_iterator is None: +# # Initialize CSV reader with appropriate timestamp parsing +# try: +# self.csv_iterator = pd.read_csv( +# self.csv_path, +# iterator=True, +# dtype={ +# self.timestamp_column: "int64" +# }, # Ensure timestamp is read as int64 +# ) +# except Exception as e: +# logger.error(f"Error initializing CSV reader: {str(e)}") +# raise + +# rows = [] +# try: +# while True: +# # Read one row at a time +# row = next(self.csv_iterator) + +# # Convert timestamp to ms +# row_ms = self.ns_to_ms(row[self.timestamp_column].iloc[0]) + +# if self.current_ms is None: +# # First batch +# self.current_ms = row_ms +# rows.append(row) +# elif row_ms == self.current_ms: +# # Same millisecond, add to batch +# rows.append(row) +# else: +# # New millisecond reached +# # Save this row's ms for next batch +# self.current_ms = row_ms +# # Return concatenated batch +# result = pd.concat(rows, ignore_index=True) +# # Start new batch with current row +# rows = [row] +# return result + +# except StopIteration: +# # End of file reached +# if rows: +# # Return final batch if any rows accumulated +# return pd.concat(rows, ignore_index=True) +# self.csv_iterator = None +# self.current_ms = None +# return None +# except Exception as e: +# logger.error(f"Error reading batch: {str(e)}") +# raise + +# def create_metric(self, metric_name: str, label_names: list) -> None: +# """Create a Prometheus metric if it doesn't exist""" +# if metric_name not in self.metrics: +# self.metrics[metric_name] = Gauge( +# metric_name, f"Metric imported from CSV: {metric_name}", label_names +# ) + +# def process_batch(self, 
batch: pd.DataFrame) -> None: +# """Process a batch of rows with the same millisecond timestamp""" +# try: +# # Get label columns (excluding special columns) +# label_columns = [ +# col +# for col in batch.columns +# if col not in [self.timestamp_column, self.metric_column] +# ] + +# # Process each row in the batch +# for _, row in batch.iterrows(): +# metric_name = row[self.metric_column] + +# # Create metric if it doesn't exist +# self.create_metric(metric_name, label_columns) + +# # Extract labels +# labels = {col: row[col] for col in label_columns} + +# # Update metric value with timestamp in milliseconds +# # Set timestamp explicitly using the current batch timestamp +# self.metrics[metric_name].labels(**labels).set(row["value"]) + +# except Exception as e: +# logger.error(f"Error processing batch: {str(e)}") +# raise + +# def update_metrics(self) -> bool: +# """Read and update metrics from CSV in timestamp-based batches""" +# try: +# batch = self.get_next_batch() +# if batch is not None: +# self.process_batch(batch) +# # Force garbage collection after batch processing +# gc.collect() +# logger.info(f"Processed batch for timestamp {self.current_ms}ms") +# return True +# return False + +# except Exception as e: +# logger.error(f"Error updating metrics: {str(e)}") +# return False + +# def run(self, port: int = 8000) -> None: +# """Start the exporter""" +# start_http_server(port) +# logger.info(f"Metrics server started on port {port}") + +# while True: +# start_time = time.time() + +# # Process all available batches +# while self.update_metrics(): +# pass + +# # Calculate sleep time +# elapsed = time.time() - start_time +# sleep_time = max(0, self.update_interval - elapsed) + +# logger.info( +# f"Update cycle took {elapsed:.2f}s, sleeping for {sleep_time:.2f}s" +# ) +# time.sleep(sleep_time) + + +def main(args): + # exporter = TimeBasedCSVExporter( + # args.input_file, args.metric_column, args.timestamp_column + # ) + # exporter.run() + + run_server( + 
args.http_port, args.input_file, args.timestamp_column, args.metric_column + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True) + parser.add_argument("--timestamp_column", type=str, required=True) + parser.add_argument("--metric_column", type=str, required=True) + parser.add_argument("--http_port", default=8000) + args = parser.parse_args() + main(args) diff --git a/ExecutionUtilities/csv_to_prometheus/setup.py b/ExecutionUtilities/csv_to_prometheus/setup.py new file mode 100644 index 0000000..7925f8c --- /dev/null +++ b/ExecutionUtilities/csv_to_prometheus/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name="csv_to_prometheus", + version="0.1", + packages=find_packages(), + install_requires=[], +) diff --git a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock new file mode 100644 index 0000000..cda575a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock @@ -0,0 +1,1250 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chrono" +version = 
"0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "either" +version = "1.15.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = 
"0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kafka-throughput-producer" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "futures", + "itertools", + "rand", + "rdkafka", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + 
+[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = 
"0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git 
a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml new file mode 100644 index 0000000..9c1da7a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "kafka-throughput-producer" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "producer" +path = "src/main.rs" + +[dependencies] +rdkafka = { version = "0.36", features = ["cmake-build", "ssl"] } +tokio = { version = "1.0", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.0", features = ["derive"] } +rand = "0.8" +chrono = { version = "0.4", features = ["serde"] } +itertools = "0.12" +tracing = "0.1" +tracing-subscriber = "0.3" +anyhow = "1.0" +futures = "0.3" diff --git a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs new file mode 100644 index 0000000..b93da31 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs @@ -0,0 +1,450 @@ +use anyhow::Result; +use clap::Parser; +use futures::future::join_all; +use itertools::Itertools; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; +use rdkafka::config::ClientConfig; +use rdkafka::producer::{FutureProducer, FutureRecord, Producer}; +use serde_json; +use std::collections::HashMap; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tokio::time::sleep; +use tracing::{error, info, warn}; + + +#[derive(Debug, Clone)] +struct LabelChoices { + hostname: Vec, + location: Vec, + application_name: Vec, + instance: Vec, + job: Vec, +} + +impl Default for LabelChoices { + fn default() -> Self { + Self { + hostname: vec![ + "host1".to_string(), + "host2".to_string(), + 
"host3".to_string(), + "host4".to_string(), + "host5".to_string(), + ], + location: vec![ + "us-east".to_string(), + "us-west".to_string(), + "eu-central".to_string(), + "ap-southeast".to_string(), + ], + application_name: vec![ + "app1".to_string(), + "app2".to_string(), + "app3".to_string(), + "app4".to_string(), + ], + instance: vec![ + "worker1".to_string(), + "worker2".to_string(), + "worker3".to_string(), + "worker4".to_string(), + ], + job: vec![ + "throughput-test".to_string(), + "latency-test".to_string(), + "stress-test".to_string(), + ], + } + } +} + +static METRIC_NAMES: &[&str] = &[ + "cpu_usage", + "memory_usage", + "network_throughput", + "disk_iops", + "response_time", + "error_rate", +]; + +#[derive(Debug, Clone)] +struct ProducerStats { + messages_sent: u64, + bytes_sent: u64, + errors: u64, +} + +impl ProducerStats { + fn new() -> Self { + Self { + messages_sent: 0, + bytes_sent: 0, + errors: 0, + } + } + + fn add_message(&mut self, bytes: u64) { + self.messages_sent += 1; + self.bytes_sent += bytes; + } + + fn add_error(&mut self) { + self.errors += 1; + } + + fn get_stats(&self) -> (u64, u64, u64) { + (self.messages_sent, self.bytes_sent, self.errors) + } +} + +#[derive(Parser, Debug)] +#[command(name = "kafka-throughput-producer")] +#[command(about = "High-performance Kafka producer for Arroyo benchmarking")] +struct Args { + #[arg(long, default_value = "localhost:9092")] + kafka_broker: String, + + #[arg(long)] + kafka_topic: String, + + #[arg(long, default_value = "1000000")] + total_messages: u64, + + #[arg(long, default_value = "10000")] + messages_per_second: u64, + + #[arg(long)] + duration: Option, + + + #[arg(long, default_value = "1")] + num_partitions: i32, + + #[arg(long, default_value = "1")] + replication_factor: i32, + + #[arg(long)] + vary_labels: bool, + + #[arg(long, default_value = "false")] + enable_flush: bool, + + #[arg(long, default_value = "none")] + compression: String, + + #[arg(long, default_value = "65536")] + 
batch_size: usize, +} + +#[derive(Clone)] +struct HighThroughputProducer { + producer: FutureProducer, + topic_name: String, + stats: ProducerStats, + label_choices: LabelChoices, +} + + +impl HighThroughputProducer { + async fn new_with_compression( + kafka_broker: &str, + topic_name: String, + num_partitions: i32, + replication_factor: i32, + compression: &str, + ) -> Result { + // High-performance producer configuration optimized for throughput + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("linger.ms", "5") + .set("batch.size", "1048576") // 1MB batches + .set("compression.type", compression) + .set("queue.buffering.max.messages", "1000000") + .set("queue.buffering.max.kbytes", "2097152") // 2GB + .set("batch.num.messages", "10000") + .set("acks", "0") // No acknowledgments for max throughput + .set("retries", "0") // No retries for max throughput + .set("message.max.bytes", "1048576") // 1MB + .set("queue.buffering.max.ms", "10") + .set("delivery.timeout.ms", "30000") + .create()?; + + let label_choices = LabelChoices::default(); + + let kafka_producer = Self { + producer, + topic_name: topic_name.clone(), + stats: ProducerStats::new(), + label_choices, + }; + + kafka_producer + .create_topic_if_not_exists(kafka_broker, &topic_name, num_partitions, replication_factor) + .await?; + + Ok(kafka_producer) + } + + async fn create_topic_if_not_exists( + &self, + kafka_broker: &str, + topic_name: &str, + num_partitions: i32, + replication_factor: i32, + ) -> Result<()> { + let admin: AdminClient<_> = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .create()?; + + let metadata = admin.inner().fetch_metadata(None, Duration::from_secs(10))?; + + let topic_exists = metadata.topics().iter().any(|t| t.name() == topic_name); + + if !topic_exists { + let new_topic = NewTopic::new( + topic_name, + num_partitions, + TopicReplication::Fixed(replication_factor), + ); + + let opts = 
AdminOptions::new().request_timeout(Some(Duration::from_secs(10))); + let results = admin.create_topics(&[new_topic], &opts).await?; + + for result in results { + match result { + Ok(topic) => info!("Created topic: {}", topic), + Err((topic, error)) => { + error!("Failed to create topic {}: {}", topic, error); + return Err(anyhow::anyhow!("Topic creation failed")); + } + } + } + + // Wait for topic creation to propagate + sleep(Duration::from_secs(2)).await; + info!("Topic '{}' created with {} partitions", topic_name, num_partitions); + } + + Ok(()) + } + + + fn generate_prometheus_metric(&self, labels: &[String]) -> Result> { + let mut rng = thread_rng(); + + let metric_name = METRIC_NAMES.choose(&mut rng).unwrap().to_string(); + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_millis() as u64; + + let value = match metric_name.as_str() { + "cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + let mut label_map = HashMap::new(); + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_map.insert(key.to_string(), serde_json::Value::String(labels[i].clone())); + } + } + + let mut metric = HashMap::new(); + metric.insert("metric_name".to_string(), serde_json::Value::String(metric_name)); + metric.insert("timestamp".to_string(), serde_json::Value::Number(serde_json::Number::from(timestamp))); + metric.insert("value".to_string(), serde_json::Value::Number(serde_json::Number::from_f64(value).unwrap())); + metric.insert("labels".to_string(), serde_json::Value::Object(label_map.into_iter().collect())); + + Ok(metric) + } + + async fn produce_message_batch( + &mut self, + batch: Vec<(String, Vec)>, + ) -> 
Result<()> { + let mut send_futures = Vec::new(); + + for (partition_key, labels) in batch { + let metric = self.generate_prometheus_metric(&labels)?; + let message_data = serde_json::to_vec(&metric)?; + let message_size = message_data.len(); + + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + + let send_future = async move { + let record = FutureRecord::to(&topic_name) + .key(&partition_key) + .payload(&message_data); + + let result = producer.send(record, Duration::from_secs(10)).await; + (result, message_size) + }; + + send_futures.push(send_future); + } + + // Send all messages concurrently and collect results + let results = join_all(send_futures).await; + + for (result, message_size) in results { + match result { + Ok(_) => { + self.stats.add_message(message_size as u64); + } + Err((kafka_error, _)) => { + self.stats.add_error(); + warn!("Failed to send message: {}", kafka_error); + } + } + } + Ok(()) + } + + + + fn generate_all_label_combinations(&self) -> Vec> { + let label_values = vec![ + &self.label_choices.hostname, + &self.label_choices.location, + &self.label_choices.application_name, + &self.label_choices.instance, + &self.label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + async fn run_benchmark( + &mut self, + args: &Args, + ) -> Result<()> { + info!( + "Starting single-threaded benchmark: {} messages at {} msg/s", + args.total_messages, args.messages_per_second + ); + info!("Producer initialized for single-threaded operation"); + info!("🚀 Data generation started!"); + + let all_labels = self.generate_all_label_combinations(); + let start_time = Instant::now(); + let mut messages_sent = 0u64; + + let messages_per_interval = args.messages_per_second; + let batch_size = std::cmp::max(1, args.batch_size); + let interval = Duration::from_secs(1); + + while messages_sent < args.total_messages { + if let 
Some(duration) = args.duration { + if start_time.elapsed().as_secs() > duration { + break; + } + } + + let interval_start = Instant::now(); + + // Select labels for this interval + let labels_subset = if args.vary_labels { + let mut rng = thread_rng(); + let num_labels = rng.gen_range(1..=std::cmp::min(all_labels.len(), messages_per_interval as usize)); + all_labels.choose_multiple(&mut rng, num_labels).cloned().collect::>() + } else { + all_labels[..std::cmp::min(all_labels.len(), messages_per_interval as usize)].to_vec() + }; + + // Process messages in batches sequentially + let mut remaining_messages = std::cmp::min( + messages_per_interval, + args.total_messages - messages_sent + ); + + while remaining_messages > 0 { + let current_batch_size = std::cmp::min(batch_size as u64, remaining_messages) as usize; + let batch: Vec<(String, Vec)> = (0..current_batch_size) + .map(|i| { + let labels = &labels_subset[i % labels_subset.len()]; + let partition_key = format!("{}_{}", labels[0], labels[1]); + (partition_key, labels.clone()) + }) + .collect(); + + // Process batch sequentially in single thread + if let Err(e) = self.produce_message_batch(batch).await { + error!("Batch processing failed: {}", e); + } + + remaining_messages -= current_batch_size as u64; + messages_sent += current_batch_size as u64; + } + + // Rate limiting + let elapsed = interval_start.elapsed(); + if elapsed < interval { + sleep(interval - elapsed).await; + } + + // Print progress + if messages_sent % (args.messages_per_second) == 0 { + self.print_stats(start_time); + } + } + + // Final flush - wait for all messages to be delivered + info!("Flushing remaining messages..."); + if let Err(e) = self.producer.flush(Duration::from_secs(30)) { + warn!("Error during flush: {}", e); + } + + info!("Benchmark completed!"); + self.print_stats(start_time); + + Ok(()) + } + + fn print_stats(&self, start_time: Instant) { + let (messages, bytes, errors) = self.stats.get_stats(); + let elapsed = 
start_time.elapsed().as_secs_f64(); + + let rate = if elapsed > 0.0 { messages as f64 / elapsed } else { 0.0 }; + let throughput_mb = if elapsed > 0.0 { + (bytes as f64 / (1024.0 * 1024.0)) / elapsed + } else { + 0.0 + }; + + info!( + "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s, Errors: {}", + messages, rate, throughput_mb, errors + ); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + + let producer = HighThroughputProducer::new_with_compression( + &args.kafka_broker, + args.kafka_topic.clone(), + args.num_partitions, + args.replication_factor, + &args.compression, + ).await?; + + let mut producer = producer; + producer.run_benchmark(&args).await?; + + Ok(()) +} diff --git a/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock new file mode 100644 index 0000000..cda575a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock @@ -0,0 +1,1250 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chrono" +version = 
"0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "either" +version = "1.15.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = 
"0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kafka-throughput-producer" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "futures", + "itertools", + "rand", + "rdkafka", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + 
+[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = 
"0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git 
a/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml new file mode 100644 index 0000000..9c1da7a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "kafka-throughput-producer" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "producer" +path = "src/main.rs" + +[dependencies] +rdkafka = { version = "0.36", features = ["cmake-build", "ssl"] } +tokio = { version = "1.0", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.0", features = ["derive"] } +rand = "0.8" +chrono = { version = "0.4", features = ["serde"] } +itertools = "0.12" +tracing = "0.1" +tracing-subscriber = "0.3" +anyhow = "1.0" +futures = "0.3" diff --git a/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java b/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java new file mode 100644 index 0000000..d6ed1d0 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java @@ -0,0 +1,598 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.tools; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.util.ThroughputThrottler; + +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.MutuallyExclusiveGroup; +import net.sourceforge.argparse4j.inf.Namespace; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.Properties; +import java.util.Scanner; +import java.util.SplittableRandom; + +import static net.sourceforge.argparse4j.impl.Arguments.store; +import static net.sourceforge.argparse4j.impl.Arguments.storeTrue; + +public class ProducerPerformance { + + public static final String DEFAULT_TRANSACTION_ID_PREFIX = "performance-producer-"; + public static final long DEFAULT_TRANSACTION_DURATION_MS = 3000L; + + public static void main(String[] args) throws Exception { + ProducerPerformance perf = new ProducerPerformance(); + perf.start(args); + } + + void start(String[] args) throws IOException { + ArgumentParser parser = argParser(); + + try { + ConfigPostProcessor config = new ConfigPostProcessor(parser, args); + KafkaProducer producer = createKafkaProducer(config.producerProps); + + if 
(config.transactionsEnabled) + producer.initTransactions(); + + /* setup perf test */ + byte[] payload = null; + if (config.recordSize != null) { + payload = new byte[config.recordSize]; + } + // not thread-safe, do not share with other threads + SplittableRandom random = new SplittableRandom(0); + ProducerRecord record; + + if (config.warmupRecords > 0) { + System.out.println("Warmup first " + config.warmupRecords + " records. Steady state results will print after the complete test summary."); + } + boolean isSteadyState = false; + stats = new Stats(config.numRecords, isSteadyState); + long startMs = System.currentTimeMillis(); + + ThroughputThrottler throttler = new ThroughputThrottler(config.throughput, startMs); + + int currentTransactionSize = 0; + long transactionStartTime = 0; + for (long i = 0; i < config.numRecords; i++) { + + payload = generateRandomPayload(config.recordSize, config.payloadByteList, payload, random, config.payloadMonotonic, i); + + if (config.transactionsEnabled && currentTransactionSize == 0) { + producer.beginTransaction(); + transactionStartTime = System.currentTimeMillis(); + } + + record = new ProducerRecord<>(config.topicName, payload); + + long sendStartMs = System.currentTimeMillis(); + if ((isSteadyState = config.warmupRecords > 0) && i == config.warmupRecords) { + steadyStateStats = new Stats(config.numRecords - config.warmupRecords, isSteadyState); + stats.suppressPrinting(); + } + cb = new PerfCallback(sendStartMs, payload.length, stats, steadyStateStats); + producer.send(record, cb); + + currentTransactionSize++; + if (config.transactionsEnabled && config.transactionDurationMs <= (sendStartMs - transactionStartTime)) { + producer.commitTransaction(); + currentTransactionSize = 0; + } + + if (throttler.shouldThrottle(i, sendStartMs)) { + throttler.throttle(); + } + } + + if (config.transactionsEnabled && currentTransactionSize != 0) + producer.commitTransaction(); + + if (!config.shouldPrintMetrics) { + producer.close(); + + 
/* print final results */ + stats.printTotal(); + /* print steady-state stats if relevant */ + if (steadyStateStats != null) { + steadyStateStats.printTotal(); + } + } else { + // Make sure all messages are sent before printing out the stats and the metrics + // We need to do this in a different branch for now since tests/kafkatest/sanity_checks/test_performance_services.py + // expects this class to work with older versions of the client jar that don't support flush(). + producer.flush(); + + /* print final results */ + stats.printTotal(); + /* print steady-state stats if relevant */ + if (steadyStateStats != null) { + steadyStateStats.printTotal(); + } + + /* print out metrics */ + ToolsUtils.printMetrics(producer.metrics()); + producer.close(); + } + } catch (ArgumentParserException e) { + if (args.length == 0) { + parser.printHelp(); + Exit.exit(0); + } else { + parser.handleError(e); + Exit.exit(1); + } + } + + } + + KafkaProducer createKafkaProducer(Properties props) { + return new KafkaProducer<>(props); + } + + Callback cb; + Stats stats; + Stats steadyStateStats; + + static byte[] generateRandomPayload(Integer recordSize, List payloadByteList, byte[] payload, + SplittableRandom random, boolean payloadMonotonic, long recordValue) { + if (!payloadByteList.isEmpty()) { + payload = payloadByteList.get(random.nextInt(payloadByteList.size())); + } else if (recordSize != null) { + for (int j = 0; j < payload.length; ++j) + payload[j] = (byte) (random.nextInt(26) + 65); + } else if (payloadMonotonic) { + payload = Long.toString(recordValue).getBytes(StandardCharsets.UTF_8); + } else { + throw new IllegalArgumentException("no payload File Path or record Size or payload-monotonic option provided"); + } + return payload; + } + + static Properties readProps(List producerProps, String producerConfig) throws IOException { + Properties props = new Properties(); + if (producerConfig != null) { + props.putAll(Utils.loadProps(producerConfig)); + } + if (producerProps != 
null) + for (String prop : producerProps) { + String[] pieces = prop.split("="); + if (pieces.length != 2) + throw new IllegalArgumentException("Invalid property: " + prop); + props.put(pieces[0], pieces[1]); + } + + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); + if (props.getProperty(ProducerConfig.CLIENT_ID_CONFIG) == null) { + props.put(ProducerConfig.CLIENT_ID_CONFIG, "perf-producer-client"); + } + return props; + } + + static List readPayloadFile(String payloadFilePath, String payloadDelimiter) throws IOException { + List payloadByteList = new ArrayList<>(); + if (payloadFilePath != null) { + Path path = Paths.get(payloadFilePath); + System.out.println("Reading payloads from: " + path.toAbsolutePath()); + if (Files.notExists(path) || Files.size(path) == 0) { + throw new IllegalArgumentException("File does not exist or empty file provided."); + } + + try (Scanner payLoadScanner = new Scanner(path, StandardCharsets.UTF_8)) { + //setting the delimiter while parsing the file, avoids loading entire data in memory before split + payLoadScanner.useDelimiter(payloadDelimiter); + while (payLoadScanner.hasNext()) { + byte[] payloadBytes = payLoadScanner.next().getBytes(StandardCharsets.UTF_8); + payloadByteList.add(payloadBytes); + } + } + + System.out.println("Number of messages read: " + payloadByteList.size()); + + } + return payloadByteList; + } + + /** Get the command-line argument parser. */ + static ArgumentParser argParser() { + ArgumentParser parser = ArgumentParsers + .newArgumentParser("producer-performance") + .defaultHelp(true) + .description("This tool is used to verify the producer performance. To enable transactions, " + + "you can specify a transaction id or set a transaction duration using --transaction-duration-ms. 
" + + "There are three ways to specify the transaction id: set transaction.id= via --producer-props, " + + "set transaction.id= in the config file via --producer.config, or use --transaction-id ."); + + MutuallyExclusiveGroup payloadOptions = parser + .addMutuallyExclusiveGroup() + .required(true) + .description("either --record-size or --payload-file must be specified but not both."); + + parser.addArgument("--topic") + .action(store()) + .required(true) + .type(String.class) + .metavar("TOPIC") + .help("produce messages to this topic"); + + parser.addArgument("--num-records") + .action(store()) + .required(true) + .type(Long.class) + .metavar("NUM-RECORDS") + .dest("numRecords") + .help("number of messages to produce"); + + payloadOptions.addArgument("--record-size") + .action(store()) + .required(false) + .type(Integer.class) + .metavar("RECORD-SIZE") + .dest("recordSize") + .help("message size in bytes. Note that you must provide exactly one of --record-size or --payload-file " + + "or --payload-monotonic."); + + payloadOptions.addArgument("--payload-file") + .action(store()) + .required(false) + .type(String.class) + .metavar("PAYLOAD-FILE") + .dest("payloadFile") + .help("file to read the message payloads from. This works only for UTF-8 encoded text files. " + + "Payloads will be read from this file and a payload will be randomly selected when sending messages. " + + "Note that you must provide exactly one of --record-size or --payload-file or --payload-monotonic."); + + payloadOptions.addArgument("--payload-monotonic") + .action(storeTrue()) + .type(Boolean.class) + .metavar("PAYLOAD-MONOTONIC") + .dest("payloadMonotonic") + .help("payload is monotonically increasing integer. 
Note that you must provide exactly one of --record-size " + + "or --payload-file or --payload-monotonic."); + + parser.addArgument("--payload-delimiter") + .action(store()) + .required(false) + .type(String.class) + .metavar("PAYLOAD-DELIMITER") + .dest("payloadDelimiter") + .setDefault("\\n") + .help("provides delimiter to be used when --payload-file is provided. " + + "Defaults to new line. " + + "Note that this parameter will be ignored if --payload-file is not provided."); + + parser.addArgument("--throughput") + .action(store()) + .required(true) + .type(Double.class) + .metavar("THROUGHPUT") + .help("throttle maximum message throughput to *approximately* THROUGHPUT messages/sec. Set this to -1 to disable throttling."); + + parser.addArgument("--producer-props") + .nargs("+") + .required(false) + .metavar("PROP-NAME=PROP-VALUE") + .type(String.class) + .dest("producerConfig") + .help("kafka producer related configuration properties like bootstrap.servers,client.id etc. " + + "These configs take precedence over those passed via --producer.config."); + + parser.addArgument("--producer.config") + .action(store()) + .required(false) + .type(String.class) + .metavar("CONFIG-FILE") + .dest("producerConfigFile") + .help("producer config properties file."); + + parser.addArgument("--print-metrics") + .action(storeTrue()) + .type(Boolean.class) + .metavar("PRINT-METRICS") + .dest("printMetrics") + .help("print out metrics at the end of the test."); + + parser.addArgument("--transactional-id") + .action(store()) + .required(false) + .type(String.class) + .metavar("TRANSACTIONAL-ID") + .dest("transactionalId") + .help("The transactional id to use. This config takes precedence over the transactional.id " + + "specified via --producer.config or --producer-props. 
Note that if the transactional id " + + "is not specified while --transaction-duration-ms is provided, the default value for the " + + "transactional id will be performance-producer- followed by a random uuid."); + + parser.addArgument("--transaction-duration-ms") + .action(store()) + .required(false) + .type(Long.class) + .metavar("TRANSACTION-DURATION") + .dest("transactionDurationMs") + .help("The max age of each transaction. The commitTransaction will be called after this time has elapsed. " + + "The value should be greater than 0. If the transactional id is specified via --producer-props, " + + "--producer.config, or --transactional-id but --transaction-duration-ms is not specified, " + + "the default value will be 3000."); + + parser.addArgument("--warmup-records") + .action(store()) + .required(false) + .type(Long.class) + .metavar("WARMUP-RECORDS") + .dest("warmupRecords") + .setDefault(0L) + .help("The number of records to treat as warmup; these initial records will not be included in steady-state statistics. " + + "An additional summary line will be printed describing the steady-state statistics. 
(default: 0)."); + + return parser; + } + + // Visible for testing + static class Stats { + private final long start; + private final int[] latencies; + private final long sampling; + private final long reportingInterval; + private long iteration; + private int index; + private long count; + private long bytes; + private int maxLatency; + private long totalLatency; + private long windowCount; + private int windowMaxLatency; + private long windowTotalLatency; + private long windowBytes; + private long windowStart; + private final boolean isSteadyState; + private boolean suppressPrint; + + public Stats(long numRecords, boolean isSteadyState) { + this.start = System.currentTimeMillis(); + this.windowStart = System.currentTimeMillis(); + this.iteration = 0; + this.sampling = numRecords / Math.min(numRecords, 500000); + this.latencies = new int[(int) (numRecords / this.sampling) + 1]; + this.index = 0; + this.maxLatency = 0; + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + this.totalLatency = 0; + this.reportingInterval = 5000; + this.isSteadyState = isSteadyState; + this.suppressPrint = false; + } + + public void record(int latency, int bytes, long time) { + this.count++; + this.bytes += bytes; + this.totalLatency += latency; + this.maxLatency = Math.max(this.maxLatency, latency); + this.windowCount++; + this.windowBytes += bytes; + this.windowTotalLatency += latency; + this.windowMaxLatency = Math.max(windowMaxLatency, latency); + if (this.iteration % this.sampling == 0) { + this.latencies[index] = latency; + this.index++; + } + /* maybe report the recent perf */ + if (time - windowStart >= reportingInterval) { + if (this.isSteadyState && count == windowCount) { + System.out.println("In steady state."); + } + if (!this.suppressPrint) { + printWindow(); + } + newWindow(); + } + this.iteration++; + } + + public long totalCount() { + return this.count; + } + + public long currentWindowCount() { + return 
this.windowCount; + } + + public long iteration() { + return this.iteration; + } + + public long bytes() { + return this.bytes; + } + + public int index() { + return this.index; + } + + public void printWindow() { + long elapsed = System.currentTimeMillis() - windowStart; + double recsPerSec = 1000.0 * windowCount / (double) elapsed; + double mbPerSec = 1000.0 * this.windowBytes / (double) elapsed / (1024.0 * 1024.0); + System.out.printf("%d records sent, %.1f records/sec (%.2f MB/sec), %.1f ms avg latency, %.1f ms max latency.%n", + windowCount, + recsPerSec, + mbPerSec, + windowTotalLatency / (double) windowCount, + (double) windowMaxLatency); + } + + public void newWindow() { + this.windowStart = System.currentTimeMillis(); + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + } + + public void printTotal() { + long elapsed = System.currentTimeMillis() - start; + double recsPerSec = 1000.0 * count / (double) elapsed; + double mbPerSec = 1000.0 * this.bytes / (double) elapsed / (1024.0 * 1024.0); + int[] percs = percentiles(this.latencies, index, 0.5, 0.95, 0.99, 0.999); + System.out.printf("%d%s records sent, %f records/sec (%.2f MB/sec), %.2f ms avg latency, %.2f ms max latency, %d ms 50th, %d ms 95th, %d ms 99th, %d ms 99.9th.%n", + count, + this.isSteadyState ? " steady state" : "", + recsPerSec, + mbPerSec, + totalLatency / (double) count, + (double) maxLatency, + percs[0], + percs[1], + percs[2], + percs[3]); + } + + private static int[] percentiles(int[] latencies, int count, double... 
percentiles) { + int size = Math.min(count, latencies.length); + Arrays.sort(latencies, 0, size); + int[] values = new int[percentiles.length]; + for (int i = 0; i < percentiles.length; i++) { + int index = (int) (percentiles[i] * size); + values[i] = latencies[index]; + } + return values; + } + + public void suppressPrinting() { + this.suppressPrint = true; + } + } + + static final class PerfCallback implements Callback { + private final long start; + private final int bytes; + private final Stats stats; + private final Stats steadyStateStats; + + public PerfCallback(long start, int bytes, Stats stats, Stats steadyStateStats) { + this.start = start; + this.stats = stats; + this.steadyStateStats = steadyStateStats; + this.bytes = bytes; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + long now = System.currentTimeMillis(); + int latency = (int) (now - start); + // It will only be counted when the sending is successful, otherwise the number of sent records may be + // magically printed when the sending fails. 
+ if (exception == null) { + this.stats.record(latency, bytes, now); + if (steadyStateStats != null) { + this.steadyStateStats.record(latency, bytes, now); + } + } + if (exception != null) + exception.printStackTrace(); + } + } + + static final class ConfigPostProcessor { + final String topicName; + final long numRecords; + final long warmupRecords; + final Integer recordSize; + final double throughput; + final boolean payloadMonotonic; + final Properties producerProps; + final boolean shouldPrintMetrics; + final Long transactionDurationMs; + final boolean transactionsEnabled; + final List payloadByteList; + + public ConfigPostProcessor(ArgumentParser parser, String[] args) throws IOException, ArgumentParserException { + Namespace namespace = parser.parseArgs(args); + this.topicName = namespace.getString("topic"); + this.numRecords = namespace.getLong("numRecords"); + this.warmupRecords = Math.max(namespace.getLong("warmupRecords"), 0); + this.recordSize = namespace.getInt("recordSize"); + this.throughput = namespace.getDouble("throughput"); + this.payloadMonotonic = namespace.getBoolean("payloadMonotonic"); + this.shouldPrintMetrics = namespace.getBoolean("printMetrics"); + + List producerConfigs = namespace.getList("producerConfig"); + String producerConfigFile = namespace.getString("producerConfigFile"); + String payloadFilePath = namespace.getString("payloadFile"); + Long transactionDurationMsArg = namespace.getLong("transactionDurationMs"); + String transactionIdArg = namespace.getString("transactionalId"); + if (numRecords <= 0) { + throw new ArgumentParserException("--num-records should be greater than zero", parser); + } + if (warmupRecords >= numRecords) { + throw new ArgumentParserException("The value for --warmup-records must be strictly fewer than the number of records in the test, --num-records.", parser); + } + if (recordSize != null && recordSize <= 0) { + throw new ArgumentParserException("--record-size should be greater than zero", parser); + } + 
if (producerConfigs == null && producerConfigFile == null) { + throw new ArgumentParserException("Either --producer-props or --producer.config must be specified.", parser); + } + if (transactionDurationMsArg != null && transactionDurationMsArg <= 0) { + throw new ArgumentParserException("--transaction-duration-ms should be greater than zero", parser); + } + + // since default value gets printed with the help text, we are escaping \n there and replacing it with correct value here. + String payloadDelimiter = namespace.getString("payloadDelimiter").equals("\\n") + ? "\n" : namespace.getString("payloadDelimiter"); + this.payloadByteList = readPayloadFile(payloadFilePath, payloadDelimiter); + this.producerProps = readProps(producerConfigs, producerConfigFile); + // setup transaction related configs + this.transactionsEnabled = transactionDurationMsArg != null + || transactionIdArg != null + || producerProps.containsKey(ProducerConfig.TRANSACTIONAL_ID_CONFIG); + if (transactionsEnabled) { + Optional txIdInProps = + Optional.ofNullable(producerProps.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG)) + .map(Object::toString); + String transactionId = Optional.ofNullable(transactionIdArg).orElse(txIdInProps.orElse(DEFAULT_TRANSACTION_ID_PREFIX + Uuid.randomUuid().toString())); + producerProps.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionId); + + if (transactionDurationMsArg == null) { + transactionDurationMsArg = DEFAULT_TRANSACTION_DURATION_MS; + } + } + this.transactionDurationMs = transactionDurationMsArg; + } + } +} diff --git a/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs b/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs new file mode 100644 index 0000000..e7e06f6 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs @@ -0,0 +1,556 @@ +use anyhow::Result; +use clap::Parser; +use futures::future::join_all; +use itertools::Itertools; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use 
rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; +use rdkafka::config::ClientConfig; +use rdkafka::producer::{FutureProducer, FutureRecord, Producer}; +use serde_json; +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tokio::time::sleep; +use tracing::{error, info, warn}; + +#[derive(Debug, Clone)] +struct PrometheusTemplate { + template: String, + metric_name: String, +} + +impl PrometheusTemplate { + fn new(metric_name: String, labels: &[String]) -> Self { + // Create JSON template with placeholders + let mut label_parts = Vec::new(); + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_parts.push(format!("\"{}\": \"{}\"", key, labels[i])); + } + } + let labels_json = label_parts.join(", "); + + let template = format!( + "{{\"metric_name\": \"{}\", \"timestamp\": {{TIMESTAMP}}, \"value\": {{VALUE}}, \"labels\": {{{}}}}}", + metric_name, labels_json + ); + + Self { + template, + metric_name, + } + } + + fn generate_message(&self, timestamp: u64, value: f64) -> Vec { + // Fast string replacement instead of JSON serialization + let mut result = self.template.clone(); + result = result.replace("{TIMESTAMP}", ×tamp.to_string()); + result = result.replace("{VALUE}", &format!("{:.2}", value)); + result.into_bytes() + } +} + +#[derive(Debug, Clone)] +struct LabelChoices { + hostname: Vec, + location: Vec, + application_name: Vec, + instance: Vec, + job: Vec, +} + +impl Default for LabelChoices { + fn default() -> Self { + Self { + hostname: vec![ + "host1".to_string(), + "host2".to_string(), + "host3".to_string(), + "host4".to_string(), + "host5".to_string(), + ], + location: vec![ + "us-east".to_string(), + "us-west".to_string(), + "eu-central".to_string(), + "ap-southeast".to_string(), + ], + application_name: vec![ + 
"app1".to_string(), + "app2".to_string(), + "app3".to_string(), + "app4".to_string(), + ], + instance: vec![ + "worker1".to_string(), + "worker2".to_string(), + "worker3".to_string(), + "worker4".to_string(), + ], + job: vec![ + "throughput-test".to_string(), + "latency-test".to_string(), + "stress-test".to_string(), + ], + } + } +} + +static METRIC_NAMES: &[&str] = &[ + "cpu_usage", + "memory_usage", + "network_throughput", + "disk_iops", + "response_time", + "error_rate", +]; + +#[derive(Debug, Clone)] +struct ProducerStats { + messages_sent: Arc, + bytes_sent: Arc, + errors: Arc, +} + +impl ProducerStats { + fn new() -> Self { + Self { + messages_sent: Arc::new(AtomicU64::new(0)), + bytes_sent: Arc::new(AtomicU64::new(0)), + errors: Arc::new(AtomicU64::new(0)), + } + } + + fn add_message(&self, bytes: u64) { + self.messages_sent.fetch_add(1, Ordering::Relaxed); + self.bytes_sent.fetch_add(bytes, Ordering::Relaxed); + } + + fn add_error(&self) { + self.errors.fetch_add(1, Ordering::Relaxed); + } + + fn get_stats(&self) -> (u64, u64, u64) { + ( + self.messages_sent.load(Ordering::Relaxed), + self.bytes_sent.load(Ordering::Relaxed), + self.errors.load(Ordering::Relaxed), + ) + } +} + +#[derive(Parser, Debug)] +#[command(name = "kafka-throughput-producer")] +#[command(about = "High-performance Kafka producer for Arroyo benchmarking")] +struct Args { + #[arg(long, default_value = "localhost:9092")] + kafka_broker: String, + + #[arg(long)] + kafka_topic: String, + + #[arg(long, default_value = "1000000")] + total_messages: u64, + + #[arg(long, default_value = "10000")] + messages_per_second: u64, + + #[arg(long)] + duration: Option, + + #[arg(long, default_value = "1")] + num_threads: usize, + + #[arg(long, default_value = "1")] + num_partitions: i32, + + #[arg(long, default_value = "1")] + replication_factor: i32, + + #[arg(long)] + vary_labels: bool, + + #[arg(long, default_value = "false")] + enable_flush: bool, + + #[arg(long, default_value = "none")] + 
compression: String, + + #[arg(long, default_value = "65536")] + batch_size: usize, +} + +#[derive(Clone)] +struct HighThroughputProducer { + producer: FutureProducer, + topic_name: String, + stats: ProducerStats, + templates: Arc>, + label_choices: LabelChoices, +} + + +impl HighThroughputProducer { + async fn new_with_compression( + kafka_broker: &str, + topic_name: String, + num_partitions: i32, + replication_factor: i32, + compression: &str, + ) -> Result { + // High-performance producer configuration optimized for throughput + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("linger.ms", "5") + .set("batch.size", "1048576") // 1MB batches + .set("compression.type", compression) + .set("queue.buffering.max.messages", "1000000") + .set("queue.buffering.max.kbytes", "2097152") // 2GB + .set("batch.num.messages", "10000") + .set("acks", "0") // No acknowledgments for max throughput + .set("retries", "0") // No retries for max throughput + .set("message.max.bytes", "1048576") // 1MB + .set("queue.buffering.max.ms", "10") + .set("delivery.timeout.ms", "30000") + .create()?; + + // Pre-generate templates for all label combinations + let label_choices = LabelChoices::default(); + let all_labels = Self::generate_all_label_combinations_static(&label_choices); + let mut templates = Vec::new(); + + for metric_name in METRIC_NAMES { + for labels in &all_labels { + templates.push(PrometheusTemplate::new(metric_name.to_string(), labels)); + } + } + + let kafka_producer = Self { + producer, + topic_name: topic_name.clone(), + stats: ProducerStats::new(), + templates: Arc::new(templates), + label_choices, + }; + + kafka_producer + .create_topic_if_not_exists(kafka_broker, &topic_name, num_partitions, replication_factor) + .await?; + + Ok(kafka_producer) + } + + async fn create_topic_if_not_exists( + &self, + kafka_broker: &str, + topic_name: &str, + num_partitions: i32, + replication_factor: i32, + ) -> Result<()> { + let 
admin: AdminClient<_> = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .create()?; + + let metadata = admin.inner().fetch_metadata(None, Duration::from_secs(10))?; + + let topic_exists = metadata.topics().iter().any(|t| t.name() == topic_name); + + if !topic_exists { + let new_topic = NewTopic::new( + topic_name, + num_partitions, + TopicReplication::Fixed(replication_factor), + ); + + let opts = AdminOptions::new().request_timeout(Some(Duration::from_secs(10))); + let results = admin.create_topics(&[new_topic], &opts).await?; + + for result in results { + match result { + Ok(topic) => info!("Created topic: {}", topic), + Err((topic, error)) => { + error!("Failed to create topic {}: {}", topic, error); + return Err(anyhow::anyhow!("Topic creation failed")); + } + } + } + + // Wait for topic creation to propagate + sleep(Duration::from_secs(2)).await; + info!("Topic '{}' created with {} partitions", topic_name, num_partitions); + } + + Ok(()) + } + + fn generate_fast_prometheus_message(&self) -> Vec { + let mut rng = thread_rng(); + + // Select random template (pre-built with metric name and labels) + let template = self.templates.choose(&mut rng).unwrap(); + + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64; + + let value = match template.metric_name.as_str() { + "cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + template.generate_message(timestamp, value) + } + + fn generate_prometheus_metric(&self, labels: &[String]) -> Result> { + let mut rng = thread_rng(); + + let metric_name = METRIC_NAMES.choose(&mut rng).unwrap().to_string(); + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_millis() as u64; + + let value = match metric_name.as_str() { + 
"cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + let mut label_map = HashMap::new(); + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_map.insert(key.to_string(), serde_json::Value::String(labels[i].clone())); + } + } + + let mut metric = HashMap::new(); + metric.insert("metric_name".to_string(), serde_json::Value::String(metric_name)); + metric.insert("timestamp".to_string(), serde_json::Value::Number(serde_json::Number::from(timestamp))); + metric.insert("value".to_string(), serde_json::Value::Number(serde_json::Number::from_f64(value).unwrap())); + metric.insert("labels".to_string(), serde_json::Value::Object(label_map.into_iter().collect())); + + Ok(metric) + } + + async fn produce_message_batch( + &self, + batch: Vec<(String, Vec)>, + ) -> Result<()> { + let mut futures = Vec::new(); + + for (partition_key, labels) in batch { + let metric = self.generate_prometheus_metric(&labels)?; + let message_data = serde_json::to_vec(&metric)?; + let message_size = message_data.len(); + + let stats = self.stats.clone(); + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + + let future = async move { + let record = FutureRecord::to(&topic_name) + .key(&partition_key) + .payload(&message_data); + + match producer.send(record, Duration::from_secs(10)).await { + Ok((partition, offset)) => { + stats.add_message(message_size as u64); + Ok((partition, offset)) + } + Err((kafka_error, message)) => { + stats.add_error(); + warn!("Failed to send message: {}", kafka_error); + Err((kafka_error, message)) + } + } + }; + futures.push(future); + } + + let _results = join_all(futures).await; + 
Ok(()) + } + + + fn generate_all_label_combinations_static(label_choices: &LabelChoices) -> Vec> { + let label_values = vec![ + &label_choices.hostname, + &label_choices.location, + &label_choices.application_name, + &label_choices.instance, + &label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + fn generate_all_label_combinations(&self) -> Vec> { + let label_values = vec![ + &self.label_choices.hostname, + &self.label_choices.location, + &self.label_choices.application_name, + &self.label_choices.instance, + &self.label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + async fn run_benchmark( + &self, + args: &Args, + ) -> Result<()> { + info!( + "Starting benchmark: {} messages at {} msg/s using {} threads", + args.total_messages, args.messages_per_second, args.num_threads + ); + info!("Producer initialized with {} pre-generated templates", self.templates.len()); + info!("🚀 Data generation started!"); + + let all_labels = self.generate_all_label_combinations(); + let start_time = Instant::now(); + let mut messages_sent = 0u64; + + let messages_per_interval = args.messages_per_second; + let batch_size = std::cmp::max(1, args.batch_size); + let interval = Duration::from_secs(1); + + while messages_sent < args.total_messages { + if let Some(duration) = args.duration { + if start_time.elapsed().as_secs() > duration { + break; + } + } + + let interval_start = Instant::now(); + + // Select labels for this interval + let labels_subset = if args.vary_labels { + let mut rng = thread_rng(); + let num_labels = rng.gen_range(1..=std::cmp::min(all_labels.len(), messages_per_interval as usize)); + all_labels.choose_multiple(&mut rng, num_labels).cloned().collect::>() + } else { + all_labels[..std::cmp::min(all_labels.len(), messages_per_interval as usize)].to_vec() + }; + 
+ // Create batches for parallel processing + let mut tasks = Vec::new(); + let mut remaining_messages = std::cmp::min( + messages_per_interval, + args.total_messages - messages_sent + ); + + while remaining_messages > 0 && tasks.len() < args.num_threads { + let current_batch_size = std::cmp::min(batch_size as u64, remaining_messages) as usize; + let batch: Vec<(String, Vec)> = (0..current_batch_size) + .map(|i| { + let labels = &labels_subset[i % labels_subset.len()]; + let partition_key = format!("{}_{}", labels[0], labels[1]); + (partition_key, labels.clone()) + }) + .collect(); + + // Clone necessary data for the async task + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + let stats = self.stats.clone(); + let label_choices = self.label_choices.clone(); + + tasks.push(tokio::spawn(async move { + let temp_producer = HighThroughputProducer { + producer, + topic_name, + stats, + templates: Arc::new(Vec::new()), // Empty templates for batch producer + label_choices, + }; + temp_producer.produce_message_batch(batch).await + })); + + remaining_messages -= current_batch_size as u64; + messages_sent += current_batch_size as u64; + } + + // Wait for all tasks to complete + for task in tasks { + if let Err(e) = task.await? 
{ + error!("Batch processing failed: {}", e); + } + } + + // Rate limiting + let elapsed = interval_start.elapsed(); + if elapsed < interval { + sleep(interval - elapsed).await; + } + + // Print progress + if messages_sent % (args.messages_per_second) == 0 { + self.print_stats(start_time); + } + } + + // Final flush - wait for all messages to be delivered + info!("Flushing remaining messages..."); + if let Err(e) = self.producer.flush(Duration::from_secs(30)) { + warn!("Error during flush: {}", e); + } + + info!("Benchmark completed!"); + self.print_stats(start_time); + + Ok(()) + } + + fn print_stats(&self, start_time: Instant) { + let (messages, bytes, errors) = self.stats.get_stats(); + let elapsed = start_time.elapsed().as_secs_f64(); + + let rate = if elapsed > 0.0 { messages as f64 / elapsed } else { 0.0 }; + let throughput_mb = if elapsed > 0.0 { + (bytes as f64 / (1024.0 * 1024.0)) / elapsed + } else { + 0.0 + }; + + info!( + "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s, Errors: {}", + messages, rate, throughput_mb, errors + ); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + + let producer = HighThroughputProducer::new_with_compression( + &args.kafka_broker, + args.kafka_topic.clone(), + args.num_partitions, + args.replication_factor, + &args.compression, + ).await?; + + producer.run_benchmark(&args).await?; + + Ok(()) +} diff --git a/ExecutionUtilities/kafka-consumer/Cargo.lock b/ExecutionUtilities/kafka-consumer/Cargo.lock new file mode 100644 index 0000000..63bdf55 --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/Cargo.lock @@ -0,0 +1,611 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] 
+name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "kafka-throughput-consumer" +version = "0.1.0" +dependencies = [ + "rdkafka", + "tokio", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + 
"num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = 
"proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] 
+name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] diff --git a/ExecutionUtilities/kafka-consumer/Cargo.toml b/ExecutionUtilities/kafka-consumer/Cargo.toml new file mode 100644 index 0000000..d40d77f --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "kafka-throughput-consumer" +version = "0.1.0" +edition = "2021" + +[dependencies] +rdkafka = "0.36" +tokio = { version = "1.0", features = ["full"] } diff --git a/ExecutionUtilities/kafka-consumer/src/main.rs b/ExecutionUtilities/kafka-consumer/src/main.rs new file mode 100644 index 0000000..1ae08b8 --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/src/main.rs @@ -0,0 +1,84 @@ +use rdkafka::config::ClientConfig; +use rdkafka::consumer::{Consumer, StreamConsumer}; +use rdkafka::message::Message; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::signal; +use tokio::time::interval; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let consumer: StreamConsumer = ClientConfig::new() + .set("group.id", "throughput-test-group") + 
/// Prints cumulative consumption statistics since `start_time`.
///
/// Bug fix: the rates were previously computed by dividing by the elapsed
/// milliseconds unconditionally, so a call within the first millisecond
/// printed `inf`/`NaN`. Rates now fall back to 0.0 when no time has elapsed
/// (matching the producer-side stats printer).
fn print_stats(
    message_count: &Arc<AtomicU64>,
    total_bytes: &Arc<AtomicU64>,
    start_time: Instant,
) {
    let elapsed_ms = start_time.elapsed().as_millis() as f64;
    let count = message_count.load(Ordering::Relaxed) as f64;
    let bytes = total_bytes.load(Ordering::Relaxed) as f64;

    // Avoid division by zero / non-finite output on a zero-length interval.
    let (messages_per_sec, mb_per_sec) = if elapsed_ms > 0.0 {
        (
            (count * 1000.0) / elapsed_ms,
            (bytes * 1000.0) / (elapsed_ms * 1024.0 * 1024.0),
        )
    } else {
        (0.0, 0.0)
    };

    println!(
        "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s",
        count as u64, messages_per_sec, mb_per_sec
    );
}
def create_topic_if_not_exists(producer, topic):
    """Create `topic` (1 partition, replication factor 1) if it does not exist.

    Bug fix: the AdminClient was previously configured with
    ``producer.list_topics().brokers`` — a dict of BrokerMetadata objects, not
    the ``host:port`` string ``bootstrap.servers`` requires. The broker list is
    now rendered into the comma-separated form the client expects, and topic
    creation is awaited so failures surface here instead of silently.
    """
    # One metadata request serves both the broker list and the topic check.
    cluster = producer.list_topics(timeout=10)
    bootstrap = ",".join(
        f"{broker.host}:{broker.port}" for broker in cluster.brokers.values()
    )
    admin_client = admin.AdminClient({"bootstrap.servers": bootstrap})

    if topic not in cluster.topics:
        new_topic = admin.NewTopic(topic, num_partitions=1, replication_factor=1)
        # create_topics returns {topic_name: future}; block on the future so a
        # creation error raises immediately rather than being dropped.
        futures = admin_client.create_topics([new_topic])
        futures[topic].result()
        print(f"Topic '{topic}' created.")
# CLI entry point: parse generator options and run the infinite produce loop.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Kafka connection target; defaults to a local single-broker setup.
    parser.add_argument("--kafka_broker", type=str, default="localhost:9092")
    parser.add_argument(
        "--debug_print",
        action="store_true",
        help="Print data to console instead of sending to Kafka",
    )
    parser.add_argument("--kafka_topic", type=str, required=True)
    parser.add_argument(
        "--frequency",
        type=int,
        default=1,
        help="Frequency in seconds to dump data to Kafka",
    )
    # Must not exceed the number of distinct label combinations; main() raises
    # ValueError otherwise.
    parser.add_argument(
        "--data_points",
        type=int,
        required=True,
        help="Number of data points to dump at each frequency interval",
    )
    parser.add_argument(
        "--vary_labels",
        action="store_true",
        help="Vary the number of labels to dump data for",
    )
    args = parser.parse_args()
    main(args)
FROM sketchdb-base:latest

LABEL maintainer="SketchDB Team"
LABEL description="Prometheus Client for SketchDB"

# OS packages first: they change least often, so this layer caches well.
# --no-install-recommends and the apt list cleanup keep the image small.
# NOTE(review): installing sudo inside a container is unusual — confirm it is
# actually needed at runtime.
RUN apt-get update \
    && apt-get install -y --no-install-recommends sudo \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest before installing so source-code edits do
# not invalidate the pip layer (the original `COPY . .` busted the cache on
# every code change).
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Application source last.
COPY . .

# Run with host network access for communicating with host services
ENTRYPOINT ["python", "main_prometheus_client.py"]
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/PrometheusClient/classes/QueryLatencyExporter.py b/PrometheusClient/classes/QueryLatencyExporter.py new file mode 100644 index 0000000..405a8db --- /dev/null +++ b/PrometheusClient/classes/QueryLatencyExporter.py @@ -0,0 +1,171 @@ +from typing import Any, Dict, Optional, Union +from prometheus_client import start_http_server, Gauge +from loguru import logger + + +class QueryLatencyExporter: + + @staticmethod + def _IP_valid(addr: Optional[Union[str, object]]) -> None: + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if addr is None: + raise TypeError("IP address cannot be None") + elif not isinstance(addr, str): + raise TypeError("IP address must be a string") + elif addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + + @staticmethod + def _port_valid(port: Optional[Union[int, object]]) -> None: + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. 
At the moment, this function considers + any properly formatted IP address as valid + """ + if port is None: + raise TypeError("Port cannot be None") + elif not isinstance(port, int): + raise TypeError("Port must be an integer") + elif port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + def __init__(self, addr: str, port: int): + self.logger = logger.bind(module="query_latency_exporter") + self.port = port + self.addr = addr + + self.http_server: Optional[Any] = None + self.server_thread: Optional[Any] = None + + try: + QueryLatencyExporter._IP_valid(self.addr) + QueryLatencyExporter._port_valid(self.port) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryLatencyExporter: {str(e)}") + raise e + + self.latencies_metric = Gauge( + "query_latencies", "Query latencies", labelnames=["query_index", "server"] + ) + self.cumulative_latencies_metric = Gauge( + "cumulative_query_latencies", + "Query cumulative latencies", + labelnames=["query_index", "server"], + ) + self.logger.info("QueryLatencyExporter object created") + + def __enter__(self) -> "QueryLatencyExporter": + return self + + def __exit__(self, *args: object) -> None: + self.shutdown() + + def launch(self) -> None: + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Latency exporter failed to launch: exporter IP is None") + elif self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError( + "Latency exporter failed to launch: exporter port is None" + ) + + self.logger.info(f"Launching latency exporter at {self.addr}: {self.port}") + + try: + result = start_http_server(addr=self.addr, port=self.port) + assert result is not None + self.http_server, self.server_thread = result + except Exception as e: + self.logger.error(f"Failed to start 
http server due to exception: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Latency exporter failed to launch") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}: {self.port}") + print(f"Exporter running at {self.addr}: {self.port}") + + def shutdown(self) -> None: + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down latency exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Attempt to shutdown exporter http_server failed.") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server thread: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Attempt to join exporter's server thread failed.") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Exporter server thread is None") + + print("Exporter shut down successfully") + + def export_repetition(self, repetition_idx: int, result: Dict[str, Any]) -> None: + """ + Exports a single repetition result for all queries + """ + if not isinstance(repetition_idx, int): + self.logger.error("Given non-integer repetition_idx") + raise TypeError("Repetition index must be an integer") + + 
# --- file: PrometheusClient/classes/__init__.py ---
# Make classes a proper Python package

# --- file: PrometheusClient/classes/clickhouse_query_client.py ---
import re
import requests
from typing import Any, Dict, Optional
from requests.adapters import HTTPAdapter

from .query_client import QueryClient, QueryResponse


class ClickHouseQueryClient(QueryClient):
    """Query client for ClickHouse HTTP API."""

    def __init__(
        self,
        server_url: str,
        server_name: str,
        database: str = "default",
        user: str = "default",
        password: str = "",
        timeout: int = 30,
        **kwargs: Any,
    ):
        super().__init__(server_url, server_name)
        self.database = database
        self.user = user
        self.password = password
        self.timeout = timeout

        self._session = requests.Session()
        # Only attach basic auth when both credentials are supplied.
        if user and password:
            self._session.auth = (user, password)

    @property
    def protocol_name(self) -> str:
        return "clickhouse"

    @property
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        return self._session

    def mount_adapter(self, prefix: str, adapter: HTTPAdapter) -> None:
        """Mount an HTTP adapter (e.g., for debug logging)."""
        self._session.mount(prefix, adapter)

    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute SQL query via ClickHouse HTTP interface.

        Args:
            query: SQL query string (may contain template variables already substituted)
            query_time: Not directly used - time filtering should be done via
                template substitution before calling this method

        Returns:
            QueryResponse with normalized data
        """
        try:
            params = {"database": self.database}

            formatted_query = query.strip()

            # Reject queries with FORMAT clause - we need raw TSV for parsing
            if self._has_format_clause(formatted_query):
                return QueryResponse(
                    success=False,
                    data=None,
                    error_message="Queries must not contain FORMAT clause - raw TSV output is required for parsing",
                )

            response = self._session.post(
                self.server_url,
                params=params,
                data=formatted_query.encode("utf-8"),
                timeout=self.timeout,
            )

            if response.status_code != 200:
                return QueryResponse(
                    success=False,
                    data=None,
                    error_message=f"HTTP {response.status_code}: {response.text}",
                    raw_response=response.text,
                )

            # Return raw TSV text - will be stored in QueryResult.raw_text_result
            return QueryResponse(
                success=True,
                data=None,
                raw_response=response.text,
            )

        except requests.exceptions.Timeout:
            return QueryResponse(
                success=False,
                data=None,
                error_message=f"Request timed out after {self.timeout}s",
            )
        except Exception as e:
            return QueryResponse(
                success=False,
                data=None,
                error_message=f"{type(e).__name__}: {e}",
            )

    def _has_format_clause(self, query: str) -> bool:
        """
        Heuristically check if query already has a FORMAT clause.

        A word-boundary regex is used instead of the previous " FORMAT "
        substring check, which missed FORMAT keywords delimited by tabs or
        preceded only by a newline without a trailing space. Like the original
        check, this can still false-positive on a bare `format` identifier.
        """
        return re.search(r"\bFORMAT\b", query, flags=re.IGNORECASE) is not None

    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """Check ClickHouse availability via ping endpoint (best-effort)."""
        try:
            response = self._session.get(
                f"{self.server_url}/ping",
                timeout=5,
            )
            if response.status_code == 200:
                return {"status": "ok", "response": response.text.strip()}
        except Exception:
            # Best-effort probe: any failure is reported as "unavailable".
            pass
        return None


# --- file: PrometheusClient/classes/config.py ---
from typing import List, Dict, Any, Optional


class ServerConfig:
    """Configuration for one query backend server."""

    def __init__(
        self,
        name: str,
        url: str,
        protocol: Optional[str],
        # ClickHouse-specific options
        database: Optional[str],
        user: Optional[str],
        password: Optional[str],
    ):
        self.name = name
        self.url = url
        self.protocol = protocol
        self.database = database
        self.user = user
        self.password = password

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ServerConfig":
        """Build a ServerConfig from a parsed config mapping."""
        return cls(
            name=data["name"],
            url=data["url"],
            protocol=data.get("protocol"),
            database=data.get("database"),
            user=data.get("user"),
            password=data.get("password"),
        )


class QueryGroupConfig:
    """Configuration for a group of queries executed together."""

    def __init__(
        self,
        id: int,
        queries: List[str],
        repetition_delay: int,
        options: Dict[str, Any],
        time_window_seconds: Optional[int],
    ):
        # Defaults that `options` may override below.
        self.starting_delay = 0
        self.repetitions = None

        self.id = id
        self.queries = queries
        self.repetition_delay = repetition_delay
        self.time_window_seconds = time_window_seconds
        # Merge arbitrary client options (e.g. repetitions, query_time_offset)
        # directly into the instance namespace.
        self.__dict__.update(options)

        assert (
            self.repetitions is not None
        ), "query group client_options must set 'repetitions'"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QueryGroupConfig":
        """Build a QueryGroupConfig from a parsed config mapping."""
        return cls(
            id=data["id"],
            queries=data["queries"],
            repetition_delay=data["repetition_delay"],
            options=data["client_options"],
            time_window_seconds=data.get("time_window_seconds"),
        )


class Config:
    """Top-level client configuration: servers plus query groups."""

    def __init__(
        self, servers: List[ServerConfig], query_groups: List[QueryGroupConfig]
    ):
        self.servers = servers
        self.query_groups = query_groups

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Config":
        """Build a Config (servers + query groups) from a parsed mapping."""
        servers = [ServerConfig.from_dict(server) for server in data["servers"]]
        query_groups = [
            QueryGroupConfig.from_dict(group) for group in data["query_groups"]
        ]
        return cls(servers=servers, query_groups=query_groups)
# --- file: PrometheusClient/classes/prometheus_query_client.py ---
from typing import Any, Dict, Optional
import requests

from prometheus_api_client import PrometheusConnect
from .query_client import QueryClient, QueryResponse


class PrometheusQueryClient(QueryClient):
    """Query client for Prometheus HTTP API."""

    def __init__(
        self,
        server_url: str,
        server_name: str,
        disable_ssl: bool = True,
        retry: Optional[Any] = None,
        **kwargs: Any,
    ):
        super().__init__(server_url, server_name)
        self._client = PrometheusConnect(
            url=server_url,
            disable_ssl=disable_ssl,
            retry=retry,
            **kwargs,
        )

    @property
    def protocol_name(self) -> str:
        return "prometheus"

    @property
    def underlying_client(self) -> PrometheusConnect:
        """Access to underlying PrometheusConnect for advanced usage (e.g., mounting HTTP adapters)."""
        return self._client

    @property
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        # NOTE(review): reaches into PrometheusConnect's private _session —
        # confirm this attribute is stable across prometheus_api_client versions.
        return self._client._session

    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute PromQL query via Prometheus HTTP API.

        Args:
            query: PromQL query string
            query_time: Optional Unix timestamp for point-in-time query

        Returns:
            QueryResponse with normalized data
        """
        try:
            # `is not None` (not truthiness) so a query_time of 0 (the Unix
            # epoch) is still honored as a point-in-time query.
            if query_time is not None:
                raw_result = self._client.custom_query(
                    query=query, params={"time": query_time}
                )
            else:
                raw_result = self._client.custom_query(query=query)

            # Normalize to Dict[frozenset, float]
            normalized = self._normalize_response(raw_result)
            return QueryResponse(
                success=True,
                data=normalized,
                raw_response=raw_result,
            )
        except Exception as e:
            return QueryResponse(
                success=False,
                data=None,
                error_message=str(e),
            )

    def _normalize_response(self, raw_result: list) -> Dict[frozenset, float]:
        """
        Convert Prometheus response to normalized format.

        Prometheus response format:
            [{"metric": {"label1": "value1", ...}, "value": [timestamp, "value_str"]}, ...]

        Returns:
            Dict mapping frozenset of labels to float value
        """
        result = {}
        for item in raw_result:
            metric_labels = frozenset(item.get("metric", {}).items())
            value = item.get("value", [None, None])
            if len(value) >= 2 and value[1] is not None:
                try:
                    result[metric_labels] = float(value[1])
                except (ValueError, TypeError):
                    # Skip non-numeric values (e.g., NaN represented as string)
                    pass
        return result

    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """Query SketchDB/Prometheus runtime info endpoint (best-effort)."""
        try:
            response = requests.get(
                f"{self.server_url}/api/v1/status/runtimeinfo",
                timeout=10,
            )
            if response.status_code == 200:
                return response.json().get("data", {})
        except Exception:
            # Best-effort probe: any failure is reported as "unavailable".
            pass
        return None


# --- file: PrometheusClient/classes/query_client.py ---
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from dataclasses import dataclass
import requests


@dataclass
class QueryResponse:
    """Normalized response from any query backend."""

    success: bool
    data: Optional[Dict[frozenset, float]]  # metric_labels -> value
    error_message: Optional[str] = None
    raw_response: Optional[Any] = None  # For debugging


class QueryClient(ABC):
    """Abstract base class for query protocol adapters."""

    def __init__(self, server_url: str, server_name: str):
        self.server_url = server_url
        self.server_name = server_name

    @abstractmethod
    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute a query and return normalized response.

        Args:
            query: The query string (PromQL, SQL, etc.)
            query_time: Optional Unix timestamp for point-in-time queries

        Returns:
            QueryResponse with normalized data
        """
        pass

    @abstractmethod
    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """
        Get runtime/status info from the backend.
        Used for query alignment with SketchDB.

        Returns:
            Dict with backend-specific runtime info, or None if unavailable
        """
        pass

    @property
    @abstractmethod
    def protocol_name(self) -> str:
        """Return the protocol name (e.g., 'prometheus', 'clickhouse')."""
        pass

    @property
    @abstractmethod
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        pass
# --- file: PrometheusClient/classes/query_client_factory.py ---
from typing import Any, Dict, List, Type

from .query_client import QueryClient
from .prometheus_query_client import PrometheusQueryClient
from .clickhouse_query_client import ClickHouseQueryClient


class QueryClientFactory:
    """Factory for creating protocol-specific query clients."""

    _registry: Dict[str, Type[QueryClient]] = {
        "prometheus": PrometheusQueryClient,
        "clickhouse": ClickHouseQueryClient,
    }

    @classmethod
    def register(cls, protocol: str, client_class: Type[QueryClient]) -> None:
        """
        Register a new protocol handler.

        Args:
            protocol: Protocol name (e.g., 'influxdb')
            client_class: QueryClient subclass to handle this protocol
        """
        cls._registry[protocol] = client_class

    @classmethod
    def create(
        cls,
        protocol: str,
        server_url: str,
        server_name: str,
        **kwargs: Any,
    ) -> QueryClient:
        """
        Create a query client for the specified protocol.

        Args:
            protocol: Protocol name ('prometheus', 'clickhouse', etc.)
            server_url: Backend server URL
            server_name: Logical name for the server
            **kwargs: Protocol-specific options passed to the client constructor

        Returns:
            QueryClient instance

        Raises:
            ValueError: If protocol is not supported
        """
        if protocol not in cls._registry:
            supported = ", ".join(sorted(cls._registry.keys()))
            raise ValueError(
                f"Unsupported protocol '{protocol}'. "
                f"Supported protocols: {supported}"
            )

        client_class = cls._registry[protocol]
        return client_class(server_url, server_name, **kwargs)

    @classmethod
    def supported_protocols(cls) -> List[str]:
        """Return list of supported protocol names."""
        return sorted(cls._registry.keys())


# --- file: PrometheusClient/classes/query_template.py ---
import time
from datetime import datetime, timezone
from typing import Optional, Set
from dataclasses import dataclass

from jinja2 import Environment, BaseLoader, TemplateSyntaxError, UndefinedError


@dataclass
class TimeRange:
    """Represents a query time range with Unix timestamps."""

    start_time: int  # Unix timestamp (seconds)
    end_time: int  # Unix timestamp (seconds)

    @property
    def start_datetime(self) -> str:
        """'YYYY-MM-DD HH:MM:SS' datetime string for start (UTC)."""
        return datetime.fromtimestamp(self.start_time, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

    @property
    def end_datetime(self) -> str:
        """'YYYY-MM-DD HH:MM:SS' datetime string for end (UTC)."""
        return datetime.fromtimestamp(self.end_time, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

    @property
    def start_time_ms(self) -> int:
        """Start time in milliseconds."""
        return self.start_time * 1000

    @property
    def end_time_ms(self) -> int:
        """End time in milliseconds."""
        return self.end_time * 1000


class QueryTemplate:
    """
    Handles Jinja2 template variable substitution in queries.

    Supported variables:
        {{ start_time }}     - Unix timestamp in seconds (int)
        {{ end_time }}       - Unix timestamp in seconds (int)
        {{ start_time_ms }}  - Unix timestamp in milliseconds (int)
        {{ end_time_ms }}    - Unix timestamp in milliseconds (int)
        {{ start_datetime }} - datetime string (e.g., '2024-01-16 12:00:00')
        {{ end_datetime }}   - datetime string (e.g., '2024-01-16 12:01:00')

    Example usage:
        template = QueryTemplate(
            "SELECT * FROM metrics WHERE ts >= {{ start_time }} AND ts < {{ end_time }}"
        )
        time_range = TimeRange(start_time=1705420800, end_time=1705420860)
        query = template.render(time_range)
        # Result: "SELECT * FROM metrics WHERE ts >= 1705420800 AND ts < 1705420860"
    """

    SUPPORTED_VARS = {
        "start_time",
        "end_time",
        "start_time_ms",
        "end_time_ms",
        "start_datetime",
        "end_datetime",
    }

    def __init__(self, template: str):
        """
        Initialize with a query template.

        Args:
            template: Query string potentially containing {{ variable }} placeholders

        Raises:
            ValueError: If template has syntax errors
        """
        self.template_str = template
        self._env = Environment(loader=BaseLoader(), autoescape=False)

        try:
            self._template = self._env.from_string(template)
        except TemplateSyntaxError as e:
            raise ValueError(f"Invalid template syntax: {e}")

        self._variables = self._extract_variables()

    def _extract_variables(self) -> Set[str]:
        """Extract all template variable names from the query."""
        # Parse the AST to find all variable references
        from jinja2 import meta

        ast = self._env.parse(self.template_str)
        return meta.find_undeclared_variables(ast)

    @property
    def has_time_variables(self) -> bool:
        """Check if template contains any time variables."""
        return bool(self._variables)

    @property
    def variables(self) -> Set[str]:
        """Return set of variables used in this template."""
        return self._variables.copy()

    def render(self, time_range: TimeRange) -> str:
        """
        Substitute template variables with actual values.

        Args:
            time_range: TimeRange object with start/end times

        Returns:
            Query string with variables substituted

        Raises:
            ValueError: If template uses unsupported variables
        """
        # Jinja2's default Undefined silently renders unknown variables as an
        # empty string, so the previous `except UndefinedError` branch was
        # unreachable and unsupported variables slipped through blank.
        # Validate explicitly before rendering.
        unsupported = self._variables - self.SUPPORTED_VARS
        if unsupported:
            raise ValueError(
                f"Unsupported template variables: {unsupported}. "
                f"Supported: {sorted(self.SUPPORTED_VARS)}"
            )

        context = {
            "start_time": time_range.start_time,
            "end_time": time_range.end_time,
            "start_time_ms": time_range.start_time_ms,
            "end_time_ms": time_range.end_time_ms,
            "start_datetime": time_range.start_datetime,
            "end_datetime": time_range.end_datetime,
        }

        try:
            return self._template.render(**context)
        except UndefinedError as e:
            # Defensive: should be unreachable after the explicit check above.
            raise ValueError(
                f"Unsupported template variables: {unsupported}. "
                f"Supported: {sorted(self.SUPPORTED_VARS)}"
            ) from e

    @staticmethod
    def calculate_time_range(
        current_time: Optional[int] = None,
        window_seconds: int = 60,
        offset_seconds: int = 0,
    ) -> TimeRange:
        """
        Calculate a time range for query execution.

        The time range is calculated as:
            end_time = current_time - offset_seconds
            start_time = end_time - window_seconds

        Args:
            current_time: Reference Unix timestamp (default: now)
            window_seconds: Size of time window in seconds
            offset_seconds: How far back from current_time to end the window
                (positive = past, useful for query_time_offset)

        Returns:
            TimeRange object

        Examples:
            # Current time query with 60s window
            calculate_time_range(current_time=1000, window_seconds=60, offset_seconds=0)
            -> TimeRange(start=940, end=1000)

            # Query with 30s offset (for delayed data)
            calculate_time_range(current_time=1000, window_seconds=60, offset_seconds=30)
            -> TimeRange(start=910, end=970)
        """
        if current_time is None:
            current_time = int(time.time())

        end_time = current_time - offset_seconds
        start_time = end_time - window_seconds

        return TimeRange(start_time=start_time, end_time=end_time)
# --- file: PrometheusClient/docker-compose.yml.j2 ---
# QueryEngine Docker Compose Template
# This template is rendered with Jinja2 to generate the final docker-compose.yml
services:
  prometheusclient:
    image: sketchdb-prometheusclient:latest
    cap_add:
      - SYS_PTRACE # For py-spy monitoring
    security_opt: # For pyspy monitoring, gives container permission to trace processes
      - seccomp=unconfined
      - apparmor=unconfined
    container_name: {{ container_name }}{% if latency_exporter_socket_addr is defined and latency_exporter_socket_addr is not none %}
    ports:
      - "{{ latency_exporter_socket_addr.split(":")[1] }}"{% endif %}
    volumes:
      - "{{ experiment_output_dir }}:/app/outputs"
      - "{{ client_output_dir }}:/app/prometheus_client_output"
      - "{{ config_file }}:/app/prometheus_client_config.yaml:ro"{% if query_engine_config_file is defined and query_engine_config_file is not none %}
      - "{{ query_engine_config_file}}:/app/query_engine_config_file.yaml:ro"{% endif %}
    network_mode: "host" # Allows prometheus client to send requests to localhost, not ideal for production
    # Each CLI flag below is included only when its template variable is set.
    command: [
      "--config_file", "/app/prometheus_client_config.yaml",
      "--output_dir", "/app/prometheus_client_output",
      "--output_file", "/app/prometheus_client_output/{{ client_output_file }}",{% if result_output_file is defined and result_output_file is not none %}
      "--result_output_file", "{{ result_output_file }}",{% endif %}{% if query_engine_config_file is defined and query_engine_config_file is not none %}
      "--query_engine_config_file", "/app/query_engine_config_file.yaml",{% endif %}{% if align_query_time %}
      "--align_query_time",{% endif %}{% if server_for_alignment is defined and server_for_alignment is not none %}
      "--server_for_alignment", "{{ server_for_alignment }}",{% endif %}{% if dry_run %}
      "--dry_run",{% endif %}{% if compare_results %}
      "--compare_results",{% endif %}{% if parallel %}
      "--parallel",{% endif %}{% if query_engine_pid is defined and query_engine_pid is not none %}
      "--profile_query_engine_pid", "{{ query_engine_pid }}",{% endif %}{% if profile_prometheus_time is defined and profile_prometheus_time is not none %}
      "--profile_prometheus_time", "{{ profile_prometheus_time }}",{% endif %}{% if latency_exporter_socket_addr is defined and latency_exporter_socket_addr is not none %}
      "--export_latencies_for_prometheus", "{{ latency_exporter_socket_addr }}",{% endif %}
    ]
    # pid: "container:sketchdb-queryengine"
    pid: "host" # So py-spy can access processes running on the host
    extra_hosts:
      # host.docker.internal allows prometheus client to curl servers running on host system localhost,
      # but this requires changing the server urls that the client reads from localhost: to host.docker.internal:
      # Maybe there is a more elegant solution?
      # - "host.docker.internal:host-gateway"
      - "prometheus:{{ prometheus_host }}"
      - "sketchdb:{{ sketchdb_host }}"
    restart: no

# networks:
#   prometheusclient-net:
#     driver: bridge
#     ipam:
#       config:
#         - subnet: 172.20.0.0/16
-f Dockerfile -t sketchdb-prometheusclient:latest + +echo "PrometheusClient Docker image built successfully: sketchdb-prometheusclient:latest" diff --git a/PrometheusClient/installation/setup_dependencies.sh b/PrometheusClient/installation/setup_dependencies.sh new file mode 100755 index 0000000..b8a209c --- /dev/null +++ b/PrometheusClient/installation/setup_dependencies.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") + +sudo apt-get install -y python3-pip +# TODO: change to virtualenv +pip3 install --user -r "${THIS_DIR}/../requirements.txt" diff --git a/PrometheusClient/main_prometheus_client.py b/PrometheusClient/main_prometheus_client.py new file mode 100644 index 0000000..08a9bf5 --- /dev/null +++ b/PrometheusClient/main_prometheus_client.py @@ -0,0 +1,846 @@ +import os +import yaml +import time +import requests +import argparse +import datetime +import logging + +# import urllib3 +from loguru import logger +from typing import Dict, Set, Optional, List, Any +from type_aliases import ( + ServerDict, + Query, + QueryIndex, + RepetitionIndex, + UnixTimestamp, + ResultDict, + QueryStartTimes, + QueryEngineConfig, +) +import threading +import subprocess +import concurrent.futures +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from classes.config import Config +from classes.QueryLatencyExporter import QueryLatencyExporter +from classes.query_client import QueryClient +from classes.query_client_factory import QueryClientFactory +from classes.query_template import QueryTemplate +from promql_utilities.query_results.classes import QueryResult, QueryResultAcrossTime +from promql_utilities.query_results.serializers import SerializerFactory + + +class PrometheusDebugRetry(Retry): + def __init__(self, *args: Any, server_name: str = "", **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.server_name = server_name + + def new(self, **kw: Any) -> "PrometheusDebugRetry": + 
"""Override new() to preserve server_name when creating new instances.""" + new_retry = super().new(**kw) + new_retry.server_name = self.server_name + return new_retry + + def increment( + self, + method: Optional[str] = None, + url: Optional[str] = None, + response: Optional[Any] = None, + error: Optional[Exception] = None, + _pool: Optional[Any] = None, + _stacktrace: Optional[Any] = None, + ) -> "PrometheusDebugRetry": + # Calculate current attempt number + assert self.total is not None + current_retries = self.total - ( + self.total if hasattr(self, "history") and self.history else 0 + ) + attempt_num = (3 - current_retries) + 1 # Assuming max 3 retries + + if response: + logger.bind(module="http_debug").debug( + f"RETRY ATTEMPT {attempt_num} for {self.server_name}: " + f"{method} {url} -> HTTP {response.status} " + f"(will retry: {response.status in self.status_forcelist})" + ) + elif error: + logger.bind(module="http_debug").debug( + f"RETRY ATTEMPT {attempt_num} for {self.server_name}: " + f"{method} {url} -> ERROR: {type(error).__name__}: {error}" + ) + + result = super().increment(method, url, response, error, _pool, _stacktrace) + assert isinstance(result, PrometheusDebugRetry) + return result + + +class PrometheusDebugHTTPAdapter(HTTPAdapter): + def __init__(self, server_name: str, *args: Any, **kwargs: Any) -> None: + self.server_name = server_name + super().__init__(*args, **kwargs) + + def send(self, request: Any, *args: Any, **kwargs: Any) -> Any: + logger.bind(module="http_debug").debug( + f"HTTP REQUEST START for {self.server_name}: " + f"{request.method} {request.url}" + ) + start_time = time.time() + + try: + response = super().send(request, *args, **kwargs) + elapsed = time.time() - start_time + + logger.bind(module="http_debug").debug( + f"HTTP REQUEST END for {self.server_name}: " + f"{request.method} {request.url} -> HTTP {response.status_code} " + f"({elapsed:.3f}s, {len(response.content)} bytes)" + ) + return response + except Exception as 
def create_loggers(logging_dir: str, log_level: str) -> None:
    """
    Configure loguru sinks (main client log, latency-exporter log, HTTP-debug
    log) plus a urllib3 connection-pool debug file handler under logging_dir.
    """
    logger.remove(None)  # remove default loggers

    # Main client log: only records emitted from __main__.
    logger.add(f"{logging_dir}/prometheus_client.log", filter="__main__")

    # Latency exporter log, selected via the bound "module" extra.
    logger.add(
        f"{logging_dir}/query_latency_exporter.log",
        level=log_level,
        filter=lambda record: record["extra"].get("module") == "query_latency_exporter",
    )

    # HTTP request debugging logger.
    logger.add(
        f"{logging_dir}/http_requests.log",
        level="DEBUG",
        filter=lambda record: record["extra"].get("module") == "http_debug",
    )

    # Enable urllib3 debug logging for connection-level details.
    pool_logger = logging.getLogger("urllib3.connectionpool")
    pool_logger.setLevel(logging.DEBUG)
    pool_handler = logging.FileHandler(f"{logging_dir}/urllib3_debug.log")
    pool_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    pool_logger.addHandler(pool_handler)


def get_query_unix_time(
    query: Query,
    query_unix_time: UnixTimestamp,
    query_start_times: Optional[QueryStartTimes],
    repetition_delay: int,
) -> UnixTimestamp:
    """
    Snap query_unix_time down to the most recent repetition boundary for
    `query`, i.e. the latest timestamp of the form
    alignment_time + N * repetition_delay that is <= query_unix_time.
    Returns the input unchanged when no alignment time is known for the query.
    """
    if query_start_times is None or query not in query_start_times:
        return query_unix_time

    anchor = query_start_times[query]
    return int(query_unix_time - (query_unix_time - anchor) % repetition_delay)


def execute_single_query(
    server_name: str,
    server_object: QueryClient,
    query: Query,
    query_idx: QueryIndex,
    repetition_idx: RepetitionIndex,
    query_unix_time: Optional[UnixTimestamp],
    dry_run: bool,
    query_group_idx: int,
    time_window_seconds: Optional[int],
) -> QueryResult:
    """Execute a single query and return the result with latency information."""
    logger.debug(
        f"Running query {query} on server {server_name} at time {query_unix_time}"
    )

    # Substitute time-range template variables, if the query uses any.
    tmpl = QueryTemplate(query)
    if not tmpl.has_time_variables:
        rendered_query = query
    else:
        if time_window_seconds is None:
            raise ValueError(
                f"Query contains time template variables but time_window_seconds is not set: {query[:100]}"
            )
        if query_unix_time is None:
            raise ValueError(
                f"Query contains time template variables but query_unix_time is not set: {query[:100]}"
            )
        rendered_query = tmpl.render(
            QueryTemplate.calculate_time_range(
                current_time=query_unix_time,
                window_seconds=time_window_seconds,
            )
        )
        logger.debug(f"Rendered query template: {rendered_query}")

    # Enhanced HTTP debug logging for query start.
    logger.bind(module="http_debug").info(
        f"QUERY START - Server: {server_name}, Query: {rendered_query[:100]}{'...' if len(rendered_query) > 100 else ''}, "
        f"QueryIdx: {query_idx}, QueryGroupIdx: {query_group_idx}, Rep: {repetition_idx}, Time: {query_unix_time}"
    )

    # Returned verbatim on dry runs and on every failure path below.
    fallback_result = QueryResult(
        server_name,
        query,  # Store original query template, not rendered
        query_idx,
        repetition_idx,
        result=None,
        latency=None,
        cumulative_latency=None,
        query_group_idx=query_group_idx,
    )

    if dry_run:
        logger.bind(module="http_debug").debug(
            f"DRY RUN - Skipping actual HTTP request for {server_name}"
        )
        return fallback_result

    try:
        started_at = time.time()
        # Use the QueryClient abstraction.
        response = server_object.execute_query(
            query=rendered_query,
            query_time=query_unix_time,
        )
        latency = time.time() - started_at
        logger.debug("Latency: {}", latency)

        if not response.success:
            logger.error(f"Query failed: {response.error_message}")
            logger.bind(module="http_debug").error(
                f"QUERY ERROR - Server: {server_name}, Error: {response.error_message}"
            )
            return fallback_result

        # Determine result type based on response format.
        if isinstance(response.raw_response, str):
            # ClickHouse/SQL - raw text result.
            query_result_data = None
            raw_text_result = response.raw_response
            result_count = (
                len(response.raw_response.strip().split("\n"))
                if response.raw_response
                else 0
            )
        else:
            # Prometheus - list of dicts.
            query_result_data = response.raw_response
            raw_text_result = None
            result_count = len(response.raw_response) if response.raw_response else 0

        # Enhanced HTTP debug logging for query success.
        logger.debug("Query result: {}", response.raw_response)
        logger.bind(module="http_debug").info(
            f"QUERY SUCCESS - Server: {server_name}, Total latency: {latency:.3f}s, "
            f"Results: {result_count} data points"
        )

    except Exception as e:
        logger.error(f"Error running query: {str(e)}")
        # Enhanced HTTP debug logging for query error.
        logger.bind(module="http_debug").error(
            f"QUERY ERROR - Server: {server_name}, Error: {type(e).__name__}: {e}"
        )
        return fallback_result

    return QueryResult(
        server_name,
        query,  # Store original query template
        query_idx,
        repetition_idx,
        result=query_result_data,
        latency=latency,
        cumulative_latency=None,
        query_group_idx=query_group_idx,
        raw_text_result=raw_text_result,
    )
logger.bind(module="http_debug").error( + f"QUERY ERROR - Server: {server_name}, Error: {type(e).__name__}: {e}" + ) + return empty_query_result + + return QueryResult( + server_name, + query, # Store original query template + query_idx, + repetition_idx, + result=query_result_data, + latency=latency, + cumulative_latency=None, + query_group_idx=query_group_idx, + raw_text_result=raw_text_result, + ) + + +def handle_query_group( + servers: ServerDict, + query_group: Any, + query_group_idx: int, + query_start_times: Optional[QueryStartTimes], + dry_run: bool, + parallel: bool = False, + latency_exporter: Optional[Any] = None, + streaming_serializer: Optional[Any] = None, +) -> ResultDict: + logger.debug(f"Starting query group {query_group.id}") + if query_group.starting_delay: + logger.debug( + f"Waiting for {query_group.starting_delay} seconds before starting" + ) + time.sleep(query_group.starting_delay) + + logger.debug("Query start times: {}", query_start_times) + + current_time = None + query_unix_time = None + + # Calculate global query indices (combining group offset with local index) + global_query_idx_start: int = query_group._global_query_idx_start + + result = { + server_name: { + global_query_idx_start + + local_query_idx: QueryResultAcrossTime( + server_name, + query, + global_query_idx_start + local_query_idx, + query_group.repetitions, + ) + for local_query_idx, query in enumerate(query_group.queries) + } + for server_name in servers + } + + for repetition_idx in range(query_group.repetitions): + current_time = datetime.datetime.now() + logger.debug("Current unix time: {}", int(current_time.timestamp())) + + if hasattr(query_group, "query_time_offset"): + current_time = current_time - datetime.timedelta( + seconds=query_group.query_time_offset + ) + logger.debug( + "Offsetting query time by {} seconds", query_group.query_time_offset + ) + + query_unix_time = int(current_time.timestamp()) + logger.debug("Unix time after query_time_offset: {}", 
query_unix_time) + + if parallel: + # Execute queries in parallel + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [] + for local_query_idx, query in enumerate(query_group.queries): + global_query_idx = global_query_idx_start + local_query_idx + current_query_unix_time = get_query_unix_time( + query, + query_unix_time, + query_start_times, + query_group.repetition_delay, + ) + + for server_name, server_object in servers.items(): + futures.append( + executor.submit( + execute_single_query, + server_name, + server_object, + query, + global_query_idx, + repetition_idx, + current_query_unix_time, + dry_run, + query_group_idx, + query_group.time_window_seconds, + ) + ) + + # Collect results + for future in concurrent.futures.as_completed(futures): + query_result = future.result() + server_name = query_result.server_name + query_idx = query_result.query_idx + + query_result.cumulative_latency = query_result.latency + + result[server_name][query_idx].add_result(query_result) + + # Stream result immediately if streaming serializer is provided + if streaming_serializer is not None and not dry_run: + streaming_serializer.streaming_write_result(query_result) + else: + # Reset cumulative latency for each repetition + cumulative_latency = {server_name: 0.0 for server_name in servers} + + # Serial execution - use the same execute_single_query function + for local_query_idx, query in enumerate(query_group.queries): + global_query_idx = global_query_idx_start + local_query_idx + current_query_unix_time = get_query_unix_time( + query, + query_unix_time, + query_start_times, + query_group.repetition_delay, + ) + + logger.debug("Unix time for query: {}", current_query_unix_time) + + for server_name, server_object in servers.items(): + query_result = execute_single_query( + server_name, + server_object, + query, + global_query_idx, + repetition_idx, + current_query_unix_time, + dry_run, + query_group_idx, + query_group.time_window_seconds, + ) + + # Update 
cumulative latency for this repetition + if query_result.latency is not None: + cumulative_latency[server_name] += query_result.latency + + query_result.cumulative_latency = cumulative_latency[server_name] + + try: + result[server_name][global_query_idx].add_result(query_result) + except Exception as e: + logger.error( + f"{type(e).__name__} accessing result dict: {e}, " + f"server_name={server_name}, " + f"global_query_idx={global_query_idx}, " + f"local_query_idx={local_query_idx}, " + f"query_group_idx={query_group_idx}, " + f"available_keys={list(result[server_name].keys())}" + ) + raise + + # Stream result immediately if streaming serializer is provided + if streaming_serializer is not None and not dry_run: + streaming_serializer.streaming_write_result(query_result) + + if latency_exporter is not None: + latency_exporter.export_repetition(repetition_idx, result) + + if repetition_idx < query_group.repetitions - 1: + time.sleep(query_group.repetition_delay) + + if latency_exporter is not None: + latency_exporter.shutdown() + + return result + + +def get_query_start_times( + server_url: str, query_engine_config: QueryEngineConfig +) -> QueryStartTimes: + aggregation_id_start_time_map = {} + query_aggregation_id_map = {} + query_start_time_map = {} + + required_aggregation_ids: Set[int] = set() + for query_yaml in query_engine_config["queries"]: + # add all aggregation IDs from the query YAML to the required_aggregation_ids set + required_aggregation_ids.update( + int(aggregation["aggregation_id"]) + for aggregation in query_yaml["aggregations"] + ) + # assert len(query_yaml["aggregations"]) == 1 + # required_aggregation_ids.add( + # int(query_yaml["aggregations"][0]["aggregation_id"]) + # ) + logger.debug("Required aggregation IDs: {}", required_aggregation_ids) + + # wait for all required aggregation IDs to be present + while True: + server_response = requests.get( + server_url + "/api/v1/status/runtimeinfo", + headers={"Content-Type": "application/json"}, + ) 
+ server_response.raise_for_status() + server_response_json = server_response.json() + logger.debug("Server response: {}", server_response_json) + aggregation_id_start_time_map = server_response_json["data"][ + "earliest_timestamp_per_aggregation_id" + ] + + # change all keys from string to int + aggregation_id_start_time_map = { + int(k): v for k, v in aggregation_id_start_time_map.items() + } + + if not set(aggregation_id_start_time_map.keys()).issuperset( + required_aggregation_ids + ): + logger.debug( + "Waiting for aggregation IDs {} to be present", + required_aggregation_ids - set(aggregation_id_start_time_map.keys()), + ) + time.sleep(10) + else: + break + + # TODO: make this more robust. What happens if there are multiple aggregations with + # different tumbling windows? How long do we wait here? What happens with multiple query groups? + + # get query to aggregate ID mapping from query_engine_config + for query_yaml in query_engine_config["queries"]: + # TODO: this assert will fail if there are multiple aggregations in a query YAML, including for DeltaSet, so commenting it out + # assert len(query_yaml["aggregations"]) == 1 + # for now, just take the first aggregation ID + # TODO: make this more robust, eg for cases where aggregations for the same query have different tumbling windows or start times + query_aggregation_id_map[query_yaml["query"]] = int( + query_yaml["aggregations"][0]["aggregation_id"] + ) + + for query, aggregation_id in query_aggregation_id_map.items(): + # aggregation_id_start_time_map is in milliseconds, convert to seconds + query_start_time_map[query] = ( + # aggregation_id_start_time_map[str(aggregation_id)] / 1000 + aggregation_id_start_time_map[aggregation_id] + / 1000 + ) + + return query_start_time_map + + +def check_args(args: Any) -> None: + if args.align_query_time and args.query_engine_config_file is None: + raise ValueError( + "If align_query_time is set, query_engine_config_file must be provided" + ) + + +def 
start_query_engine_profiler( + pid: int, output_dir: str, starting_delay: int, duration: int +) -> None: + """ + Create and start a subprocess to run py-spy on the specified process. + + Args: + pid: Process ID of the query engine + output_dir: Directory to save the profile output + duration: Duration in seconds to run the profiler + + Returns: + subprocess.Popen: The created subprocess + """ + output_file = os.path.join(output_dir, "query_engine_profile.svg") + logger.debug(f"Waiting for {starting_delay} seconds before starting profiler") + time.sleep(starting_delay) + logger.debug(f"Starting py-spy profiling of PID {pid} for {duration} seconds") + + try: + cmd = "bash --login -c 'sudo env \"PATH=$PATH\" py-spy record --pid {} -o {} --duration {} --idle'".format( + str(pid), output_file, str(duration) + ) + logger.info(f"Running command: {cmd}") + + subprocess.run(cmd, shell=True) + except Exception as e: + logger.error(f"Error starting profiler: {str(e)}") + raise e + + +def start_prometheus_profiler( + output_dir: str, starting_delay: int, duration: int +) -> None: + output_file = os.path.join(output_dir, "prometheus_profile.pprof") + logger.debug(f"Waiting for {starting_delay} seconds before starting profiler") + time.sleep(starting_delay) + logger.debug(f"Starting pprof profiling of Prometheus for {duration} seconds") + + try: + # cmd = "go tool pprof -seconds {} -output {} http://localhost:9090/debug/pprof/profile".format( + cmd = "curl -o {} http://localhost:9090/debug/pprof/profile?seconds={}".format( + output_file, + str(duration), + ) + logger.info(f"Running command: {cmd}") + + subprocess.run(cmd, shell=True) + except Exception as e: + logger.error(f"Error starting profiler: {str(e)}") + + +def main(args: Any) -> None: + check_args(args) + os.makedirs(args.output_dir, exist_ok=True) + + create_loggers(args.output_dir, "DEBUG") + + if args.dry_run: + logger.info("Running in dry-run mode") + + if args.parallel: + logger.info("Running queries in parallel 
mode") + + with open(args.config_file, "r") as file: + config_data = yaml.safe_load(file) + + query_engine_config = None + if args.query_engine_config_file: + with open(args.query_engine_config_file, "r") as file: + query_engine_config = yaml.safe_load(file) + + config = Config.from_dict(config_data) + + logger.debug("Read config") + + # Calculate global query indices for each query group + global_query_idx = 0 + for query_group in config.query_groups: + query_group._global_query_idx_start = global_query_idx # type: ignore[attr-defined] + global_query_idx += len(query_group.queries) + + server_url_for_alignment = None + + servers: Dict[str, QueryClient] = {} + for server in config.servers: + # Determine protocol (default to prometheus for backward compatibility) + protocol = server.protocol if server.protocol else "prometheus" + + if protocol == "prometheus": + # Create custom retry adapter with debug logging + debug_retry = PrometheusDebugRetry( + server_name=server.name, + total=3, + backoff_factor=1, + status_forcelist=[408, 429, 500, 502, 503, 504], + ) + + client = QueryClientFactory.create( + protocol=protocol, + server_url=server.url, + server_name=server.name, + disable_ssl=True, + retry=debug_retry, + ) + + # Mount debug adapter for HTTP request logging + debug_adapter = PrometheusDebugHTTPAdapter(server.name) + client.session.mount("http://", debug_adapter) + client.session.mount("https://", debug_adapter) + else: + # ClickHouse or other protocols + client = QueryClientFactory.create( + protocol=protocol, + server_url=server.url, + server_name=server.name, + database=server.database if server.database else "default", + user=server.user if server.user else "default", + password=server.password if server.password else "", + ) + + # Mount debug adapter for HTTP request logging + debug_adapter = PrometheusDebugHTTPAdapter(server.name) + client.session.mount("http://", debug_adapter) + client.session.mount("https://", debug_adapter) + + servers[server.name] = 
client + logger.debug( + "Connected to server {} ({}) with HTTP debug logging enabled", + server.name, + protocol, + ) + + if args.align_query_time and server.name == args.server_for_alignment: + server_url_for_alignment = server.url + + query_start_times = None + if args.align_query_time: + assert server_url_for_alignment is not None + assert query_engine_config is not None + query_start_times = get_query_start_times( + server_url_for_alignment, query_engine_config + ) + logger.debug("Got query start times") + + # Calculate profiler timing based on all query groups + min_starting_delay = min(qg.starting_delay for qg in config.query_groups) + max_duration = 0 + for query_group in config.query_groups: + assert query_group.repetitions is not None + assert query_group.repetition_delay is not None + duration = ( + query_group.repetition_delay * query_group.repetitions + + query_group.starting_delay + - min_starting_delay + ) + max_duration = max(max_duration, duration) + + query_engine_profiler_thread = None + if args.profile_query_engine_pid: + query_engine_profiler_thread = threading.Thread( + target=start_query_engine_profiler, + args=( + args.profile_query_engine_pid, + args.output_dir, + min_starting_delay, + max_duration, + ), + ) + if query_engine_profiler_thread: + logger.debug("Starting query engine profiler thread...") + query_engine_profiler_thread.daemon = True + query_engine_profiler_thread.start() + + prometheus_profiler_thread = None + if args.profile_prometheus_time is not None: + prometheus_profiler_thread = threading.Thread( + target=start_prometheus_profiler, + args=( + args.output_dir, + min_starting_delay, + args.profile_prometheus_time, + ), + ) + if prometheus_profiler_thread: + prometheus_profiler_thread.daemon = True + prometheus_profiler_thread.start() + + if args.export_latencies_for_prometheus is not None: + exporter_socket_addr = args.export_latencies_for_prometheus.split(sep=":") + exporter_ip = exporter_socket_addr[0] + exporter_port = 
int(exporter_socket_addr[1]) + latency_exporter = QueryLatencyExporter(addr=exporter_ip, port=exporter_port) + logger.debug( + f"Running with query latency exporter at {args.export_latencies_for_prometheus}" + ) + latency_exporter.launch() + else: + latency_exporter = None + + # Initialize streaming serializer if not in dry run mode + streaming_serializer = None + if not args.dry_run: + streaming_serializer = SerializerFactory.create( + args.serialization_format, args.output_dir + ) + + # Prepare metadata for streaming - include per-group information + query_groups_metadata = [] + for query_group_idx, query_group in enumerate(config.query_groups): + query_groups_metadata.append( + { + "query_group_idx": query_group_idx, + "query_group_id": query_group.id, + "queries": query_group.queries, + "repetitions": query_group.repetitions, + } + ) + + metadata = { + "query_groups": query_groups_metadata, + "servers": list(servers.keys()), + } + streaming_serializer.streaming_write_start(metadata) + + # Spawn threads for each query group + query_group_threads = [] + results_per_group: List[Optional[ResultDict]] = [None] * len(config.query_groups) + + def run_query_group(query_group_idx: int, query_group: Any) -> None: + """Wrapper function to run a query group and store results.""" + try: + results = handle_query_group( + servers, + query_group, + query_group_idx, + query_start_times, + args.dry_run, + args.parallel, + latency_exporter, + streaming_serializer, + ) + results_per_group[query_group_idx] = results + except Exception as e: + logger.error( + f"Query group {query_group_idx} (id={query_group.id}) failed with " + f"{type(e).__name__}: {e}", + exc_info=True, + ) + results_per_group[query_group_idx] = None + raise # Re-raise to ensure it's logged but thread still terminates + + for query_group_idx, query_group in enumerate(config.query_groups): + thread = threading.Thread( + target=run_query_group, + args=(query_group_idx, query_group), + ) + 
query_group_threads.append(thread) + thread.start() + logger.debug(f"Started thread for query group {query_group_idx}") + + # Wait for all query group threads to complete + for idx, thread in enumerate(query_group_threads): + thread.join() + logger.debug(f"Query group {idx} thread completed") + + # Merge results from all query groups into single structure + results_across_servers: Dict[str, Dict[int, Any]] = {} + for server_name in servers.keys(): + results_across_servers[server_name] = {} + + for group_results in results_per_group: + if group_results: + for server_name, server_results in group_results.items(): + results_across_servers[server_name].update(server_results) + + if not args.dry_run and streaming_serializer is not None: + # Finalize streaming write + streaming_serializer.streaming_write_end() + + # deprecated: save results in a pickle file + # with open(os.path.join(args.output_dir, args.result_output_file), "wb") as fout: + # pickle.dump(results_across_servers, fout) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="") + parser.add_argument("--config_file", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--output_file", type=str, required=True) + # deprecated: + # parser.add_argument("--result_output_file", type=str, default="results.pkl") + + parser.add_argument("--query_engine_config_file", type=str, required=False) + parser.add_argument("--align_query_time", action="store_true", required=False) + parser.add_argument("--server_for_alignment", type=str, default="sketchdb") + + parser.add_argument("--dry_run", action="store_true", required=False) + parser.add_argument( + "--parallel", + action="store_true", + help="Execute queries in parallel", + required=False, + ) + + parser.add_argument("--profile_query_engine_pid", type=int, required=False) + parser.add_argument("--profile_prometheus_time", type=int, required=False) + + parser.add_argument( + 
"--export_latencies_for_prometheus", + type=str, + help="Run prometheus query latency exporter at ", + required=False, + ) + + parser.add_argument( + "--serialization_format", + type=str, + choices=["jsonl", "parquet"], + default="jsonl", + help="Format for serializing query results (jsonl or parquet)", + required=False, + ) + + args = parser.parse_args() + main(args) diff --git a/PrometheusClient/pyproject.toml b/PrometheusClient/pyproject.toml new file mode 100644 index 0000000..000d249 --- /dev/null +++ b/PrometheusClient/pyproject.toml @@ -0,0 +1,36 @@ +[tool.black] +line-length = 88 +target-version = ['py38', 'py39', 'py310', 'py311'] +include = '\.pyi?$' +exclude = ''' +/( + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist +)/ +''' + +[tool.mypy] +python_version = "3.8" +mypy_path = "../../../CommonDependencies/py" + +# STRICT MODE ENABLED +# This enables all 14 strict type checking flags +strict = false + +# Ignore missing imports for third-party libraries without stubs +[[tool.mypy.overrides]] +module = [ + "prometheus_api_client.*", + "loguru.*", + "promql_utilities.*", + "jinja2.*" +] +ignore_missing_imports = true diff --git a/PrometheusClient/requirements.txt b/PrometheusClient/requirements.txt new file mode 100644 index 0000000..95ba595 --- /dev/null +++ b/PrometheusClient/requirements.txt @@ -0,0 +1,10 @@ +prometheus-api-client==0.5.5 +prometheus_client==0.21.1 +pyyaml +numpy +loguru +requests +py-spy +typing_extensions==4.13.2 +jinja2 +types-jinja2 diff --git a/PrometheusClient/similarity_scores.py b/PrometheusClient/similarity_scores.py new file mode 100644 index 0000000..986490f --- /dev/null +++ b/PrometheusClient/similarity_scores.py @@ -0,0 +1,21 @@ +import numpy as np +from numpy.typing import NDArray +from typing import Any + + +def correlation( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.corrcoef(exact, estimate)[0, 1]) + + 
+def l1_norm( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.sum(np.abs(exact - estimate))) + + +def l2_norm( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.sum(np.square(exact - estimate))) diff --git a/PrometheusClient/type_aliases.py b/PrometheusClient/type_aliases.py new file mode 100644 index 0000000..7e343eb --- /dev/null +++ b/PrometheusClient/type_aliases.py @@ -0,0 +1,31 @@ +"""Type aliases for PrometheusClient codebase.""" + +from typing import Dict, Any, Callable +from typing_extensions import TypeAlias + +# Server and connection types +ServerName: TypeAlias = str +ServerURL: TypeAlias = str +ServerDict: TypeAlias = Dict[str, Any] # Dict of server_name -> QueryClient + +# Query related types +Query: TypeAlias = str +QueryIndex: TypeAlias = int +RepetitionIndex: TypeAlias = int +UnixTimestamp: TypeAlias = int + +# Result types +ResultDict: TypeAlias = Dict[ + str, Dict[int, Any] +] # Dict[server_name][query_idx] -> QueryResultAcrossTime +SimilarityScores: TypeAlias = Dict[ + str, Dict[str, float] +] # Dict[function_name][query] -> score + +# Configuration types +QueryStartTimes: TypeAlias = Dict[str, float] # Dict[query] -> start_time +AggregationConfig: TypeAlias = Dict[str, Any] +QueryEngineConfig: TypeAlias = Dict[str, Any] + +# Function types +SimilarityFunction: TypeAlias = Callable[[Any, Any], float] diff --git a/PrometheusExporters/.gitignore b/PrometheusExporters/.gitignore new file mode 100644 index 0000000..3540786 --- /dev/null +++ b/PrometheusExporters/.gitignore @@ -0,0 +1,7 @@ +pyvenv.cfg +bin/ +lib/ +include/ +__pycache__/ +.DS_Store +**/target/ diff --git a/PrometheusExporters/.isort.cfg b/PrometheusExporters/.isort.cfg new file mode 100644 index 0000000..b9fb3f3 --- /dev/null +++ b/PrometheusExporters/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +profile=black diff --git a/PrometheusExporters/.mypy.ini b/PrometheusExporters/.mypy.ini 
new file mode 100644 index 0000000..8cb50b6 --- /dev/null +++ b/PrometheusExporters/.mypy.ini @@ -0,0 +1,4 @@ +[mypy] +files = "**/*.py" +ignore_missing_imports = True +disable_error_code = import-untyped diff --git a/PrometheusExporters/LICENSE b/PrometheusExporters/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/PrometheusExporters/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/PrometheusExporters/README.md b/PrometheusExporters/README.md new file mode 100644 index 0000000..31eaeea --- /dev/null +++ b/PrometheusExporters/README.md @@ -0,0 +1,17 @@ +# SketchDB Prometheus Exporters + +This repository contains multiple Prometheus exporters for exposing various types of metrics that can be scraped by a Prometheus server. 
+ +## Available Exporters + +- **Cluster Data Exporter** (Rust) - Exposes cluster resource usage metrics from Google and Alibaba cluster trace datasets +- **Fake Exporter** (Rust or Python) - Generates synthetic, pseudorandom Prometheus metrics +- **Query Cost Exporter** (Python) - Exports query cost metrics and resource usage statistics +- **Query Latency Exporter** (Python) - Monitors and exports query latency metrics + +## Metrics Endpoint + +All exporters expose metrics at: +``` +http://localhost:/metrics +``` diff --git a/PrometheusExporters/cluster_data_exporter/.dockerignore b/PrometheusExporters/cluster_data_exporter/.dockerignore new file mode 100644 index 0000000..1634158 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/.dockerignore @@ -0,0 +1,9 @@ +target/ +.git/ +.gitignore +README.md +data/ +*.csv +*.gz +docker_compose_frames/ +scripts/ diff --git a/PrometheusExporters/cluster_data_exporter/.gitignore b/PrometheusExporters/cluster_data_exporter/.gitignore new file mode 100644 index 0000000..1e7caa9 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/.gitignore @@ -0,0 +1,2 @@ +Cargo.lock +target/ diff --git a/PrometheusExporters/cluster_data_exporter/Cargo.toml b/PrometheusExporters/cluster_data_exporter/Cargo.toml new file mode 100644 index 0000000..96ef3aa --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "cluster_data_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +prometheus = "0.14.0" +tokio = { version = "1", features = ["full"] } +hyper = { version = "1", features = ["full"] } +hyper-util = { version = "0.1", features = ["full"] } +lazy_static = "1.5" +csv = "1.3" +serde = { version = "1.0", features = ["derive"] } +concurrent-queue = "2.5.0" +flate2 = "1.1.2" +clap = { version = "4.5.41", features = ["derive"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" diff --git 
a/PrometheusExporters/cluster_data_exporter/Dockerfile b/PrometheusExporters/cluster_data_exporter/Dockerfile new file mode 100644 index 0000000..6e5dfc2 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/Dockerfile @@ -0,0 +1,38 @@ +# Use the official Rust image as a build environment +FROM rust:latest AS builder + +# Set the working directory inside the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application in release mode +RUN cargo build --release + +# Use a minimal runtime image +FROM debian:bookworm-slim + +# Install necessary runtime dependencies +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Create a non-root user +RUN useradd -r -s /bin/false exporter + +# Create the data and output directories that will be mounted as volumes +RUN mkdir -p /data /output && chown exporter:exporter /data /output + +# Copy the binary from the builder stage +COPY --from=builder /usr/src/app/target/release/cluster_data_exporter /usr/local/bin/cluster_data_exporter + +# Change to the non-root user +USER exporter + +# Set the entrypoint to the binary +# All arguments including port and input directory must be provided via docker run or docker-compose +ENTRYPOINT ["cluster_data_exporter"] diff --git a/PrometheusExporters/cluster_data_exporter/README.md b/PrometheusExporters/cluster_data_exporter/README.md new file mode 100644 index 0000000..e075e13 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/README.md @@ -0,0 +1,244 @@ +# CLUSTER DATA EXPORTER + +A Prometheus exporter that exposes cluster resource usage metrics from Google and Alibaba cluster trace datasets. + +## DESCRIPTION + +This exporter reads CSV data from certain datasets provided by Google or Alibaba and exposes them as Prometheus metrics. 
The exporter supports both Google task resource usage data from 2011 and Alibaba node and microservice resource data from 2021 and 2022. Instructions for downloading this data are linked in this document. + +## INSTALLATION + +### Prerequisites + +- Rust 1.70+ (edition 2021) +- Access to Google or Alibaba cluster datasets + +### Building + +```bash +cargo build --release +``` + +## USAGE + +```bash +cluster_data_exporter -i -p [OPTIONS] +``` + +### Google Provider + +```bash +cluster_data_exporter -i ./google/clusterdata-2011/ -p 8080 google [OPTIONS] +``` + +### Alibaba Provider + +```bash +cluster_data_exporter -i ./alibaba/2021/ -p 8080 alibaba [OPTIONS] +``` + +## DATA SOURCES + +### Google Cluster Data + +Instructions on how to download the Google Cluster 2011 task usage data: +https://github.com/google/cluster-data/blob/master/ClusterData2011_2.md + +The only part of the dataset used by the exporter is the task_usage section, so there's no need to install the whole dataset + +Expected directory structure: +``` +path/to/task/resource/usage/dir/ +├── part-00000-of-00500.csv.gz +├── part-00001-of-00500.csv.gz +└── ... +``` + +### Alibaba Cluster Data + +Instructions on downloading the Alibaba microservice trace datasets: +- 2021: https://github.com/alibaba/clusterdata/blob/master/cluster-trace-microservices-v2021/README.md#introduction-of-trace-data +- 2022: https://github.com/alibaba/clusterdata/tree/master/cluster-trace-microservices-v2022#trace-data-download + +The only parts of the datasets used by the exporter are the Node and MSResource sections, the rest can be discarded. + +Expected directory structure (after preprocessing): + +2021 Data: +``` +path/to/Node/ +├── Node_0.csv.gz +├── Node_1.csv.gz +└── ... + +path/to/MSResource/ +├── MSResource_0.csv.gz +├── MSResource_1.csv.gz +└── ... +``` + +2022 Data: +``` +path/to/NodeMetrics/ +├── NodeMetrics_0.csv.gz +├── NodeMetrics_1.csv.gz +└── ... 
+ +path/to/MSMetrics/ +├── MSMetrics_0.csv.gz +├── MSMetrics_1.csv.gz +└── ... +``` + +## DATA PREPROCESSING FOR ALIBABA + +IMPORTANT: Before running the exporter on Alibaba data, you must run the preprocessing script to sort the data by timestamp and recompress it as a .csv.gz: + +```bash +./bin/alibaba/sort_and_format.sh --year <2021|2022> [-n] [-m] +``` + +This script extracts, sorts by timestamp, and recompresses the Alibaba CSV files in a format the exporter can read (.csv.gz). The sorting is necessary because some datasets (mainly 2022 data) are not sorted by timestamp, which is required for proper metric export timing. + +### Input Directory Structure + +The input directory should contain one or both of the subdirectories with unprocessed files, i.e. the untouched /data/ directory created from running the fetchData.sh scripts from the Alibaba github repos. For example: + +``` +alibaba/2021/data/ +├── Node/ +│ ├── Node_0.tar.gz +│ ├── Node_1.tar.gz +│ └── ... +└── MSResource/ + ├── MSResource_0.tar.gz + ├── MSResource_1.tar.gz + └── ... + +alibaba/2022/data/ +├── NodeMetrics/ +│ ├── NodeMetrics_0.tar.gz +│ ├── NodeMetrics_1.tar.gz +│ └── ... +└── MSMetrics/ + ├── MSMetrics_0.tar.gz + ├── MSMetrics_1.tar.gz + └── ... 
+``` + +Examples: + +```bash +# Process 2021 Node data +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -n + +# Process 2021 MSResource data +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -m + +# Process both Node and MSResource data for 2021 +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -n -m +``` + +## COMMAND LINE ARGUMENTS + +- -i, --input-directory: Path to the directory containing CSV data files +- -p, --port: Port number for the HTTP server + +### Provider-specific Options + +#### Google +- --metrics: Specific metrics to export from task resource usage data +- --all-parts: Process all CSV parts (default behavior) +- --part-index: Process only a specific part index (0-499) + +#### Alibaba +- --data-type: Type of data to export (node or msresource) +- --data-year: Year of the dataset (2021 or 2022) +- --all-parts: Process all CSV parts (default behavior) +- --part-index: Process only a specific part index + +## DOCKER USAGE + +### Prerequisites for Docker + +1. Download and preprocess your CSV data as described in the DATA SOURCES section above +2. Place the preprocessed data in a local directory (e.g., `./data/`) + +### Building and Running with Docker + +Build the Docker image: +```bash +docker build -t cluster-data-exporter . 
+```
+
+Run with Docker (example for Google data):
+```bash
+docker run -v ./data:/data:ro -p 40000:40000 cluster-data-exporter \
+  --input-directory /data \
+  --port 40000 \
+  google \
+  --metrics mean-cpu-usage-rate,canonical-memory-usage \
+  --all-parts
+```
+
+Run with Docker (example for Alibaba data):
+```bash
+docker run -v ./data:/data:ro -p 40000:40000 cluster-data-exporter \
+  --input-directory /data \
+  --port 40000 \
+  alibaba \
+  --data-type node \
+  --data-year 2021 \
+  --all-parts
+```
+
+### Using Docker Compose
+
+#### Automated Generation with Python Script
+
+The `scripts/generate_docker_compose.py` script automatically generates docker-compose.yml files from the frame templates and fills in certain fields.
+
+**Google Provider Example:**
+```bash
+python scripts/generate_docker_compose.py google --metrics mean-cpu-usage-rate,max-cpu-usage --port 8080 --input-dir ./data
+```
+
+**Alibaba Provider Example:**
+```bash
+python scripts/generate_docker_compose.py alibaba --data-type node --data-year 2021 --port 8080 --input-dir ./data
+```
+
+The script will:
+- Validate your configuration options
+- Generate a docker-compose.yml file with correct settings
+- Update port mappings and volume mounts automatically
+
+#### Manual Setup with Frame Files
+
+Alternatively, the `docker_compose_frames/` directory contains pre-configured docker-compose files for different providers and configurations. These frame files will still require small edits before running docker-compose, see each frame file for more information.
+
+- **Google Provider**: `google-docker-compose.yml` - Edit list of metrics to export
+- **Alibaba Provider**: Provider-specific frames for each data type and year combination:
+  - `alibaba-node-2021-docker-compose.yml`
+  - `alibaba-node-2022-docker-compose.yml`
+  - `alibaba-msresource-2021-docker-compose.yml`
+  - `alibaba-msresource-2022-docker-compose.yml`
+
+To use a frame file:
+1. 
Copy the appropriate frame file from `docker_compose_frames/` to your working directory as `docker-compose.yml`
+2. Edit the file with any options that still need to be filled in (marked with "CHANGE THIS" comments)
+3. Run: `docker-compose up -d`
+
+### Data Volume Requirements
+
+- The container expects data to be mounted at `/data`
+- Data must be preprocessed according to the instructions in the DATA SOURCES section
+- For Alibaba data, ensure you've run the sorting and compression scripts before mounting
+- Mount the volume as read-only (`:ro`)
+
+## METRICS ENDPOINT
+
+Once running, metrics are available at:
+```
+http://localhost:<port>/metrics
+``` diff --git a/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 b/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 new file mode 100644 index 0000000..2587b52 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 @@ -0,0 +1,41 @@ +# cluster_data_exporter Docker Compose Template
+# This template is rendered with Jinja2 to generate the final docker-compose.yml
+
+{% if x_bake %}
+x-bake:
+  - COMPOSE_BAKE=true
+
+{% endif %}
+services:
+  cluster-data-exporter:
+    image: sketchdb-cluster-data-exporter:latest
+    container_name: {{ container_name | default('sketchdb-cluster-data-exporter') }}
+    volumes:
+      - {{ data_directory }}:/data:ro
+    ports:
+      - "{{ port }}:{{ port }}"
+    command: [
+      "--input-directory","/data",
+      "--port","{{ port }}",
+      "{{ provider }}",{% if provider == "google" %}
+      "--metrics={{ metrics }}",{% if process_mode == "all-parts" %}
+      "--all-parts"{% else %}
+      "--part-index={{ part_index }}"{% endif %}{% elif provider == "alibaba" %}
+      "--data-type={{ data_type }}",
+      "--data-year={{ data_year }}",{% if process_mode == "all-parts" %}
+      "--all-parts"{% else %}
+      "--part-index={{ part_index }}"{% endif %}{% endif %}
+    ]
+    restart: unless-stopped
+{% if memory_limit or memory_reservation %}
+    deploy:
+      resources:
+{% if memory_limit %}
+        limits:
+
memory: {{ memory_limit }} +{% endif %} +{% if memory_reservation %} + reservations: + memory: {{ memory_reservation }} +{% endif %} +{% endif %} diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml new file mode 100644 index 0000000..683bbfa --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml @@ -0,0 +1,31 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba MsResource 2021 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=ms-resource", + "--data-year=2021", + "--all-parts", # or "--part-index=" + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml new file mode 100644 index 0000000..4bdf90e --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba MSResource 2022 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=ms-resource", + "--data-year=2022", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml new file mode 100644 index 0000000..2134943 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba Node 2021 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=node", + "--data-year=2021", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml new file mode 100644 index 0000000..bbe7b7b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba Node 2022 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=node", + "--data-year=2022", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml new file mode 100644 index 0000000..e013892 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml @@ -0,0 +1,39 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # Mount the local data directory to /data in the container + # Replace './data' with the path to your csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + # Add your provider-specific arguments here + # For Google data example: + # "google", + # "--metrics", "mean_cpu_usage_rate,canonical_memory_usage", + # "--all-parts" + # + # For Alibaba data example: + # "alibaba", + # "--data-type", "node", + # "--data-year", "2021", + # "--all-parts" + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml new file mode 100644 index 
0000000..22ce384 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Google csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "google", + # CHANGE THIS: Replace with your desired metrics (comma-separated) + "--metrics=mean_cpu_usage_rate,canonical_memory_usage", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/installation/install.sh b/PrometheusExporters/cluster_data_exporter/installation/install.sh new file mode 100755 index 0000000..915c68b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Cluster Data Exporter Docker image..." +cd "$PARENT_DIR" +docker build . -f Dockerfile -t sketchdb-cluster-data-exporter:latest + +echo "Cluster Data Exporter Docker image built successfully: sketchdb-cluster-data-exporter:latest" \ No newline at end of file diff --git a/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py b/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py new file mode 100644 index 0000000..38c32f9 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py @@ -0,0 +1,253 @@ +""" +Script to generate docker-compose.yml files from frame templates based on data provider configuration. 
+ +This script takes a data provider (google or alibaba) and provider-specific arguments, +then generates a docker-compose.yml file by copying and modifying the appropriate frame file. +""" + +import argparse +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +# Valid values from Rust enums (CLI format with hyphens) +VALID_GOOGLE_METRICS = [ + "mean-cpu-usage-rate", + "canonical-memory-usage", + "assigned-memory-usage", + "unmapped-page-cache-memory-usage", + "total-page-cache-memory-usage", + "max-memory-usage", + "mean-disk-io-time", + "mean-local-disk-space-used", + "max-cpu-usage", + "max-disk-io-time", + "cycles-per-instruction", + "memory-accesses-per-instruction", + "sample-portion", + "sampled-cpu-usage", +] + +VALID_ALIBABA_DATA_TYPES = ["node", "msresource"] +VALID_ALIBABA_DATA_YEARS = [2021, 2022] + + +def validate_google_metrics(metrics: List[str]) -> None: + """Validate that all provided Google metrics are valid.""" + invalid_metrics = [m for m in metrics if m not in VALID_GOOGLE_METRICS] + if invalid_metrics: + print(f"Error: Invalid Google metrics: {', '.join(invalid_metrics)}") + print(f"Valid metrics: {', '.join(VALID_GOOGLE_METRICS)}") + sys.exit(1) + + +def validate_alibaba_args(data_type: str, data_year: int) -> None: + """Validate Alibaba data type and year arguments.""" + if data_type not in VALID_ALIBABA_DATA_TYPES: + print(f"Error: Invalid data type: {data_type}") + print(f"Valid data types: {', '.join(VALID_ALIBABA_DATA_TYPES)}") + sys.exit(1) + + if data_year not in VALID_ALIBABA_DATA_YEARS: + print(f"Error: Invalid data year: {data_year}") + print(f"Valid years: {', '.join(map(str, VALID_ALIBABA_DATA_YEARS))}") + sys.exit(1) + + +def get_frame_file_path( + provider: str, data_type: Optional[str] = None, data_year: Optional[int] = None +) -> Path: + """Get the path to the appropriate frame file based on provider and arguments.""" + frames_dir = Path("docker_compose_frames") + + if provider 
== "google": + return frames_dir / "google-docker-compose.yml" + elif provider == "alibaba": + return frames_dir / f"alibaba-{data_type}-{data_year}-docker-compose.yml" + else: + raise ValueError(f"Unknown provider: {provider}") + + +def load_yaml_file(file_path: Path) -> Dict[str, Any]: + """Load YAML file and return parsed content.""" + with open(file_path, "r") as f: + return yaml.safe_load(f) + + +def save_yaml_file(file_path: Path, data: Dict[str, Any]) -> None: + """Save data to YAML file.""" + with open(file_path, "w") as f: + yaml.dump(data, f, default_flow_style=False, sort_keys=False) + + +def update_command_arg(command: List[str], arg_name: str, new_value: str) -> List[str]: + """Update a command line argument in the command list.""" + updated_command = [] + i = 0 + while i < len(command): + if command[i] == arg_name: + updated_command.append(command[i]) + if i + 1 < len(command): + updated_command.append(new_value) + i += 2 + else: + updated_command.append(new_value) + i += 1 + elif command[i].startswith(f"{arg_name}="): + updated_command.append(f"{arg_name}={new_value}") + i += 1 + else: + updated_command.append(command[i]) + i += 1 + return updated_command + + +def generate_google_compose( + metrics: List[str], port: Optional[int], input_dir: Optional[str] +) -> None: + """Generate docker-compose.yml for Google provider.""" + frame_file = get_frame_file_path("google") + output_file = Path("docker-compose.yml") + + # Load frame file + compose_data = load_yaml_file(frame_file) + + # Update metrics + metrics_str = ",".join(metrics) + service = compose_data["services"]["cluster-data-exporter"] + command = service["command"] + + # Find and update metrics argument + for i, arg in enumerate(command): + if arg.startswith("--metrics="): + command[i] = f"--metrics={metrics_str}" + break + + # Update optional arguments if provided + if port is not None: + # Update port mapping + service["ports"] = [f"{port}:{port}"] + # Update port in command + command = 
update_command_arg(command, "--port", str(port)) + service["command"] = command + + if input_dir is not None: + # Update volume mapping + service["volumes"] = [f"{input_dir}:/data:ro"] + + # Save updated compose file + save_yaml_file(output_file, compose_data) + + +def generate_alibaba_compose( + data_type: str, + data_year: int, + port: Optional[int], + input_dir: Optional[str], + speedup: Optional[int], +) -> None: + """Generate docker-compose.yml for Alibaba provider.""" + frame_file = get_frame_file_path("alibaba", data_type, data_year) + output_file = Path("docker-compose.yml") + + # Load frame file + compose_data = load_yaml_file(frame_file) + + service = compose_data["services"]["cluster-data-exporter"] + command = service["command"] + + # Update optional arguments if provided + if port is not None: + # Update port mapping + service["ports"] = [f"{port}:{port}"] + # Update port in command + command = update_command_arg(command, "--port", str(port)) + service["command"] = command + + if input_dir is not None: + # Update volume mapping + service["volumes"] = [f"{input_dir}:/data:ro"] + + # Add speedup if specified + if speedup is not None: + if "--speedup" not in " ".join(command): + command.append(f"--speedup={speedup}") + else: + command = update_command_arg(command, "--speedup", str(speedup)) + service["command"] = command + + # Save updated compose file + save_yaml_file(output_file, compose_data) + + +def main(): + parser = argparse.ArgumentParser( + description="Generate docker-compose.yml from frame files based on data provider configuration", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Google provider with specific metrics + python scripts/generate_docker_compose.py google --metrics mean_cpu_usage_rate,max_cpu_usage --port 8080 + + # Alibaba provider with node data from 2021 + python scripts/generate_docker_compose.py alibaba --data-type node --data-year 2021 --port 8080 + + # With custom input directory + python 
scripts/generate_docker_compose.py google --metrics canonical_memory_usage --input-dir /path/to/data + """, + ) + + parser.add_argument("provider", choices=["google", "alibaba"], help="Data provider") + parser.add_argument("--port", type=int, help="Port number for the HTTP server") + parser.add_argument("--input-dir", "--input-directory", help="Input directory path") + + # Google-specific arguments + google_group = parser.add_argument_group("Google provider arguments") + google_group.add_argument( + "--metrics", type=str, help="Comma-separated list of metrics to export" + ) + + # Alibaba-specific arguments + alibaba_group = parser.add_argument_group("Alibaba provider arguments") + alibaba_group.add_argument( + "--data-type", choices=VALID_ALIBABA_DATA_TYPES, help="Type of data to export" + ) + alibaba_group.add_argument( + "--data-year", + type=int, + choices=VALID_ALIBABA_DATA_YEARS, + help="Year of the dataset", + ) + alibaba_group.add_argument( + "--speedup", + type=int, + help="Speedup factor for faster-than-realtime export (1=real-time, 10=10x faster)", + ) + + args = parser.parse_args() + + # Validate provider-specific required arguments + if args.provider == "google": + if not args.metrics: + parser.error("Google provider requires --metrics argument") + metrics_list = [m.strip() for m in args.metrics.split(",")] + validate_google_metrics(metrics_list) + generate_google_compose(metrics_list, args.port, args.input_dir) + + elif args.provider == "alibaba": + if not args.data_type: + parser.error("Alibaba provider requires --data-type argument") + if not args.data_year: + parser.error("Alibaba provider requires --data-year argument") + validate_alibaba_args(args.data_type, args.data_year) + generate_alibaba_compose( + args.data_type, args.data_year, args.port, args.input_dir, args.speedup + ) + + print(f"Generated docker-compose.yml for {args.provider} provider") + + +if __name__ == "__main__": + main() diff --git 
a/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt b/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt new file mode 100644 index 0000000..5fde258 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt @@ -0,0 +1,2 @@ +PyYAML==6.0.2 +types-PyYAML==6.0.12.20250516 diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs new file mode 100644 index 0000000..98bc4ec --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs @@ -0,0 +1,71 @@ +use clap::ValueEnum; +use std::sync::OnceLock; + +type BoxedErr = Box; + +// Speedup factor for faster-than-realtime export (set via CLI) +pub static SPEEDUP_FACTOR: OnceLock = OnceLock::new(); + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum MsDataType { + // BM Node runtime information. + // It records CPU and memory utilization of 1300+ BM nodes in a production cluster. + Node, + // MS runtime information. + // It records CPU and memory utilization of 90000+ containers for 1300+ MSs in the same production cluster. + MsResource, +} + +pub mod ms_resource; +pub mod node; + +// The type of microservice data to export. Should be initialized before any +// reading or exporting begins +pub static EXPORTER_DATA_TYPE: OnceLock = OnceLock::new(); + +/// @brief Calls the export_from_queue() function based on runtime initialized +/// EXPORTER_DATA_TYPE +pub fn export_from_queue() { + match EXPORTER_DATA_TYPE.get().unwrap() { + MsDataType::Node => node::export_from_queue(), + MsDataType::MsResource => ms_resource::export_from_queue(), + } +} + +/// @brief Main routine for the thread that will be reading csv data and +/// exporting. 
This function just uses a match statement to call the reading +/// and exporting routine required by the specified mode +/// +/// @param[in] input_dir The input directory containing csv files +/// @param[in] all_parts Whether to start from part 0 of csv files and continue +/// until no more files are found. This should be false if +/// part_index is Some(part) +/// @param[in] part_index Which csv file part to use as the data source. +/// This should be None if all_parts is true. +/// @param[in] data_type The type of data out of the different types of trace +/// data in the Alibaba micro-services trace data +/// @param[in] data_year The year of the trace data. Supported values are +/// 2021 and 2022 +/// @param[in] speedup Speedup factor for faster-than-realtime export +/// +/// @return The result returned by the reader thread. +pub fn reader_thread_routine( + input_dir: String, + all_parts: bool, + part_index: Option, + data_type: MsDataType, + data_year: u32, + speedup: u64, +) -> Result<(), BoxedErr> { + use crate::alibaba_metrics::node; + let _ = EXPORTER_DATA_TYPE.set(data_type); + let _ = SPEEDUP_FACTOR.set(speedup); + let result = match EXPORTER_DATA_TYPE.get().unwrap() { + MsDataType::Node => node::read_and_queue(&input_dir, all_parts, part_index, data_year), + MsDataType::MsResource => { + ms_resource::read_and_queue(&input_dir, all_parts, part_index, data_year) + } + }; + + result +} diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs new file mode 100644 index 0000000..cb0e91b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs @@ -0,0 +1,248 @@ +use crate::utilities; +pub use concurrent_queue::ConcurrentQueue; +use csv::Reader; +use flate2::read::GzDecoder; +use lazy_static::lazy_static; +use prometheus::{register_gauge_vec, GaugeVec}; +use std::fs::File; +use std::io::BufReader; +use 
std::thread; +use std::time::Duration; +use tracing::{debug, info}; + +const FILENAME_PARTS_2021: [&str; 2] = ["MSResource_", ".csv.gz"]; +const FILENAME_PARTS_2022: [&str; 2] = ["MSMetrics_", ".csv.gz"]; + +const DATA_QUEUE_CAP: usize = 400_000; +const QUEUE_POLL_INTERVAL_MS: u64 = 250; +const CSV_DELIMITER: u8 = b','; +const LABELS: [&str; 3] = ["ms_name", "ms_instance_id", "node_id"]; + +type CsvGzReader = Reader>>; +type BoxedErr = Box; +/// Struct for holding fields after deserialization +/// for both 2021 and 2022 +#[derive(Debug, serde::Deserialize)] +pub struct MsResourceCsvFields { + #[serde(rename = "", skip)] + _trace: u64, + + #[serde(rename = "timestamp")] + timestamp: u64, + + #[serde(rename = "nodeid")] + node_id: String, + + #[serde(rename = "msname")] + ms_name: String, + + #[serde(rename = "msinstanceid")] + ms_instance_id: String, + + #[serde(alias = "instance_cpu_usage", alias = "cpu_utilization")] + cpu_usage: Option, + + #[serde(alias = "instance_memory_usage", alias = "memory_utilization")] + memory_usage: Option, +} + +lazy_static! { + pub static ref MS_RESOURCE_DATA_QUEUE: ConcurrentQueue = + ConcurrentQueue::bounded(DATA_QUEUE_CAP); + pub static ref CPU_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_microservice_cpu_usage", + "Cpu usages for microservices by alibaba nodes", + &LABELS, + ) + .unwrap(); + pub static ref MEMORY_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_microservice_memory_usage", + "Memory usages for microservices by alibaba nodes", + &LABELS, + ) + .unwrap(); +} + +/// @brief Gets the filename for the MsResource csv data based on the year +/// and the index number +/// +/// @param[in] year The year of the trace data. 
Supported values are 2021 +/// and 2022 +/// @param[in] index_no The index of the csv file +/// +/// @return A String of the filename based on the data year and index num +fn get_filename(year: u32, index_no: u16) -> String { + let mut filename: String = String::new(); + let prefix: &str; + let suffix: &str; + let index: &str = &format!("{}", index_no); + + match year { + 2021 => { + prefix = FILENAME_PARTS_2021[0]; + suffix = FILENAME_PARTS_2021[1]; + } + 2022 => { + prefix = FILENAME_PARTS_2022[0]; + suffix = FILENAME_PARTS_2022[1]; + } + _ => { + panic!("Invalid year: {}", year); + } + } + filename.push_str(prefix); + filename.push_str(index); + filename.push_str(suffix); + + filename +} + +/// @brief Gets a csv reader for MsResource data +/// +/// @param[in] input_dir The directory containing the csv file +/// @param[in] year Which trace data year to create the reader for. +/// supported years are 2021 and 2022 +/// @param[in] index The index of the csv file +/// +/// @return A Result type containing either the reader or an Error if the file +/// cannot be found +pub fn get_reader(input_dir: &str, year: u32, index: u16) -> Result, BoxedErr> { + use csv::ReaderBuilder; + use std::path::Path; + + let filename: String = get_filename(year, index); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr: BufReader = BufReader::new(fd); + let gz_decoder: GzDecoder> = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(CSV_DELIMITER) + .flexible(true) + .has_headers(true) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Routine for reading MSResource csv data and enqueuing it +/// +/// @param[in] input_dir The input directory containing the csv file +/// @param[in] all_parts Whether or not to read all csv files in the +/// directory, starting from part 0. Once a file +/// cannot be found, this will return. This should +/// be false if a part_index is given. 
+/// @param[in] part_index The part index for a single csv file to use as +/// the data source. This should be None if all_parts +/// is true. +/// @param[in] year The year of the trace data. Supported values are +/// 2021 and 2022 +/// +/// @pre All csv files are uncompressed +/// @pre If all_parts is specified, at least part 0 must exist +/// @pre Either all_parts is true and part_index is None, or all_parts is +/// false and part_index is Some(part) +pub fn read_and_queue( + input_dir: &str, + all_parts: bool, + part_index: Option, + year: u32, +) -> Result<(), BoxedErr> { + let mut part: u16 = 0; + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(input_dir, year, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while MS_RESOURCE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: MsResourceCsvFields = csv_line?; + let _ = MS_RESOURCE_DATA_QUEUE.push(parsed_line); + } + part += 1; + if !all_parts { + break; + } + } // No more files to read, or couldn't find initial file + + if part == 0 { + // Reading always starts at part 0 + panic!( + "Failed to read initial .csv.gz file. 
Check that all data files
+            are named in the correct format (2021: '{}{}', 2022: '{}{}'),
+            and that the csv files contain the field headers at the top
+            ",
+            FILENAME_PARTS_2021[0],
+            FILENAME_PARTS_2021[1],
+            FILENAME_PARTS_2022[0],
+            FILENAME_PARTS_2022[1]
+        );
+    } else {
+        MS_RESOURCE_DATA_QUEUE.close();
+        Ok(())
+    }
+}
+
+/// @brief Takes the timestamp of a trace in milliseconds and
+/// returns the normalized time as a Duration
+///
+/// @param[in] time_millis The trace timestamp in milliseconds
+///
+/// @return The normalized timestamp as a Duration
+///
+/// @NOTE: Brief check of data suggests no dilation is necessary
+///
+/// @NOTE: MSResource data from 2022 is not sorted by timestamp whatsoever,
+/// sometimes the data is listed in order of decreasing timestamp and other
+/// times it's listed in order of increasing timestamp, so the timestamps
+/// are modified to work with the exporter before being queued
+///
+/// @NOTE: SPEEDUP_FACTOR can be set via --speedup CLI argument for faster-than-realtime export
+pub fn get_normalized_start_time(time_millis: u64) -> Duration {
+    let speedup = crate::alibaba_metrics::SPEEDUP_FACTOR.get().unwrap_or(&1);
+    Duration::from_millis(time_millis / speedup)
+}
+
+/// @brief Exports a single line from the MS_RESOURCE_DATA_QUEUE
+///
+/// @param[in] csv_line A parsed line from a MsResource csv file
+pub fn export_line(csv_line: MsResourceCsvFields) {
+    let label_vals: [&str; 3] = [
+        csv_line.ms_name.as_str(),
+        csv_line.ms_instance_id.as_str(),
+        csv_line.node_id.as_str(),
+    ];
+
+    if let Some(cpu_usage) = csv_line.cpu_usage {
+        CPU_USAGE.with_label_values(&label_vals).set(cpu_usage);
+    }
+
+    if let Some(memory_usage) = csv_line.memory_usage {
+        MEMORY_USAGE
+            .with_label_values(&label_vals)
+            .set(memory_usage);
+    }
+}
+
+/// @brief Exports lines from the queue until a line is found with a timestamp
+/// later than the current runtime. 
This function will terminate the +/// the program once the queue has both been closed by the reader thread +/// and the queue is empty +pub fn export_from_queue() { + let elapsed_t: Duration = utilities::get_time_elapsed(); + let check_time = + |line: &MsResourceCsvFields| get_normalized_start_time(line.timestamp) <= elapsed_t; + MS_RESOURCE_DATA_QUEUE + .try_iter() + .take_while(check_time) + .for_each(export_line); + + // No more files to read and empty queue + if MS_RESOURCE_DATA_QUEUE.is_closed() && MS_RESOURCE_DATA_QUEUE.is_empty() { + info!("No more MSResource data to export, shutting down"); + std::process::exit(0); + } +} diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs new file mode 100644 index 0000000..e1bf60e --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs @@ -0,0 +1,232 @@ +use crate::utilities; +use concurrent_queue::ConcurrentQueue; +use csv::{Reader, ReaderBuilder}; +use flate2::read::GzDecoder; +use lazy_static::lazy_static; +use prometheus::{register_gauge_vec, GaugeVec}; +use std::fs::File; +use std::io::BufReader; +use std::path::Path; +use std::thread; +use std::time::Duration; +use tracing::{debug, info}; + +type BoxedErr = Box; +type CsvGzReader = Reader>>; + +const FILENAME_PARTS_2021: [&str; 2] = ["Node_", ".csv.gz"]; +const FILENAME_PARTS_2022: [&str; 2] = ["NodeMetrics_", ".csv.gz"]; + +const DATA_QUEUE_CAP: usize = 400_000; +const QUEUE_POLL_INTERVAL_MS: u64 = 250; +const CSV_DELIMITER: u8 = b','; + +const LABELS: [&str; 1] = ["node_id"]; + +/// Struct for holding fields after deserialization +#[derive(Debug, serde::Deserialize)] +pub struct NodeCsvFields { + #[serde(rename = "", skip)] + _trace: u64, + + #[serde(rename = "timestamp")] + timestamp: u64, + + #[serde(rename = "nodeid")] + node_id: String, + + #[serde(alias = "node_cpu_usage", alias = "cpu_utilization")] + cpu_usage: Option, + + 
#[serde(alias = "node_memory_usage", alias = "memory_utilization")] + memory_usage: Option, +} + +lazy_static! { + pub static ref NODE_DATA_QUEUE: ConcurrentQueue = + ConcurrentQueue::bounded(DATA_QUEUE_CAP); + pub static ref CPU_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_node_cpu_usage", + "Cpu usages by alibaba nodes", + &LABELS, + ) + .unwrap(); + pub static ref MEMORY_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_node_memory_usage", + "Memory usages by alibaba nodes", + &LABELS, + ) + .unwrap(); +} + +/// @brief Gets the filename for the Node_.csv.gz data based on the year +/// and the index number +/// +/// @param[in] year The year of the trace data. Supported values are 2021 +/// and 2022 +/// @param[in] index_no The index of the csv file +/// +/// @return A String of the filename based on the data year and index num +fn get_filename(year: u32, index_no: u16) -> String { + let mut filename: String = String::new(); + let prefix: &str; + let suffix: &str; + let index: &str = &format!("{}", index_no); + + match year { + 2021 => { + prefix = FILENAME_PARTS_2021[0]; + suffix = FILENAME_PARTS_2021[1]; + } + 2022 => { + prefix = FILENAME_PARTS_2022[0]; + suffix = FILENAME_PARTS_2022[1]; + } + _ => { + panic!("Invalid year: {}", year); + } + } + filename.push_str(prefix); + filename.push_str(index); + filename.push_str(suffix); + + filename +} + +/// @brief Gets a csv reader for Node data +/// +/// @param[in] input_dir The directory containing the csv file +/// @param[in] year Which trace data year to create the reader for. +/// supported years are 2021 and 2022 +/// +/// @return A reader for the .csv.gz files +/// +/// @pre All files should have been converted to a .csv.gz format from the +/// .tar.gz format that they come as initially. 
+pub fn get_reader( + input_dir: &str, + year: u32, + index_no: u16, +) -> Result, BoxedErr> { + let filename = get_filename(year, index_no); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr: BufReader = BufReader::new(fd); + let gz_decoder: GzDecoder> = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(CSV_DELIMITER) + .flexible(true) + .has_headers(true) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Takes the timestamp of a trace in milliseconds and +/// returns the normalized time as a Duration +/// +/// @param[in] time_millis The trace timestamp in milliseconds +/// +/// @return The normalized timestamp as a Duration +/// +/// @NOTE: Brief check of data suggests no dilation is necessary +/// +/// @NOTE: Node data from 2022 is not sorted by timestamp whatsoever, +/// sometimes the data is listed in order of decreasing timestamp and other +/// times it's listed in order of increasing timestamp, so the timestamps +/// are modified to work with the exporter before being queued +/// +/// @NOTE: SPEEDUP_FACTOR can be set via --speedup CLI argument for faster-than-realtime export +pub fn get_normalized_start_time(time_millis: u64) -> Duration { + let speedup = crate::alibaba_metrics::SPEEDUP_FACTOR.get().unwrap_or(&1); + Duration::from_millis(time_millis / speedup) +} + +/// @brief Reads the csv data from .csv.gz files and adds them to the queue. 
+/// +/// @param[in] input_dir The input directory +/// @param[in] data_year The year of the trace data +/// +/// @pre All csv data should have been sorted by timestamp and compressed with +/// gzip +pub fn read_and_queue( + input_dir: &str, + all_parts: bool, + part_index: Option, + data_year: u32, +) -> Result<(), BoxedErr> { + let mut part: u16 = 0; + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(input_dir, data_year, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while NODE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: NodeCsvFields = csv_line?; + let _ = NODE_DATA_QUEUE.push(parsed_line); + } // EOF + part += 1; + + if !all_parts { + break; + } + } // No more files to read, or couldn't find initial file + + if part == 0 { + // Reading always starts at part 0 + panic!( + "Failed to read initial .csv.gz file. Check that all data files + are named in the correct format (2021: '{}{}', 2022: '{}{}), + and that the csv files contain the field headers at the top. + ", + FILENAME_PARTS_2021[0], + FILENAME_PARTS_2021[1], + FILENAME_PARTS_2022[0], + FILENAME_PARTS_2022[1] + ); + } else { + NODE_DATA_QUEUE.close(); + Ok(()) + } +} + +/// @brief Exports a single line from the NODE_DATA_QUEUE +/// +/// @param[in] csv_line A parsed line from a Node csv file +pub fn export_line(csv_line: NodeCsvFields) { + let label_vals: [&str; 1] = [csv_line.node_id.as_str()]; + + if let Some(cpu_usage) = csv_line.cpu_usage { + CPU_USAGE.with_label_values(&label_vals).set(cpu_usage); + } + + if let Some(memory_usage) = csv_line.memory_usage { + MEMORY_USAGE + .with_label_values(&label_vals) + .set(memory_usage); + } +} + +/// @brief Exports lines from the queue until a line is found with a timestamp +/// later than the current runtime. 
This function will terminate
/// the program once the queue has been closed by the reader thread, the
/// queue is empty, and no line is being held back for a future scrape
pub fn export_from_queue() {
    let elapsed_t: Duration = utilities::get_time_elapsed();
    let is_due = |line: &NodeCsvFields| get_normalized_start_time(line.timestamp) <= elapsed_t;

    // Holding slot for a popped-but-not-yet-due line. BUGFIX: the previous
    // `try_iter().take_while(...)` implementation popped the first future
    // line off the queue and silently dropped it (one lost trace line per
    // scrape); instead we park it here and export it on a later scrape.
    static FUTURE_LINE: std::sync::Mutex<Option<NodeCsvFields>> = std::sync::Mutex::new(None);

    let mut held = FUTURE_LINE.lock().unwrap();

    // First, retry the line held over from the previous scrape (if any)
    if let Some(line) = held.take() {
        if is_due(&line) {
            export_line(line);
        } else {
            *held = Some(line); // still in the future, keep holding it
        }
    }

    // Drain due lines from the queue; park the first future one
    if held.is_none() {
        while let Ok(line) = NODE_DATA_QUEUE.pop() {
            if is_due(&line) {
                export_line(line);
            } else {
                *held = Some(line);
                break;
            }
        }
    }

    // No more files to read, nothing queued, and nothing held back
    if NODE_DATA_QUEUE.is_closed() && NODE_DATA_QUEUE.is_empty() && held.is_none() {
        info!("No more Node data to export, shutting down");
        std::process::exit(0);
    }
}
diff --git a/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs b/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs new file mode 100644 index 0000000..2fefed6 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs @@ -0,0 +1,452 @@
use crate::utilities;
use crate::utilities::*;
use clap::ValueEnum;
use concurrent_queue::ConcurrentQueue;
use csv::Reader;
use flate2::read::GzDecoder;
use lazy_static::lazy_static;
use prometheus::{register_gauge_vec, GaugeVec};
use std::sync::OnceLock;
use std::thread;
use std::time::Duration;
use std::{fs::File, io::BufReader};
use tracing::{debug, info};

// NOTE(review): the generic parameters were lost in extraction;
// reconstructed from usage in get_reader below — confirm against the
// original file.
type CsvGzReader = Reader<GzDecoder<BufReader<File>>>;

/* Standard labels for google's task resource usage data */
const TRU_LABELS: [&str; 3] = ["job_id", "task_index", "machine_id"];
const TRU_CSV_DELIMITER: u8 = b',';
const DATA_QUEUE_CAP: usize = 400_000; // Max lines in the queue
const CSV_MAX_PART_NO: u16 = 500;

const MICRO_SECONDS_PER_SECOND: u64 = 1_000_000;
const T_OFFSET_SECS: u64 = 600; // Trace timestamps start 600s before the trace period
const DILATION_FACTOR: u64 = 10; // Factor for scaling time stamps relative to when they are exported

/// Each line of the csv file is serialized into the following struct.
/// The ordering of the struct fields MUST match the order that fields
/// appear in a line of the csv file.
+/// +/// All fields wrapped in Option<> are not considered mandatory by +/// the schema and, therefore, may be missing from a given trace. +/// The rest of the fields should never be missing, so failure to +/// deserialize will result in an error and program termination +#[derive(Debug, serde::Deserialize)] +pub struct TruCsvFields { + pub start_time: u64, + pub _end_time: u64, // unused, only here for parsing + pub job_id: String, // label + pub task_index: String, // label + pub machine_id: String, // label + pub mean_cpu_usage_rate: Option, + pub canonical_memory_usage: Option, + pub assigned_memory_usage: Option, + pub unmapped_page_cache_memory_usage: Option, + pub total_page_cache_memory_usage: Option, + pub max_memory_usage: Option, + pub mean_disk_io_time: Option, + pub mean_local_disk_space_used: Option, + pub max_cpu_usage: Option, + pub max_disk_io_time: Option, + pub cycles_per_instruction: Option, + pub memory_accesses_per_instruction: Option, + pub sample_portion: Option, + pub aggregation_type: Option, // Divides metrics into two + pub sampled_cpu_usage: Option, +} + +/// @brief An enum for matching the metrics to export with their +/// corresponding prometheus gauges +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum TruMetrics { + MeanCpuUsageRate, + CanonicalMemoryUsage, + AssignedMemoryUsage, + UnmappedPageCacheMemoryUsage, + TotalPageCacheMemoryUsage, + MaxMemoryUsage, + MeanDiskIoTime, + MeanLocalDiskSpaceUsed, + MaxCpuUsage, + MaxDiskIoTime, + CyclesPerInstruction, + MemoryAccessesPerInstruction, + SamplePortion, + SampledCpuUsage, +} + +/// @brief A tuple struct representing two of the same prometheus metrics, +/// but partitioned by their aggregation type. Index number directly +/// corresponds to the aggregation type, i.e. i=0 => aggregation_type=0 +pub struct GaugePair(GaugeVec, GaugeVec); + +impl GaugePair { + /// @brief Create and register both GaugeVecs in the GaugePair to the + /// default registry. 
+ /// + /// @param[in] base_name The string used as the base of both metrics + /// names as seen by prometheus, where aggregation type will be appended + /// + /// @param[in] base_help The string used as the base of both metrics + /// help strings when scraped by prometheus. Aggregation type is + /// appended + fn new(base_name: &str, base_help: &str) -> GaugePair { + let mut name_0 = String::from(base_name); + name_0.push_str("_0"); + let mut help_0 = String::from(base_help); + help_0.push_str(" (aggregation_type=0)"); + let gauge_0 = register_gauge_vec!(name_0.as_str(), help_0.as_str(), &TRU_LABELS).unwrap(); + + let mut name_1 = String::from(base_name); + name_1.push_str("_1"); + let mut help_1 = String::from(base_help); + help_1.push_str(" (aggregation_type=1)"); + let gauge_1 = register_gauge_vec!(name_1.as_str(), help_1.as_str(), &TRU_LABELS).unwrap(); + + GaugePair(gauge_0, gauge_1) + } + + /// @brief Retrieve a static reference to the gauge from the pair for + /// the given aggregation type + /// + /// @param[in] self Statically defined GaugePair + /// @param[in] aggregation_type 0 or 1 (The aggregation type) + fn get(&'static self, aggregation_type: u8) -> &'static GaugeVec { + match aggregation_type { + 0 => &self.0, + 1 => &self.1, + _ => panic!("Invalid index into gauge vec"), + } + } +} + +/// List of metrics to export from the google task resource usage data +pub static GOOGLE_METRICS: OnceLock> = OnceLock::new(); + +lazy_static! { + /// Queue for parsed csv lines + pub static ref GOOGLE_DATA_QUEUE: ConcurrentQueue = ConcurrentQueue::bounded(DATA_QUEUE_CAP); + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * ALL METRICS * + * * + * Each static reference is a GaugePair corresponding to a single * + * metric. Each element of the pair corresponds to an aggregation * + * type of 0 or 1. 
When the aggregation type is missing from a * + * trace the aggregation type defaults to 0 * + * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + pub static ref MEAN_CPU_USAGE_RATE_PAIR: GaugePair = GaugePair::new( + "google_mean_cpu_usage_rate", "Mean cpu usage rate by google machines", + ); + + pub static ref CANONICAL_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_canonical_memory_usage", "Canonical memory usage by google cluster machines", + ); + + pub static ref ASSIGNED_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_assigned_memory_usage", "Assigned memory usage for google cluster machines", + ); + + pub static ref UNMAPPED_PAGE_CACHE_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_unmapped_page_cache_memory_usage", "Unmapped page cache memory usage for google cluster machines", + ); + + pub static ref TOTAL_PAGE_CACHE_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_total_page_cache_memory_usage", "Total page cache memory usage for google cluster machines", + ); + + pub static ref MAX_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_max_memory_usage", "Maximum memory usage by google cluster machines", + ); + + pub static ref MEAN_DISK_IO_TIME_PAIR: GaugePair = GaugePair::new( + "google_mean_disk_io_time", "Mean disk I/O time for google cluster machines", + ); + + pub static ref MEAN_LOCAL_DISK_SPACE_USED_PAIR: GaugePair = GaugePair::new( + "google_mean_local_disk_space_used", "Mean local disk space used by google cluster machines", + ); + + pub static ref MAX_CPU_USAGE_PAIR: GaugePair = GaugePair::new( + "google_max_cpu_usage", "Maximum cpu usage for google cluster machines", + ); + + pub static ref MAX_DISK_IO_TIME_PAIR: GaugePair = GaugePair::new( + "google_max_disk_io_time", "Maximum disk I/O time for google cluster machines", + ); + + pub static ref CYCLES_PER_INSTRUCTION_PAIR: GaugePair = GaugePair::new( + "google_cycles_per_instruction", "Cycles per instruction for google cluster 
machines", + ); + + pub static ref MEMORY_ACCESSES_PER_INSTRUCTION_PAIR: GaugePair = GaugePair::new( + "google_memory_accesses_per_instruction", "Memory accesses per instruction for google cluster machines", + ); + + pub static ref SAMPLE_PORTION_PAIR: GaugePair = GaugePair::new( + "google_sample_portion", "Sample portion for google cluster machines", + ); + + pub static ref SAMPLED_CPU_USAGE_PAIR: GaugePair = GaugePair::new( + "google_sampled_cpu_usage", "Sampled cpu usage for google cluster machines", + ); +} + +/// @brief Given the part number, create a String for the filename. +/// +/// @param[in] part The csv part number such that: part ∈ [0, 500] +/// @param[in] gzipped Whether or not .gz should be appended to the filename +/// +/// @return The csv filename as a String, in the form: +/// +pub fn get_csv_filename(part: u16, gzipped: bool) -> String { + const TRU_CSV_PATH_PARTS: [&str; 4] = ["part-", "00000", "-of-00500.csv", ".gz"]; + + let mut filename = String::new(); + let part_name_str: String; + + if part < 10 { + part_name_str = format!("0000{}", part); + } else if (10..100).contains(&part) { + part_name_str = format!("000{}", part); + } else if (100..=CSV_MAX_PART_NO).contains(&part) { + part_name_str = format!("00{}", part); + } else { + panic!( + "Invalid part number: {} => part must be between 0 and 500", + part + ); + } + + filename.push_str(TRU_CSV_PATH_PARTS[0]); + filename.push_str(&part_name_str); + filename.push_str(TRU_CSV_PATH_PARTS[2]); + + if gzipped { + filename.push_str(TRU_CSV_PATH_PARTS[3]); + } + + filename +} + +/// @brief Creates a new csv reader wrapped around a gzip decoder which +/// streams data from the underlying file +/// +/// @param[in] input_dir The directory containing gzipped csv files +/// @param[in] part The part number out of the total number of csv files +/// +/// @return The configured reader +fn get_reader(input_dir: &str, part: u16) -> Result, BoxedErr> { + use csv::ReaderBuilder; + use flate2::read::GzDecoder; + 
use std::fs::File; + use std::io::BufReader; + use std::path::Path; + + let filename: String = get_csv_filename(part, true); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr = BufReader::new(fd); + let gz_decoder = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(TRU_CSV_DELIMITER) + .flexible(true) + .has_headers(false) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Main routine of the helper (reader) thread. +/// +/// The purpose of the thread is to handle all of the work involved in +/// reading and enqueuing lines from the csv.gz file for the +/// main thread to then pop and export on scrape +/// +/// @param[in] input_dir The path to the directory containing the csv.gz files +/// @param[in] all_parts Whether or not to run the exporter on all 500 parts of +/// the task resource usage csv data. Running in this mode +/// and not providing all 500 parts will cause the reader +/// thread to panic. If this option is true, part_index +/// should be None +/// @param[in] part_index Specify a single part (out of 500) to read csv data +/// from. The reader thread will stop after reading this +/// single file. If part_index is not None, then all_parts +/// should be false +/// @param[in] metrics The list of metrics, or csv fields, for the exporter +/// to expose to prometheus. 
At least one must be given +/// +/// @pre All csv files are expected to be of the form: +/// "part-00xxx-of-00500.csv.gz" +pub fn reader_thread_routine( + input_dir: String, + all_parts: bool, + part_index: Option, + metrics: Vec, +) -> Result<(), BoxedErr> { + const QUEUE_POLL_INTERVAL_MS: u64 = 250; + GOOGLE_METRICS.set(metrics).unwrap(); + let mut part: u16 = 0_u16; + + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(&input_dir, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while GOOGLE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: TruCsvFields = csv_line?; + let _ = GOOGLE_DATA_QUEUE.push(parsed_line); + } + part += 1; + + if !all_parts || part > CSV_MAX_PART_NO { + break; + } + } + + // Never read any parts or all parts was specified and we never read all 500 + // parts of the csv data + if part == 0 || (all_parts && part <= CSV_MAX_PART_NO) { + panic!( + "Failed to read initial .csv.gz file. Check that all data files + are named in the correct format ('part-?????-of-00500.csv.gz'). + If running with --all-parts, ensure all 500 parts exist in the + input directory. + " + ); + } else { + GOOGLE_DATA_QUEUE.close(); + Ok(()) + } +} + +/// @brief: Converts the start time of a job into seconds and normalizes it +/// +/// From pg.2 of the schema doc: +/// "Each record has a timestamp, which is in microseconds since 600 +/// seconds before the beginning of the trace period, and recorded as a +/// 64 bit integer (i.e., an event 20 second after the start of the +/// trace would have a timestamp=620s)." 
+/// +/// @param[in] time_micros The event start time in microseconds, +/// offset by T_OFFSET_SECS (600s) +/// +/// @return A duration representing the dilated trace start time in seconds +/// after subtracting the offset +pub fn get_normalized_start_time(time_micros: u64) -> Duration { + let time_secs = time_micros / MICRO_SECONDS_PER_SECOND; + Duration::from_secs((time_secs - T_OFFSET_SECS) * DILATION_FACTOR) +} + +/// @brief Given a single parsed line from the csv file, update all gauges +/// corresponding to the metrics in the list +/// +/// @param[in] csv_line A parsed line from the csv file containing label +/// values and metric data to export +pub fn export_line(csv_line: TruCsvFields) { + let metrics = GOOGLE_METRICS.get().unwrap(); + let label_vals: [&str; 3] = [ + csv_line.job_id.as_str(), + csv_line.task_index.as_str(), + csv_line.machine_id.as_str(), + ]; + + let aggregation_type = csv_line.aggregation_type.unwrap_or(0_u8); + + for metric in metrics { + let curr_gauge: &'static GaugeVec; + let wrapped_value: Option; + + (curr_gauge, wrapped_value) = match metric { + TruMetrics::MeanCpuUsageRate => ( + MEAN_CPU_USAGE_RATE_PAIR.get(aggregation_type), + csv_line.mean_cpu_usage_rate, + ), + TruMetrics::CanonicalMemoryUsage => ( + CANONICAL_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.canonical_memory_usage, + ), + TruMetrics::AssignedMemoryUsage => ( + ASSIGNED_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.assigned_memory_usage, + ), + TruMetrics::UnmappedPageCacheMemoryUsage => ( + UNMAPPED_PAGE_CACHE_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.unmapped_page_cache_memory_usage, + ), + TruMetrics::TotalPageCacheMemoryUsage => ( + TOTAL_PAGE_CACHE_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.total_page_cache_memory_usage, + ), + TruMetrics::MaxMemoryUsage => ( + MAX_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.max_memory_usage, + ), + TruMetrics::MeanDiskIoTime => ( + MEAN_DISK_IO_TIME_PAIR.get(aggregation_type), + 
csv_line.mean_disk_io_time, + ), + TruMetrics::MeanLocalDiskSpaceUsed => ( + MEAN_LOCAL_DISK_SPACE_USED_PAIR.get(aggregation_type), + csv_line.mean_local_disk_space_used, + ), + TruMetrics::MaxCpuUsage => ( + MAX_CPU_USAGE_PAIR.get(aggregation_type), + csv_line.max_cpu_usage, + ), + TruMetrics::MaxDiskIoTime => ( + MAX_DISK_IO_TIME_PAIR.get(aggregation_type), + csv_line.max_disk_io_time, + ), + TruMetrics::CyclesPerInstruction => ( + CYCLES_PER_INSTRUCTION_PAIR.get(aggregation_type), + csv_line.cycles_per_instruction, + ), + TruMetrics::MemoryAccessesPerInstruction => ( + MEMORY_ACCESSES_PER_INSTRUCTION_PAIR.get(aggregation_type), + csv_line.memory_accesses_per_instruction, + ), + TruMetrics::SamplePortion => ( + SAMPLE_PORTION_PAIR.get(aggregation_type), + csv_line.sample_portion, + ), + TruMetrics::SampledCpuUsage => ( + SAMPLED_CPU_USAGE_PAIR.get(aggregation_type), + csv_line.sampled_cpu_usage, + ), + }; + + if let Some(metric_value) = wrapped_value { + // Set the metric, unless it was missing + curr_gauge.with_label_values(&label_vals).set(metric_value); + } + } +} + +/// @brief Exports all parsed CSV lines from the queue +/// +/// This function will continue popping lines from the queue until it +/// pops one with a start timestamp which should be exported later in time. 
+/// This line will be saved in FUTURE_LINE and then exported on the next +/// scrape for which the program runtime <= start time +pub fn export_from_queue() { + let elapsed_t: Duration = utilities::get_time_elapsed(); + let check_time = |line: &TruCsvFields| get_normalized_start_time(line.start_time) <= elapsed_t; + + GOOGLE_DATA_QUEUE + .try_iter() + .take_while(check_time) + .for_each(export_line); + + if GOOGLE_DATA_QUEUE.is_closed() && GOOGLE_DATA_QUEUE.is_empty() { + info!("No more task resource usage to export, shutting down"); + std::process::exit(0); + } +} diff --git a/PrometheusExporters/cluster_data_exporter/src/main.rs b/PrometheusExporters/cluster_data_exporter/src/main.rs new file mode 100644 index 0000000..d6a1a53 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/main.rs @@ -0,0 +1,260 @@ +/// @NOTE: As new label-value combinations are added to each metric, +/// they will persist unless another metric with the same label-value combo +/// overwipes it. Therefore, user should be wary about the possibility +/// of program memory usage steadily increasing over the course of the runtime +use crate::alibaba_metrics::*; +use crate::google_metrics::*; +use crate::utilities::*; +use clap::Parser; +use hyper::body::Incoming; +use hyper::header::CONTENT_TYPE; +use hyper::server::conn::http1; +use hyper::service::service_fn; +use hyper::Request; +use hyper::Response; +use hyper_util::rt::TokioIo; +use prometheus::{Encoder, TextEncoder}; +use std::net::{Ipv4Addr, SocketAddr}; +use std::sync::OnceLock; +use std::{panic, process, thread}; +use tokio::net::TcpListener; +use tracing::{debug, error, info, warn}; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; + +mod alibaba_metrics; +mod google_metrics; +mod utilities; + +type BoxedErr = Box; + +/// Google or Alibaba. 
Must be initialized before starting export routine +static DATA_PROVIDER: OnceLock = OnceLock::new(); + +/// @brief Async call-back function for servicing http requests, like +/// prometheus scrapes +/// +/// @param[in] _req The incoming http request +/// +/// @return Prometheus metrics on success +/// BoxedErr on failure +async fn serve_req(_req: Request) -> Result, BoxedErr> { + let encoder = TextEncoder::new(); + let provider = DATA_PROVIDER.get().unwrap(); + + match provider { + Provider::Google => google_metrics::export_from_queue(), + Provider::Alibaba => alibaba_metrics::export_from_queue(), + } + + let metric_families = prometheus::gather(); + let body = encoder.encode_to_string(&metric_families)?; + let response = Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(body)?; + + Ok(response) +} + +/// @brief Starts a thread to read and queue Google cluster data +/// +/// @param[in] input_dir The input directory to Google task resource usage +/// cluster data +/// @param[in] all_parts Whether to run the exporter across all csv parts or +/// not. This should be false if part index is not None +/// @param[in] part_index The part number, out of 500, of the csv file to use +/// when exporting task resource usage data. This should +/// be None if all_parts is true. +/// @param[in] metrics The list of metrics from the task resource usage data +/// to export +/// +/// @post All globals required by the main exporter thread are initialized. 
+fn start_google_thread( + input_dir: String, + all_parts: bool, + part_index: Option, + metrics: Vec, +) { + debug!("Starting Google reader thread"); + thread::spawn(move || { + // start reader thread + // Drops thread handle => thread is implicitly detached + if let Err(err) = + google_metrics::reader_thread_routine(input_dir, all_parts, part_index, metrics) + { + error!("Error in Google reader thread: {:?}", err); + process::exit(1); + } + }); + // Must be initialized before main thread starts exporting + google_metrics::GOOGLE_METRICS.wait(); + debug!("Google reader thread initialized"); +} + +/// @brief Starts a thread to read and queue Alibaba cluster data +/// +/// @param[in] input_dir The input directory containing the csv files for +/// reading +/// @param[in] all_parts Whether to run the exporter from part 0 until no more +/// csv files are found, or not. This should be false if +/// part index is not None. +/// @param[in] part_index Which csv file part to use as the data source. +/// This should be None if all_parts is true. +/// @param[in] data_type Which type of microservice data the reading thread +/// should be configured to read and queue +/// @param[in] data_year The year from which the source data comes from. Valid +/// options are 2021 and 2022 +/// @param[in] speedup Speedup factor for faster-than-realtime export +/// +/// @post All globals required by the main exporter thread are initialized. 
+fn start_alibaba_thread( + input_dir: String, + all_parts: bool, + part_index: Option, + data_type: MsDataType, + data_year: u32, + speedup: u64, +) { + debug!("Starting Alibaba reader thread"); + thread::spawn(move || { + if let Err(err) = alibaba_metrics::reader_thread_routine( + input_dir, all_parts, part_index, data_type, data_year, speedup, + ) { + error!("Error in Alibaba reader thread: {:?}", err); + process::exit(1); + } + }); + // Must be initialized before main thread starts exporting + alibaba_metrics::EXPORTER_DATA_TYPE.wait(); + debug!("Alibaba reader thread initialized"); +} + +/// @brief Sets up logging with optional file output +/// +/// @param[in] log_dir Optional directory for log file output +/// @param[in] log_level Log level string (DEBUG, INFO, WARN, ERROR) +/// +/// @return WorkerGuard if file logging is enabled, None otherwise. +/// The guard must be kept alive for the duration of the program. +fn setup_logging( + log_dir: Option<&str>, + log_level: &str, +) -> Result, BoxedErr> { + // Create env filter that respects RUST_LOG, with fallback to command line arg + let env_filter = EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new(log_level)) + .unwrap_or_else(|_| EnvFilter::new("info")); + + if let Some(dir) = log_dir { + // Log to file AND stdout + std::fs::create_dir_all(dir)?; + let file_appender = + tracing_appender::rolling::never(dir, "cluster_data_exporter.log"); + let (non_blocking, guard) = tracing_appender::non_blocking(file_appender); + + tracing_subscriber::registry() + .with(env_filter) + .with( + tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_ansi(true), + ) + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_ansi(false), + ) + .init(); + + info!( + "Logging initialized with file output: {}/cluster_data_exporter.log", + dir + ); + Ok(Some(guard)) + } else { + // Log to stdout only + tracing_subscriber::registry() + .with(env_filter) + 
.with(tracing_subscriber::fmt::layer()) + .init(); + + info!("Logging initialized (stdout only)"); + Ok(None) + } +} + +#[tokio::main] +async fn main() -> Result<(), BoxedErr> { + let cli = Cli::parse(); + + // Initialize logging (keep guard alive for lifetime of program) + let _log_guard = setup_logging(cli.log_dir.as_deref(), &cli.log_level)?; + + info!("Starting cluster_data_exporter"); + info!("Input directory: {}", cli.input_directory); + info!("Port: {}", cli.port); + + // This code forces the program to exit if a reader thread panics. + // Comment it out if it's preferable for the main thread to remain + let orig_hook = panic::take_hook(); + panic::set_hook(Box::new(move |panic_info| { + // invoke the default handler and then exit the process + orig_hook(panic_info); + process::exit(1); + })); + + let input_directory: String = cli.input_directory.clone(); + let port: u16 = cli.port; + let addr: SocketAddr = (Ipv4Addr::UNSPECIFIED, port).into(); + + let _ = utilities::T_START; // init t_start + + // Spin up reader thread to start queueing csv data + match cli.provider { + ProviderCmd::Google { + metrics, + all_parts, + part_index, + } => { + info!("Provider: Google"); + info!("Metrics: {:?}", metrics); + info!("Parts mode: {}", if all_parts { "all-parts" } else { "part-index" }); + if let Some(idx) = part_index { + info!("Part index: {}", idx); + } + let _ = DATA_PROVIDER.set(Provider::Google); + start_google_thread(input_directory, all_parts, part_index, metrics); + } + ProviderCmd::Alibaba { + data_type, + data_year, + all_parts, + part_index, + speedup, + } => { + info!("Provider: Alibaba"); + info!("Data type: {:?}", data_type); + info!("Data year: {}", data_year); + info!("Parts mode: {}", if all_parts { "all-parts" } else { "part-index" }); + if let Some(idx) = part_index { + info!("Part index: {}", idx); + } + info!("Speedup factor: {}x", speedup); + let _ = DATA_PROVIDER.set(Provider::Alibaba); + start_alibaba_thread(input_directory, all_parts, 
part_index, data_type, data_year, speedup);
        }
    }

    let listener = TcpListener::bind(addr).await?;
    info!("Server listening on http://{}", addr);

    loop {
        // Main exporter routine: service each scrape connection in turn
        let (stream, _) = listener.accept().await?;
        let io = TokioIo::new(stream);
        let service = service_fn(serve_req);
        if let Err(err) = http1::Builder::new().serve_connection(io, service).await {
            error!("Server error: {:?}", err);
        };
    }
}
diff --git a/PrometheusExporters/cluster_data_exporter/src/utilities.rs b/PrometheusExporters/cluster_data_exporter/src/utilities.rs new file mode 100644 index 0000000..f811084 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/utilities.rs @@ -0,0 +1,113 @@
use crate::alibaba_metrics::*;
use crate::google_metrics::*;
use clap::{ArgGroup, Parser, Subcommand, ValueEnum};
use lazy_static::lazy_static;
use std::time::{Duration, Instant};

// NOTE(review): the trait-object parameter was lost in extraction;
// reconstructed — confirm against the original file.
pub type BoxedErr = Box<dyn std::error::Error + Send + Sync>;

lazy_static! {
    /// An instant in time to roughly represent the start time of the exporter
    /// This is used as the reference point for calculating how much time has
    /// elapsed, and therefore which traces should be exported during a scrape
    /// and which ones should be held onto until later
    pub static ref T_START: Instant = Instant::now();
}

/// @brief Returns the time since T_START as a Duration
///
/// @return Duration since the Instant defined by T_START
///
/// @note T_START isn't initialized until it is referenced for the first
/// time, so if this function is called before T_START is ever referenced
/// then T_START will be initialized here with Duration::ZERO returned
pub fn get_time_elapsed() -> Duration {
    T_START.elapsed()
}

/// Which cluster-trace provider the exporter should serve data for
#[derive(Debug, Clone, ValueEnum)]
pub enum Provider {
    Google,
    Alibaba,
}

#[derive(Parser, Debug)]
#[command(name = "cluster_data_exporter", version, about)]
#[command(subcommand_required = true)]
pub struct Cli {
    /// Directory containing the trace csv files
    // BUGFIX: the aliases were one comma-joined string literal, so the only
    // registered alias was the unusable "input, in, dir, input_dir"
    #[arg(short, long, aliases = ["input", "in", "dir", "input_dir"])]
#[arg(required = true)] + pub input_directory: String, + + #[arg(short, long)] + #[arg(required = true)] + pub port: u16, + + /// Log level (DEBUG, INFO, WARN, ERROR) + #[arg(long, default_value = "INFO")] + pub log_level: String, + + /// Output directory for log files (optional, defaults to stdout only) + #[arg(long)] + pub log_dir: Option, + + #[command(subcommand)] + pub provider: ProviderCmd, +} + +#[derive(Subcommand, Debug)] +pub enum ProviderCmd { + /// Run the exporter on google task resource usage data + #[command(group(ArgGroup::new("csv-parts") + .args(&["all_parts", "part_index"]) + .required(true)) + )] + Google { + #[arg(long, value_enum, value_delimiter = ',', num_args = 1..)] + #[arg(required = true, require_equals = true)] + metrics: Vec, + + #[arg(long, group = "csv-parts", alias = "all")] + all_parts: bool, + + #[arg(long, group = "csv-parts", aliases = ["part", "index"])] + #[arg(require_equals = true)] + part_index: Option, + }, + + /// Run the exporter on Alibaba microservice data + #[command(group(ArgGroup::new("csv-parts") + .args(&["all_parts", "part_index"]) + .required(true)) + )] + Alibaba { + /// The type of microservice data to use + #[arg(long, value_enum)] + #[arg(required = true, require_equals = true)] + data_type: MsDataType, + + /// Which year the microservice data comes from + #[arg(long)] + #[arg(required = true, require_equals = true)] + #[arg(value_parser = clap::value_parser!(u32).range(2021..=2022))] + data_year: u32, + + /// Whether or not to run the exporter starting on part 0 of the csv + /// files and continue sequentially until no more files are found. + /// This option is mutually exclusive with --part-index + #[arg(long, group = "csv-parts", alias = "all")] + all_parts: bool, + + /// Specify a single csv file to use as trace data. 
+ /// This option is mutually exclusive with --all-parts + #[arg(long, group = "csv-parts", aliases = ["part", "index"])] + #[arg(require_equals = true)] + part_index: Option, + + /// Speedup factor for faster-than-realtime export + /// 1 = real-time, 10 = 10x faster, 100 = 100x faster + #[arg(long, require_equals = true)] + #[arg(value_parser = clap::value_parser!(u64).range(1..))] + speedup: u64, + }, +} diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile b/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile new file mode 100644 index 0000000..c775d8a --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile @@ -0,0 +1,12 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="Prometheus Client for SketchDB" + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +COPY fake_exporter.py . + +ENTRYPOINT ["python", "fake_exporter.py"] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 new file mode 100644 index 0000000..8ee0b84 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 @@ -0,0 +1,22 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-python:latest + container_name: {{ container_name | default('sketchdb-fake-exporter') }} + ports: + - "{{ port }}:{{ port }}" + volumes: + - {{ experiment_output_dir }}:/app/output + - {{ output_dir }}:/app/exporter_output_dir + command: [ + "--output_dir", "/app/exporter_output_dir", + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", " {{ dataset }}", + "--num_labels", "{{ num_labels }}", + "--num_values_per_label", "{{ num_values_per_label }}", + 
"--metric_type", "{{ metric_type }}" + ] + restart: unless-stopped diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 new file mode 100644 index 0000000..8e95ed0 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 @@ -0,0 +1,25 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-python:latest + container_name: {{ container_name | default('asap-fake-exporter') }} + hostname: {{ container_name }} # What prometheus uses to scrape + networks: + - asap-network + expose: + - "{{ port }}" + volumes: + - {{ experiment_output_dir }}:/app/outputs + - {{ output_dir }}:/app/exporter_output_dir + command: [ + "--output_dir", "/app/exporter_output_dir", + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", " {{ dataset }}", + "--num_labels", "{{ num_labels }}", + "--num_values_per_label", "{{ num_values_per_label }}", + "--metric_type", "{{ metric_type }}" + ] + restart: no diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py b/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py new file mode 100644 index 0000000..9e4cfe7 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py @@ -0,0 +1,247 @@ +import argparse +import itertools +import os +import time +from typing import List + +import numpy +import numpy as np +from prometheus_client import start_http_server +from prometheus_client.core import REGISTRY, CounterMetricFamily, GaugeMetricFamily +from prometheus_client.registry import Collector + + +class CustomCollector(Collector): + def __init__( + self, scale, dataset, num_labels, num_values_per_label: List[int], 
metric_type + ): + self.scale = scale + # self.timeseries_id_start = timeseries_id_start + self.dataset = dataset + self.rng = numpy.random.default_rng(0) + self.total_samples = 0 + self.const_1M = 1000000 + self.const_2M = 2000000 + self.const_3M = 3000000 + + self.metric_type = metric_type + + self.uniform_counter = 0 + self.dynamic_counter = 0 + self.zipf_counter = 0 + self.normal_counter = 0 + + self.num_labels: int = num_labels + self.labels = [f"label_{i}" for i in range(self.num_labels)] + self.num_values_per_label: List[int] + self.values_per_label: List[List[str]] = [] + self.label_value_combinations: List[List[str]] = [] + + self.label_value_combinations = self.compute_labels( + num_labels, num_values_per_label + ) + + # print("values_per_label") + # [print(sublist) for sublist in self.values_per_label] + # print("label_value_combinations") + # [print(sublist) for sublist in self.label_value_combinations] + # assert False + + def compute_labels( + self, num_labels: int, num_values_per_label: List[int] + ) -> List[List[str]]: + if len(num_values_per_label) == 1: + self.num_values_per_label = [ + num_values_per_label[0] for _ in range(num_labels) + ] + else: + if len(num_values_per_label) != num_labels: + raise ValueError( + "Number of num_values_per_label must be equal to num_labels" + ) + self.num_values_per_label = num_values_per_label + + num_timeseries = np.prod(self.num_values_per_label) + + for label_idx in range(self.num_labels): + values = [ + f"value_{label_idx}_value_{value_idx}" + for value_idx in range(self.num_values_per_label[label_idx]) + ] + self.values_per_label.append(values) + + label_value_combinations = list(itertools.product(*self.values_per_label)) + assert len(label_value_combinations) == num_timeseries + + # convert from list[tuple[str]] to list[list[str]] + rv: List[List[str]] = [ + list(label_value_combination) + for label_value_combination in label_value_combinations + ] + return rv + + def get_uniform_value_gauge(self): + value 
= -1 + while value < 0 or value > self.scale: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + return value + + def get_normal_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + return value + + def get_zipf_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + return value + + def get_dynamic_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + if self.total_samples < self.const_1M: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + elif self.total_samples < self.const_2M: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + else: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.total_samples = (self.total_samples + 1) % self.const_3M + return value + + def get_uniform_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + self.uniform_counter += value + return self.uniform_counter + + def get_normal_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.normal_counter += value + return self.normal_counter + + def get_zipf_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + self.zipf_counter += value + return self.zipf_counter + + def get_dynamic_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + if self.total_samples < self.const_1M: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + elif self.total_samples < self.const_2M: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + else: 
+ value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.total_samples = (self.total_samples + 1) % self.const_3M + self.dynamic_counter += value + return self.dynamic_counter + + def collect(self): + if self.metric_type == "counter": + fake_metric = CounterMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + elif self.metric_type == "gauge": + fake_metric = GaugeMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + else: + fake_metric = GaugeMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + + for label_value_combination in self.label_value_combinations: + if self.metric_type == "counter": + if self.dataset == "uniform": + value = self.get_uniform_value_counter() + elif self.dataset == "normal": + value = self.get_normal_value_counter() + elif self.dataset == "zipf": + value = self.get_zipf_value_counter() + elif self.dataset == "dynamic": + value = self.get_dynamic_value_counter() + else: + value = self.get_dynamic_value_counter() + else: # gauge + if self.dataset == "uniform": + value = self.get_uniform_value_gauge() + elif self.dataset == "normal": + value = self.get_normal_value_gauge() + elif self.dataset == "zipf": + value = self.get_zipf_value_gauge() + elif self.dataset == "dynamic": + value = self.get_dynamic_value_gauge() + else: + value = self.get_dynamic_value_gauge() + + # labels = [f"label_value_{i}" for d in range(self.num_labels)] + # fake_metric.add_metric(labels, value=value) + fake_metric.add_metric(label_value_combination, value) + + yield fake_metric + + +def main(args): + os.makedirs(args.output_dir, exist_ok=True) + + metric_collector = CustomCollector( + args.valuescale, + args.dataset, + args.num_labels, + args.num_values_per_label, + args.metric_type, + ) + 
REGISTRY.register(metric_collector) + start_http_server(port=args.port) + print("Fake exporter started on port {}".format(args.port)) + while True: + time.sleep(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--port", type=int, required=True) + parser.add_argument("--valuescale", type=int, required=True) + # parser.add_argument("--start_instanceid", type=int, required=True) + # parser.add_argument("--batchsize", type=int, required=True) + parser.add_argument("--dataset", type=str, required=True) + parser.add_argument("--num_labels", type=int, required=True) + parser.add_argument("--num_values_per_label", type=str, required=True) + parser.add_argument("--metric_type", type=str, required=True) + args = parser.parse_args() + + args.num_values_per_label = [int(i) for i in args.num_values_per_label.split(",")] + + # if ( + # args.port is None + # or args.valuescale is None + # or args.start_instanceid is None + # or args.batchsize is None + # or args.dataset is None + # ): + # print("Fake exporter missing argument") + # sys.exit(0) + main(args) diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh b/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh new file mode 100755 index 0000000..a876782 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Exporter Python Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-exporter-python:latest + +echo "Fake Exporter Python Docker image built successfully: sketchdb-fake-exporter-python:latest" \ No newline at end of file diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt b/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt new file mode 100644 index 0000000..e24276c --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt @@ -0,0 +1,2 @@ +numpy +prometheus_client diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock new file mode 100644 index 0000000..1547d4d --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock @@ -0,0 +1,1166 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "clap" +version = "4.5.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fake_exporter" +version = "0.1.0" +dependencies = [ + "clap", + "futures", + "http-body-util", + "hyper", + "hyper-util", + "lazy_static", + "prometheus", + "rand", + "rand_distr", + "tokio", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "heck" 
+version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-util" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +dependencies = [ + 
"base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + 
+[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prometheus" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 2.0.16", +] + +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +dependencies = [ + "thiserror-impl 2.0.16", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = 
"0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-registry" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml new file mode 100644 index 0000000..ab6cec6 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "fake_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +prometheus = "0.14.0" +tokio = { version = "1", features = ["full"] } +hyper = { version = "1", features = ["full"] } +hyper-util = { version = "0.1", features = ["full"] } +http-body-util = "0.1" +rand = "0.9.1" +rand_distr = "0.5.1" +futures = "0.3" +lazy_static = "1.5" +clap = { version = "4.0", features = ["derive"] } diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile new file mode 100644 index 0000000..968907a --- 
/dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile @@ -0,0 +1,17 @@ +# Use the official Rust image as a parent image +FROM rust:1.82 AS builder + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application +RUN cargo build --release + +# Set the entrypoint to the fake_exporter binary +ENTRYPOINT ["target/release/fake_exporter"] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 new file mode 100644 index 0000000..a8b67b4 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 @@ -0,0 +1,18 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-rust:latest + container_name: {{ container_name | default('sketchdb-fake-exporter') }} + ports: + - "{{ port }}:{{ port }}" + command: [ + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", "{{ dataset }}", + "--num-labels", "{{ num_labels }}", + "--num-values-per-label", "{{ num_values_per_label | string }}", + "--metric-type", "{{ metric_type }}" + ] + restart: unless-stopped diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 new file mode 100644 index 0000000..a59d56d --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 @@ -0,0 +1,21 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final 
docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-rust:latest + container_name: {{ container_name | default('asap-fake-exporter') }} + hostname: {{ container_name }} + networks: + - asap-network + expose: + - "{{ port }}" + command: [ + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", "{{ dataset }}", + "--num-labels", "{{ num_labels }}", + "--num-values-per-label", "{{ num_values_per_label | string }}", + "--metric-type", "{{ metric_type }}" + ] + restart: no diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh new file mode 100755 index 0000000..e7fe261 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Exporter Rust Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-exporter-rust:latest + +echo "Fake Exporter Rust Docker image built successfully: sketchdb-fake-exporter-rust:latest" \ No newline at end of file diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs new file mode 100644 index 0000000..99e1bbd --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs @@ -0,0 +1,662 @@ +use clap::{Parser, ValueEnum}; +use hyper::{ + Request, Response, body::Incoming, header::CONTENT_TYPE, server::conn::http1, + service::service_fn, +}; +use hyper_util::rt::TokioIo; +use prometheus::{ + Encoder, TextEncoder, + core::{Collector, Desc}, + proto::MetricFamily, +}; +use rand::{SeedableRng, rngs::SmallRng}; +use rand_distr::{Distribution, Normal, Uniform, Zipf}; +use std::{f64::consts::PI, net::Ipv4Addr, net::SocketAddr, sync::Mutex, time::{SystemTime, UNIX_EPOCH}}; +use tokio::net::TcpListener; + +/// Dataset/pattern types for metric generation +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +enum Dataset { + // === Random distribution types === + /// Uniform random distribution + Uniform, + /// Normal (Gaussian) random distribution + Normal, + /// Zipf power-law distribution + Zipf, + /// Cycles through Zipf -> Uniform -> Normal + Dynamic, + + // === Deterministic pattern types (time-based) === + /// Constant value (varies per series) + Constant, + /// Linearly increasing over time + LinearUp, + /// Linearly decreasing over time + LinearDown, + /// Sinusoidal wave + Sine, + /// Sinusoidal wave with gaussian noise + SineNoise, + /// Step function (discrete levels) + Step, + /// Baseline with random spikes + Spiky, + /// Exponential growth + ExpUp, +} + +impl Dataset { + /// Returns the pattern label value (for --add-pattern-label flag) + fn as_label(&self) -> &'static str { + match self { + Dataset::Uniform => "uniform", + Dataset::Normal => 
"normal", + Dataset::Zipf => "zipf", + Dataset::Dynamic => "dynamic", + Dataset::Constant => "constant", + Dataset::LinearUp => "linear_up", + Dataset::LinearDown => "linear_down", + Dataset::Sine => "sine", + Dataset::SineNoise => "sine_noise", + Dataset::Step => "step", + Dataset::Spiky => "spiky", + Dataset::ExpUp => "exp_up", + } + } +} + +type BoxedErr = Box; + +// === Dynamic distribution constants === +const CONST_1M: u64 = 1_000_000; +const CONST_2M: u64 = 2_000_000; +const CONST_3M: u64 = 3_000_000; + +const RNG_SEED: u64 = 0; // seed for rng used by all distributions + +const ZIPF_ALPHA: f64 = 1.01; // zipf parameter + +// === Pattern timing constants === +const SINE_PERIOD_SECS: f64 = 120.0; // 2 minute cycle +const STEP_PERIOD_SECS: f64 = 30.0; // Step changes every 30s +const LINEAR_WRAP_PERIOD_SECS: f64 = 300.0; // Linear resets every 5min + +// === Pattern variation constants (per-series diversity) === +const SINE_PHASE_VARIATION: f64 = 0.1; // Phase offset per series +const SINE_AMPLITUDE_VARIATION: f64 = 0.2; // Amplitude varies ±20% +const LINEAR_SLOPE_VARIATION: f64 = 0.1; // Slope varies ±10% +const CONSTANT_NUM_LEVELS: usize = 10; // 10 distinct constant values + +// === Noise/spike constants === +const NOISE_STDDEV_FRACTION: f64 = 0.1; // Noise is 10% of signal +const SPIKE_PROBABILITY: f64 = 0.05; // 5% chance per scrape +const SPIKE_MAGNITUDE: f64 = 5.0; // Spike is 5x baseline + +// === Step function constants === +const STEP_NUM_LEVELS: usize = 4; // 4 discrete levels + +// === Exponential constants === +const EXP_GROWTH_RATE: f64 = 0.01; // Growth rate per second +const EXP_WRAP_PERIOD_SECS: f64 = 300.0; // Exponential resets every 5min + +// Normal distribution mean +fn get_mean(valuescale: f64) -> f64 { + valuescale / 2.0 +} +// Normal distribution standard deviation +fn get_sigma(valuescale: f64) -> f64 { + valuescale / 8.0 +} + +// Converts string to vector of usize +fn get_num_vals_per_label(num_values_per_label_str: String, 
num_labels: usize) -> Vec { + let parse = num_values_per_label_str + .split(',') + .map(str::trim) // drop any surrounding whitespace + .filter(|s| !s.is_empty()) // skip empty segments, if any + .map(str::parse::) // parse each into usize + .collect(); + let num_values_per_label: Vec = match parse { + Ok(list) => list, + Err(error) => panic!("Couldn't parse num_values_per_label: {error:?}"), + }; + + let rv: Vec; + + if num_values_per_label.len() == 1 { + rv = vec![num_values_per_label[0]; num_labels]; + } else { + if num_values_per_label.len() != num_labels { + panic!( + "Number of num_values_per_label must be equal to num_labels (got {} vs {})", + num_values_per_label.len(), + num_labels + ); + } + rv = num_values_per_label; + } + + return rv; +} + +fn compute_labels(num_labels: usize, num_values_per_label: Vec, label_value_prefixes: &Option>) -> Vec> { + // 1. Build values_per_label + let mut values_per_label = Vec::with_capacity(num_labels); + for label_idx in 0..num_labels { + let count = num_values_per_label[label_idx]; + let mut bucket = Vec::with_capacity(count); + for value_idx in 0..count { + let value = match label_value_prefixes { + Some(prefixes) if label_idx < prefixes.len() => { + format!("{}_{}", prefixes[label_idx], value_idx) + } + _ => format!("value_{}_value_{}", label_idx, value_idx), + }; + bucket.push(value); + } + values_per_label.push(bucket); + } + + // 2. Compute expected total combinations + let expected: usize = num_values_per_label.iter().product(); + + // 3. Cartesian product helper + fn cartesian_product(pools: &[Vec]) -> Vec> { + let mut result: Vec> = vec![Vec::new()]; + for pool in pools { + let mut next = Vec::new(); + for prefix in &result { + for item in pool { + let mut new_prefix = prefix.clone(); + new_prefix.push(item.clone()); + next.push(new_prefix); + } + } + result = next; + } + result + } + + // 5. 
Generate combinations + let combos = cartesian_product(&values_per_label); + assert!( + combos.len() == expected, + "got {} combinations but expected {}", + combos.len(), + expected + ); + + combos +} + +struct FakeCollector { + valuescale: f64, // Max magnitude of value generation + dataset: Dataset, // Dataset/pattern type + label_value_combinations: Vec>, // list of label sets for all metrics + metric_type: String, // gauge or counter + metric_name: String, // custom metric name + label_names: Vec, // custom label names + add_pattern_label: bool, // whether to add a 'pattern' label to metrics + rng: Mutex, // seeded rng + zipf_dist: Option>, + normal_dist: Option>, + uniform_dist: Option>, + counter_state: Mutex, // tracking counter value + total_samples: Mutex, // for dynamic distribution only +} + +impl FakeCollector { + fn new( + valuescale: f64, + dataset: Dataset, + num_labels: usize, + num_values_per_label: String, + metric_type: String, + metric_name: Option, + label_names: Option, + label_value_prefixes: Option, + add_pattern_label: bool, + ) -> Self { + let num_values_per_label = get_num_vals_per_label(num_values_per_label, num_labels); + let prefixes: Option> = label_value_prefixes + .map(|s| s.split(',').map(|p| p.trim().to_string()).collect()); + let label_value_combinations = compute_labels(num_labels, num_values_per_label, &prefixes); + + // Determine metric name + let metric_name = match metric_name { + Some(name) => name, + None => if metric_type == "counter" { "fake_metric_total".to_string() } else { "fake_metric".to_string() }, + }; + + // Determine label names + let label_names: Vec = match label_names { + Some(names) => { + let parsed: Vec = names.split(',').map(|s| s.trim().to_string()).collect(); + if parsed.len() != num_labels { + panic!( + "Number of label names ({}) must match num_labels ({})", + parsed.len(), num_labels + ); + } + parsed + } + None => (0..num_labels).map(|i| format!("label_{}", i)).collect(), + }; + let mut zipf_dist: 
Option> = None; + let mut normal_dist: Option> = None; + let mut uniform_dist: Option> = None; + + // Instantiate required distributions based on dataset type + match dataset { + Dataset::Zipf => { + zipf_dist = Some( + Zipf::new(valuescale, ZIPF_ALPHA).expect("Failed to create Zipf distribution"), + ); + } + Dataset::Normal => { + let mean = get_mean(valuescale); + let sigma = get_sigma(valuescale); + normal_dist = + Some(Normal::new(mean, sigma).expect("Failed to create Normal distribution")); + } + Dataset::Dynamic => { + let mean = get_mean(valuescale); + let sigma = get_sigma(valuescale); + normal_dist = + Some(Normal::new(mean, sigma).expect("Failed to create Normal distribution")); + zipf_dist = Some( + Zipf::new(valuescale, ZIPF_ALPHA).expect("Failed to create Zipf distribution"), + ); + uniform_dist = Some( + Uniform::new_inclusive(0.0, valuescale) + .expect("Failed to create Uniform distribution"), + ); + } + Dataset::Uniform => { + uniform_dist = Some( + Uniform::new_inclusive(0.0, valuescale) + .expect("Failed to create Uniform distribution"), + ); + } + Dataset::SineNoise => { + // Needs normal distribution for noise + let noise_stddev = valuescale * NOISE_STDDEV_FRACTION; + normal_dist = + Some(Normal::new(0.0, noise_stddev).expect("Failed to create Normal distribution")); + } + Dataset::Spiky => { + // Needs uniform for probability check, normal for spike magnitude variation + uniform_dist = Some( + Uniform::new_inclusive(0.0, 1.0).expect("Failed to create Uniform distribution"), + ); + } + // Other patterns don't need distributions + Dataset::Constant + | Dataset::LinearUp + | Dataset::LinearDown + | Dataset::Sine + | Dataset::Step + | Dataset::ExpUp => {} + } + + Self { + valuescale, + dataset, + label_value_combinations, + metric_type, + metric_name, + label_names, + add_pattern_label, + rng: Mutex::new(SmallRng::seed_from_u64(RNG_SEED)), + zipf_dist, + normal_dist, + uniform_dist, + counter_state: Mutex::new(0.0), + total_samples: Mutex::new(0), 
+ } + } + + /// Get current timestamp in seconds since epoch + fn get_time_secs() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs_f64() + } + + /// Generate a sample value for the given series + /// For random distributions, series_id is ignored + /// For patterns, series_id is used to create per-series variation + fn get_sample(&self, series_id: usize) -> f64 { + match self.dataset { + // === Random distribution types === + Dataset::Zipf => { + self.zipf_dist + .as_ref() + .expect("Zipf distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Normal => { + self.normal_dist + .as_ref() + .expect("Normal distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Uniform => { + self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Dynamic => { + let mut samples_mutex = self.total_samples.lock().unwrap(); + let rv = if *samples_mutex < CONST_1M { + self.zipf_dist + .as_ref() + .expect("Zipf distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } else if *samples_mutex < CONST_2M { + self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } else { + self.normal_dist + .as_ref() + .expect("Normal distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + }; + *samples_mutex = (*samples_mutex + 1) % CONST_3M; + rv + } + + // === Deterministic pattern types === + Dataset::Constant => { + // Each series gets a different constant value based on its ID + let level = series_id % CONSTANT_NUM_LEVELS; + let base = self.valuescale / CONSTANT_NUM_LEVELS as f64; + base * (level as f64 + 0.5) // center of each level + } + Dataset::LinearUp => { + let now = Self::get_time_secs(); + // Slope varies per series + let slope_multiplier = 1.0 + (series_id % 10) as f64 * LINEAR_SLOPE_VARIATION; + 
let slope = (self.valuescale / LINEAR_WRAP_PERIOD_SECS) * slope_multiplier; + (now * slope) % self.valuescale + } + Dataset::LinearDown => { + let now = Self::get_time_secs(); + let slope_multiplier = 1.0 + (series_id % 10) as f64 * LINEAR_SLOPE_VARIATION; + let slope = (self.valuescale / LINEAR_WRAP_PERIOD_SECS) * slope_multiplier; + self.valuescale - ((now * slope) % self.valuescale) + } + Dataset::Sine => { + let now = Self::get_time_secs(); + // Phase offset varies per series + let phase = (series_id % 100) as f64 * SINE_PHASE_VARIATION; + // Amplitude varies per series (±SINE_AMPLITUDE_VARIATION) + let amplitude_multiplier = 1.0 + ((series_id % 5) as f64 - 2.0) * SINE_AMPLITUDE_VARIATION; + let amplitude = (self.valuescale / 2.0) * amplitude_multiplier; + let offset = self.valuescale / 2.0; // center the wave + + let angle = (2.0 * PI * now / SINE_PERIOD_SECS) + phase; + offset + amplitude * angle.sin() + } + Dataset::SineNoise => { + let now = Self::get_time_secs(); + let phase = (series_id % 100) as f64 * SINE_PHASE_VARIATION; + let amplitude_multiplier = 1.0 + ((series_id % 5) as f64 - 2.0) * SINE_AMPLITUDE_VARIATION; + let amplitude = (self.valuescale / 2.0) * amplitude_multiplier; + let offset = self.valuescale / 2.0; + + let angle = (2.0 * PI * now / SINE_PERIOD_SECS) + phase; + let base_value = offset + amplitude * angle.sin(); + + // Add gaussian noise + let noise = self.normal_dist + .as_ref() + .expect("Normal distribution not initialized for noise") + .sample(&mut self.rng.lock().unwrap()); + + (base_value + noise).max(0.0) // clamp to non-negative + } + Dataset::Step => { + let now = Self::get_time_secs(); + // Different series have different phase offsets for step timing + let phase_offset = (series_id % STEP_NUM_LEVELS) as f64 * (STEP_PERIOD_SECS / STEP_NUM_LEVELS as f64); + let adjusted_time = now + phase_offset; + + // Determine which step level we're at + let step_index = ((adjusted_time / STEP_PERIOD_SECS) as usize) % STEP_NUM_LEVELS; + let 
level_height = self.valuescale / STEP_NUM_LEVELS as f64; + + level_height * (step_index as f64 + 0.5) + } + Dataset::Spiky => { + // Baseline value varies per series + let baseline = self.valuescale * 0.2 * (1.0 + (series_id % 5) as f64 * 0.1); + + // Check if we should spike + let should_spike = self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized for spike check") + .sample(&mut self.rng.lock().unwrap()) < SPIKE_PROBABILITY; + + if should_spike { + baseline * SPIKE_MAGNITUDE + } else { + baseline + } + } + Dataset::ExpUp => { + let now = Self::get_time_secs(); + // Wrap time to avoid overflow + let wrapped_time = now % EXP_WRAP_PERIOD_SECS; + // Growth rate varies slightly per series + let rate = EXP_GROWTH_RATE * (1.0 + (series_id % 5) as f64 * 0.1); + + // Exponential growth, scaled to valuescale + let raw = (rate * wrapped_time).exp(); + // Normalize to valuescale range + let max_value = (rate * EXP_WRAP_PERIOD_SECS).exp(); + (raw / max_value) * self.valuescale + } + } + } + + // Generates a new random value based on the dataset, updates the counter, + // and returns the current counter value + // Note: Counter support for patterns is not fully implemented (uses series_id=0) + fn get_next_counter_val(&self, series_id: usize) -> f64 { + let random_val: f64 = self.get_sample(series_id); + let mut counter_mutex = self.counter_state.lock().unwrap(); + // Update counter with val + *counter_mutex += random_val; + *counter_mutex + } + + // Gets a metric family containing a counter family with all label_value combos + // Note: Pattern support for counters is limited - use gauge metric type for patterns + fn get_counter_family(&self) -> MetricFamily { + let mut counter_family = MetricFamily::default(); + counter_family.set_name(self.metric_name.clone()); + counter_family.set_help(format!( + "Generating fake time series data with {:?} dataset", + self.dataset + )); + counter_family.set_field_type(prometheus::proto::MetricType::COUNTER); + + for 
(series_id, label_value_combination) in self.label_value_combinations.iter().enumerate() { + let mut metric = prometheus::proto::Metric::default(); + let mut counter = prometheus::proto::Counter::default(); + let mut labels = Vec::new(); + + // Add the pattern label if enabled + if self.add_pattern_label { + let mut pattern_label = prometheus::proto::LabelPair::default(); + pattern_label.set_name("pattern".to_string()); + pattern_label.set_value(self.dataset.as_label().to_string()); + labels.push(pattern_label); + } + + for i in 0..label_value_combination.len() { + let mut label_and_value = prometheus::proto::LabelPair::default(); + let label_val: &String = &label_value_combination[i]; + label_and_value.set_name(self.label_names[i].clone()); + label_and_value.set_value(label_val.to_string()); + labels.push(label_and_value); + } + + metric.set_label(labels.into()); + counter.set_value(self.get_next_counter_val(series_id)); + metric.set_counter(counter); + counter_family.mut_metric().push(metric); + } + counter_family + } + + // Gets a metric family containing a gauge family with all label_value combos + fn get_gauge_family(&self) -> MetricFamily { + let mut gauge_family = MetricFamily::default(); + gauge_family.set_name(self.metric_name.clone()); + gauge_family.set_help(format!( + "Generating fake time series data with {:?} dataset", + self.dataset + )); + gauge_family.set_field_type(prometheus::proto::MetricType::GAUGE); + + for (series_id, label_value_combination) in self.label_value_combinations.iter().enumerate() { + let mut metric = prometheus::proto::Metric::default(); + let mut gauge = prometheus::proto::Gauge::default(); + let mut labels = Vec::new(); + + // Add the pattern label if enabled + if self.add_pattern_label { + let mut pattern_label = prometheus::proto::LabelPair::default(); + pattern_label.set_name("pattern".to_string()); + pattern_label.set_value(self.dataset.as_label().to_string()); + labels.push(pattern_label); + } + + // Add the regular 
labels + for i in 0..label_value_combination.len() { + let mut label_and_value = prometheus::proto::LabelPair::default(); + let label_val: &String = &label_value_combination[i]; + label_and_value.set_name(self.label_names[i].clone()); + label_and_value.set_value(label_val.to_string()); + labels.push(label_and_value); + } + + metric.set_label(labels.into()); + gauge.set_value(self.get_sample(series_id)); + metric.set_gauge(gauge); + gauge_family.mut_metric().push(metric); + } + gauge_family + } +} + +// Interface used by prometheus +impl Collector for FakeCollector { + fn desc(&self) -> Vec<&Desc> { + // Return empty vec initially + Vec::new() + } + + fn collect(&self) -> Vec { + let mut metric_families = Vec::new(); + + if self.metric_type == "counter" { + let counter_family = self.get_counter_family(); + metric_families.push(counter_family); + } else if self.metric_type == "gauge" { + let gauge_family = self.get_gauge_family(); + metric_families.push(gauge_family); + } else { + panic!("Metric type must be one of either 'counter' or 'gauge'") + } + + metric_families + } +} + +async fn serve_req(_req: Request) -> Result, BoxedErr> { + let encoder = TextEncoder::new(); + let metric_families = prometheus::gather(); // Calls collect() method + let body = encoder.encode_to_string(&metric_families)?; + let response = Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(body)?; + + Ok(response) +} + +#[derive(Parser)] +#[command(name = "fake_exporter")] +#[command(about = "A Prometheus fake exporter for generating test metrics")] +struct Args { + #[arg(long, help = "Port to serve metrics on")] + port: u16, + + #[arg(long, help = "Maximum scale for generated values")] + valuescale: i32, + + #[arg(long, value_enum, help = "Dataset/pattern type for value generation")] + dataset: Dataset, + + #[arg(long, help = "Number of labels per metric")] + num_labels: usize, + + #[arg(long, help = "Comma-separated list of number of values per label")] 
+ num_values_per_label: String, + + #[arg(long, help = "Metric type (gauge or counter)")] + metric_type: String, + + #[arg(long, help = "Custom metric name (default: fake_metric for gauge, fake_metric_total for counter)")] + metric_name: Option, + + #[arg(long, help = "Comma-separated custom label names (must match num-labels count)")] + label_names: Option, + + #[arg(long, help = "Comma-separated prefixes for label values (e.g. 'region,svc,inst' produces region_0, svc_0, inst_0)")] + label_value_prefixes: Option, + + #[arg(long, default_value = "false", help = "Add 'pattern' label to metrics with dataset name")] + add_pattern_label: bool, +} + +#[tokio::main] +async fn main() -> Result<(), BoxedErr> { + let args = Args::parse(); + + let fake_collector = Box::new(FakeCollector::new( + args.valuescale as f64, + args.dataset, + args.num_labels, + args.num_values_per_label, + args.metric_type, + args.metric_name, + args.label_names, + args.label_value_prefixes, + args.add_pattern_label, + )); + + // Register collector and start serving + let _ = prometheus::register(fake_collector); + let ip = Ipv4Addr::UNSPECIFIED; + let addr: SocketAddr = (ip, args.port).into(); + println!("Fake exporter started on port {}", args.port); + let listener = TcpListener::bind(addr).await?; + loop { + let (stream, _) = listener.accept().await?; + let io = TokioIo::new(stream); + + let service = service_fn(serve_req); + if let Err(err) = http1::Builder::new().serve_connection(io, service).await { + eprintln!("server error: {:?}", err); + }; + } +} diff --git a/PrometheusExporters/fake_kafka_exporter/Cargo.lock b/PrometheusExporters/fake_kafka_exporter/Cargo.lock new file mode 100644 index 0000000..4a39365 --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Cargo.lock @@ -0,0 +1,1006 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = 
"bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fake_kafka_exporter" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "rand", + "rand_distr", + "rdkafka", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rdkafka" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = 
"1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", +] 
+ +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = 
"wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + 
"windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = 
"windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" diff --git a/PrometheusExporters/fake_kafka_exporter/Cargo.toml b/PrometheusExporters/fake_kafka_exporter/Cargo.toml new file mode 100644 index 0000000..2be3518 --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "fake_kafka_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.0", features = ["derive"] } +rdkafka = "0.34" +serde = { version = "1.0", features = ["derive"] } 
+serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +rand = "0.9.1" +rand_distr = "0.5.1" +chrono = "0.4" diff --git a/PrometheusExporters/fake_kafka_exporter/Dockerfile b/PrometheusExporters/fake_kafka_exporter/Dockerfile new file mode 100644 index 0000000..1e1e14d --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Dockerfile @@ -0,0 +1,17 @@ +# Use the official Rust image as a parent image +FROM rust:1.82 AS builder + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application +RUN cargo build --release + +# Set the entrypoint to the fake_kafka_exporter binary +ENTRYPOINT ["target/release/fake_kafka_exporter"] diff --git a/PrometheusExporters/fake_kafka_exporter/installation/install.sh b/PrometheusExporters/fake_kafka_exporter/installation/install.sh new file mode 100755 index 0000000..b0667ec --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Kafka Exporter Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-kafka-exporter:latest
+
+echo "Fake Kafka Exporter Docker image built successfully: sketchdb-fake-kafka-exporter:latest"
diff --git a/PrometheusExporters/fake_kafka_exporter/src/main.rs b/PrometheusExporters/fake_kafka_exporter/src/main.rs
new file mode 100644
index 0000000..02117da
--- /dev/null
+++ b/PrometheusExporters/fake_kafka_exporter/src/main.rs
@@ -0,0 +1,249 @@
+use clap::Parser;
+use rand::rngs::SmallRng;
+use rand::SeedableRng;
+use rand_distr::{Distribution, Uniform};
+use rdkafka::config::ClientConfig;
+use rdkafka::producer::{FutureProducer, FutureRecord};
+use serde_json::{json, Value as JsonValue};
+use std::time::Duration;
+use tokio::time::sleep;
+
+const RNG_SEED: u64 = 0;
+
+/// Converts comma-separated string to vector of usize
+fn get_num_vals_per_column(num_values_str: &str, num_columns: usize) -> Vec<usize> {
+    let parse: Result<Vec<usize>, _> = num_values_str
+        .split(',')
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::parse::<usize>)
+        .collect();
+
+    let num_values: Vec<usize> = match parse {
+        Ok(list) => list,
+        Err(error) => panic!("Couldn't parse num_values_per_metadata_column: {error:?}"),
+    };
+
+    if num_values.len() == 1 {
+        vec![num_values[0]; num_columns]
+    } else {
+        if num_values.len() != num_columns {
+            panic!(
+                "Number of num_values_per_metadata_column must be equal to metadata_columns count (got {} vs {})",
+                num_values.len(),
+                num_columns
+            );
+        }
+        num_values
+    }
+}
+
+/// Computes all combinations of metadata column values
+fn compute_metadata_combinations(
+    column_names: &[String],
+    num_values_per_column: &[usize],
+) -> Vec<Vec<(String, String)>> {
+    // Build values for each column
+    let mut values_per_column: Vec<Vec<String>> = Vec::with_capacity(column_names.len());
+    for (col_idx, col_name) in column_names.iter().enumerate() {
+        let count = num_values_per_column[col_idx];
+        let mut bucket = Vec::with_capacity(count);
+        for value_idx in 0..count {
+            bucket.push(format!("{}_{}", col_name, value_idx));
+        }
+
values_per_column.push(bucket);
+    }
+
+    // Cartesian product
+    fn cartesian_product(pools: &[Vec<String>]) -> Vec<Vec<String>> {
+        let mut result: Vec<Vec<String>> = vec![Vec::new()];
+        for pool in pools {
+            let mut next = Vec::new();
+            for prefix in &result {
+                for item in pool {
+                    let mut new_prefix = prefix.clone();
+                    new_prefix.push(item.clone());
+                    next.push(new_prefix);
+                }
+            }
+            result = next;
+        }
+        result
+    }
+
+    let combos = cartesian_product(&values_per_column);
+
+    // Pair column names with values
+    combos
+        .into_iter()
+        .map(|combo| {
+            column_names
+                .iter()
+                .zip(combo.into_iter())
+                .map(|(name, val)| (name.clone(), val))
+                .collect()
+        })
+        .collect()
+}
+
+/// Builds a JSON record for a single data point
+fn build_json_record(
+    timestamp_ms: i64,
+    time_column: &str,
+    metadata: &[(String, String)],
+    value_columns: &[String],
+    rng: &mut SmallRng,
+    uniform_dist: &Uniform<f64>,
+) -> JsonValue {
+    let mut record = json!({});
+    let obj = record.as_object_mut().unwrap();
+
+    // Add timestamp
+    obj.insert(time_column.to_string(), json!(timestamp_ms));
+
+    // Add metadata columns
+    for (col_name, col_value) in metadata {
+        obj.insert(col_name.clone(), json!(col_value));
+    }
+
+    // Add value columns with random values
+    for col_name in value_columns {
+        let value = uniform_dist.sample(rng);
+        obj.insert(col_name.clone(), json!(value));
+    }
+
+    record
+}
+
+#[derive(Parser)]
+#[command(name = "fake_kafka_exporter")]
+#[command(about = "A fake data exporter that outputs SQL/tabular-style JSON records to Kafka")]
+struct Args {
+    #[arg(long, default_value = "localhost:9092", help = "Kafka broker address")]
+    kafka_broker: String,
+
+    #[arg(long, help = "Kafka topic name")]
+    kafka_topic: String,
+
+    #[arg(long, default_value = "time", help = "Name of the timestamp column")]
+    time_column: String,
+
+    #[arg(long, help = "Comma-separated metadata column names")]
+    metadata_columns: String,
+
+    #[arg(long, help = "Comma-separated counts per metadata column")]
+
num_values_per_metadata_column: String,
+
+    #[arg(long, help = "Comma-separated value column names")]
+    value_columns: String,
+
+    #[arg(
+        long,
+        default_value = "100.0",
+        help = "Max value for uniform distribution [0, value_scale]"
+    )]
+    value_scale: f64,
+
+    #[arg(long, default_value = "1", help = "Seconds between data batches")]
+    frequency: u64,
+
+    #[arg(long, default_value = "false", help = "Print records to console")]
+    debug_print: bool,
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args = Args::parse();
+
+    // Parse column names
+    let metadata_columns: Vec<String> = args
+        .metadata_columns
+        .split(',')
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    let value_columns: Vec<String> = args
+        .value_columns
+        .split(',')
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    if metadata_columns.is_empty() {
+        panic!("At least one metadata column is required");
+    }
+    if value_columns.is_empty() {
+        panic!("At least one value column is required");
+    }
+
+    // Parse num_values_per_metadata_column
+    let num_values_per_column =
+        get_num_vals_per_column(&args.num_values_per_metadata_column, metadata_columns.len());
+
+    // Compute all metadata combinations
+    let all_metadata_combinations =
+        compute_metadata_combinations(&metadata_columns, &num_values_per_column);
+
+    let num_combinations: usize = num_values_per_column.iter().product();
+    println!(
+        "Generated {} metadata combinations from {} columns",
+        num_combinations,
+        metadata_columns.len()
+    );
+
+    // Create Kafka producer
+    let producer: FutureProducer = ClientConfig::new()
+        .set("bootstrap.servers", &args.kafka_broker)
+        .set("message.timeout.ms", "5000")
+        .create()
+        .expect("Failed to create Kafka producer");
+
+    println!(
+        "Connected to Kafka broker: {}, topic: {}",
+        args.kafka_broker, args.kafka_topic
+    );
+
+    // Initialize RNG and distribution
+    let mut rng = SmallRng::seed_from_u64(RNG_SEED);
+    let uniform_dist =
Uniform::new_inclusive(0.0, args.value_scale) + .expect("Failed to create Uniform distribution"); + + // Main data generation loop + loop { + let timestamp_ms = chrono::Utc::now().timestamp_millis(); + + for metadata_combo in &all_metadata_combinations { + let record = build_json_record( + timestamp_ms, + &args.time_column, + metadata_combo, + &value_columns, + &mut rng, + &uniform_dist, + ); + + let record_str = serde_json::to_string(&record)?; + + if args.debug_print { + println!("{}", record_str); + } + + // Send to Kafka + let delivery_status = producer + .send( + FutureRecord::to(&args.kafka_topic) + .payload(&record_str) + .key(""), + Duration::from_secs(0), + ) + .await; + + if let Err((err, _)) = delivery_status { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + + sleep(Duration::from_secs(args.frequency)).await; + } +} diff --git a/PrometheusExporters/installation/install.sh b/PrometheusExporters/installation/install.sh new file mode 100755 index 0000000..a4efbc3 --- /dev/null +++ b/PrometheusExporters/installation/install.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PROMETHEUS_EXPORTERS_DIR=$(dirname "$THIS_DIR") + +echo "Building PrometheusExporters Docker images..." + +# Build cluster data exporter +echo "Building Cluster Data Exporter..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/cluster_data_exporter/installation" + ./install.sh +) + +# Build fake exporter python +echo "Building Fake Exporter Python..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_exporter/fake_exporter_python/installation" + ./install.sh +) + +# Build fake exporter rust +echo "Building Fake Exporter Rust..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_exporter/fake_exporter_rust/fake_exporter/installation" + ./install.sh +) + +# Build fake kafka exporter +echo "Building Fake Kafka Exporter..." 
+( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_kafka_exporter/installation" + ./install.sh +) + +echo "All PrometheusExporters Docker images built successfully!" diff --git a/PrometheusExporters/query_cost_exporter/QueryCostExporter.py b/PrometheusExporters/query_cost_exporter/QueryCostExporter.py new file mode 100644 index 0000000..8448643 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/QueryCostExporter.py @@ -0,0 +1,324 @@ +from typing import Dict, List, Tuple + +import query_cost as query_cost +from loguru import logger +from process.ProcessMonitorHook import ProcessMetricSnapshot, ProcessMonitorHook +from prometheus_client import Gauge, start_http_server +from query_cost import CostModel, CostModelOption + + +class QueryCostExporterHook(ProcessMonitorHook): + """ + Wrapper class for the QueryCostExporter + """ + + def __init__( + self, + monitor_to_models_map: Dict[str, List[CostModelOption]], + addr: str, + port: int, + ): + self.port = port + self.addr = addr + self.monitor_to_models_map = monitor_to_models_map + self.exporter = None + + def init(self): + """ + Instantiates the cost exporter and launches it for exporting + """ + self.exporter = QueryCostExporter( + self.monitor_to_models_map, self.addr, self.port + ) + self.exporter.launch() + + def update(self, value): + """ + Updates exporter metrics using the given value + """ + if self.exporter is not None: + self.exporter.export_recent_measurement(value) + else: + raise RuntimeError( + "Exporter is None, remember to call init() before using this hook" + ) + + def close(self): + """ + Shuts down the cost exporter + """ + if self.exporter is not None: + self.exporter.shutdown() + else: + raise RuntimeError( + "Error closing hook, exporter is None. Did you remember to call init()?" + ) + + +class QueryCostExporter: + @staticmethod + def _IP_valid(addr): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. 
At the moment, this function considers + any properly formatted IP address as valid. + """ + if not isinstance(addr, str): + raise TypeError("IP address must be a string") + + if addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + return + + @staticmethod + def _port_valid(port): + """ + Verifies that a given port is of the correct type and is a "valid" + port to be used by the exporter. At the moment, this function considers + any properly formatted port as valid + """ + if not isinstance(port, int): + raise TypeError("Port must be an integer") + if port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + return + + @staticmethod + def _monitor_to_models_map_valid(monitor_to_models_map): + """ + Verifies that the monitor_to_models_map given during object creation + is valid, e.g. is a dictionary with valid keys and values + """ + # Check map itself (Correct type, non-empty) + if monitor_to_models_map is None: + raise TypeError("Monitor to cost models map is None.") + elif not isinstance(monitor_to_models_map, dict): + raise TypeError("Monitor to cost models map must be a dictionary.") + elif len(monitor_to_models_map) == 0: + raise ValueError("Monitor to cost models map must not be empty.") + + # Check key-value pairs (Correct types, each monitor has at least one cost model) + for monitor in monitor_to_models_map: + if not isinstance(monitor, str): + raise TypeError("Monitor names in the map must be given as strings.") + + cost_models = monitor_to_models_map[monitor] + + if cost_models is None: + raise TypeError(f"Cost model list for {monitor} is None.") + elif not isinstance(cost_models, list): + raise TypeError( + f"Cost models for {monitor} must be given as a list of CostModelOption." 
+ ) + elif len(cost_models) == 0: + raise ValueError(f"Cost model list for {monitor} is empty") + + for model in cost_models: + if not isinstance(model, type(CostModelOption.NO_TRANSFORM)): + raise TypeError( + f"List of cost models for {monitor} contains one or more element that is not a CostModelOption" + ) + + # NOTE: Implementation only uses prometheus Gauges + @staticmethod + def _create_prom_metric( + monitor_metric_name: str, cost_model: CostModelOption, metric_labels: List[str] + ) -> Gauge: + """ + Creates a single prometheus metric for a single monitor (e.g. cpu_percent) and + one of the cost functions applied to it. The name of the metric as seen by prometheus + will be "_", e.g. + "cpu_percent_NO_TRANSFORM" + """ + prom_metric_name = "{}_{}".format(monitor_metric_name, cost_model.name) + prom_description = "{}({})".format(cost_model.name, monitor_metric_name) + + return Gauge(prom_metric_name, prom_description, metric_labels) + + # NOTE Only uses prometheus gauges for metrics at the moment + @staticmethod + def _init_prom_metrics( + monitor_to_models_map, + ) -> Dict[str, List[Tuple[CostModel, Gauge]]]: + """ + Creates a dictionary which maps the name of a monitor to a list of tuples, + where each tuple contains a cost model object as the first element + and the corresponding prometheus metric as the second element, + e.g. 
Dict = {"cpu_percent": [(cost_model, Gauge), ...]} + """ + prometheus_metrics = {} + + for monitor_metric in monitor_to_models_map: + models_and_prom_metrics = [] + for cost_model_option in monitor_to_models_map[monitor_metric]: + cost_model = query_cost.create_model(cost_model_option) + prom_metric = QueryCostExporter._create_prom_metric( + monitor_metric, cost_model_option, ["keyword", "PID"] + ) + model_and_prom_metric = (cost_model, prom_metric) + models_and_prom_metrics.append(model_and_prom_metric) + + prometheus_metrics[monitor_metric] = models_and_prom_metrics + + return prometheus_metrics + + def __init__( + self, + monitor_to_models_map: Dict[str, List[CostModelOption]], + addr: str, + port: int, + ): + self.logger = logger.bind(module="query_cost_exporter") + + self.port = port + self.addr = addr + self.monitor_to_models_map = monitor_to_models_map + + self.http_server = None + self.server_thread = None + + # Verify input parameters + try: + QueryCostExporter._IP_valid(self.addr) + QueryCostExporter._port_valid(self.port) + QueryCostExporter._monitor_to_models_map_valid(self.monitor_to_models_map) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryCostExporter: {str(e)}") + raise e + + self.prometheus_metrics_map = QueryCostExporter._init_prom_metrics( + self.monitor_to_models_map + ) + self.logger.info("QueryCostExporter object created") + + def __enter__(self): + return self + + def __exit__(self, *args): + self.shutdown() + + def launch(self): + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Cost exporter failed to launch: exporter IP is None") + + if self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError("Cost exporter failed to launch: exporter port is None") + + self.logger.info(f"Launching 
cost exporter at {self.addr}:{self.port}...") + + try: + self.http_server, self.server_thread = start_http_server( + addr=self.addr, port=self.port + ) + except Exception as e: + self.logger.error(f"Failed to start http server due to exception: {str(e)}") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}:{self.port}") + print(f"Exporter running at {self.addr}:{self.port}") + + return + + def shutdown(self): + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down cost exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Cost exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server thread: {str(e)}") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Cost exporter server thread is None") + + print("Exporter shut down successfully") + return + + # NOTE: This function is blocking. 
Exporting the new information requires + # the calling thread to perform all cost modelling calculations, + # so be wary when using cost models which take substantial time to + # compute + def export_recent_measurement(self, iteration_info: List[ProcessMetricSnapshot]): + """ + Takes a list of snapshots for every process and monitor from the most + recent iteration in process_monitor + """ + if iteration_info is None: + raise TypeError("Failed to export iteration, iteration_info is None") + elif not isinstance(iteration_info, list): + raise TypeError("iteration_info must be a list of ProcessMetricSnapshots") + + for snapshot in iteration_info: + self.export_snapshot(snapshot) + + # NOTE: Function logic currently assumes all prometheus metrics are Gauges + # NOTE: This function is blocking. Since this function makes the necessary + # calls to compute costs, beware of cost models which take a while to + # compute + def export_snapshot(self, snapshot: ProcessMetricSnapshot): + """ + Updates all prometheus metrics corresponding to the given monitor. The + function applies the corresponding cost function to the given value + before exporting + """ + if snapshot is None: + self.logger.error("Exporter given None snapshot") + raise TypeError("Attempt to export a None snapshot") + elif not isinstance(snapshot, ProcessMetricSnapshot): + self.logger.error("Wrong argument") + raise TypeError( + "export_snapshot() argument must be a ProcessMetricSnapshot" + ) + + pid = snapshot.pid + keyword = snapshot.keyword + monitor_name = snapshot.monitor_name + measurement = snapshot.value + self.logger.trace( + f"Updating for pid={pid}, keyword={keyword}, monitor_name={monitor_name}, measurement={measurement}" + ) + + if monitor_name in self.prometheus_metrics_map: + metric_list = self.prometheus_metrics_map[monitor_name] + for cost_model, prometheus_metric in metric_list: + # NOTE: For a computation like a sum, the cost is being computed + # using every measurement, i.e. 
across all PIDs and keywords, + # so PID and keyword labels are meaningless in these cases. + cost = cost_model.compute(measurement) + if cost is not None and prometheus_metric is not None: + prometheus_metric.labels(keyword=keyword, PID=pid).set(cost) + + return diff --git a/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py b/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py new file mode 100644 index 0000000..167e784 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py @@ -0,0 +1,39 @@ +from abc import ABC, abstractmethod +from typing import Any, Optional + + +class ProcessMonitorHook(ABC): + """ + Abstract parent class for any hooks in process_monitor + """ + + @abstractmethod + def init(self): + pass + + @abstractmethod + def update(self, value: Any): + pass + + @abstractmethod + def close(self): + pass + + +class ProcessMetricSnapshot: + """ + Class for providing hooks with a consistent format for a single measurement + for a single process + """ + + def __init__( + self, + pid: int, + value: Any, + keyword: Optional[str] = None, + monitor_name: Optional[str] = None, + ): + self.pid = pid + self.keyword = keyword + self.monitor_name = monitor_name + self.value = value diff --git a/PrometheusExporters/query_cost_exporter/process/__init__.py b/PrometheusExporters/query_cost_exporter/process/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/PrometheusExporters/query_cost_exporter/query_cost.py b/PrometheusExporters/query_cost_exporter/query_cost.py new file mode 100644 index 0000000..627b993 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/query_cost.py @@ -0,0 +1,136 @@ +""" +Rather than instantiating any of these cost models directly, it is preferred +for the user to use create_model(CostModelOption, *args) to initialize the cost model. + +When implementing a new model, the abstract CostModel() class should be used +as a parent class. 
Once a new model is implemented, it should be added to the +CostModelOption enum and the create_model function. +""" + +from abc import ABC, abstractmethod +from enum import Enum, auto +from typing import Any + + +# flake8: noqa +# Enum for available cost models +class CostModelOption(Enum): + """ + Enumeration of implemented cost models. + """ + + NO_TRANSFORM = auto() + SUM = auto() + ARITHMETIC_AVG = auto() + + +class CostModel(ABC): + """ + Abstract class representing any cost model. All implemented cost models + must be a child class of this abstract class. + """ + + @abstractmethod + def __init__(self): + """ + Any initial setup for models which require it. Usually, these are + models which maintain some sort of state + """ + pass + + @abstractmethod + def compute(self, x: Any) -> Any: + """ + Absract method for updating a cost model (if it has memory). It must + return the output of the model after updating + """ + pass + + +class NoTransform(CostModel): + """ + CostModel which applies no transformation when computing, i.e. calls to + compute simply return the input argument + """ + + def __init__(self): + pass + + def compute(self, x: Any): + return x + + @property + def name(self): + return "NO_TRANSFORM" + + +# NOTE: Assumes scalar inputs (e.g. int and float) +class Sum(CostModel): + """ + Model to represent the running sum of all samples + """ + + def __init__(self): + self.sum = 0 + + def compute(self, x: Any) -> Any: + """ + Returns the sum of x and all previous values + """ + if x is None: + raise TypeError("Input argument cannot be None") + self.sum += x + return self.sum + + @property + def name(self): + return "SUM" + + +# NOTE: Assumes scalar inputs (e.g. 
int and float) +class ArithmeticAverage(CostModel): + """ + Model to represent a running average across all samples + """ + + def __init__(self): + self.average = 0 + self.n = 0 + + def compute(self, x: Any) -> Any: + """ + Computes and returns the new average after including x + + Updates the internal average + """ + if x is None: + raise TypeError("Input argument cannot be None") + + self.n += 1 + self.average = self.average * (self.n - 1) / self.n + x / self.n + return self.average + + @property + def name(self): + return "ARITHMETIC_AVG" + + +def create_model(cost_model_option: CostModelOption, *args): + """ + Given a CostModelOption, initialize and return the corresponding cost model. + *args is to provide a CostModel with additional creation arguments if + the particular model takes additional parameters during creation + """ + if cost_model_option is None: + raise TypeError("cost_model_option cannot be None") + elif not isinstance(cost_model_option, type(CostModelOption.NO_TRANSFORM)): + raise TypeError("First argument, cost_model_option, must be a CostModelOption") + + if cost_model_option == CostModelOption.NO_TRANSFORM: + return NoTransform() + elif cost_model_option == CostModelOption.SUM: + return Sum() + elif cost_model_option == CostModelOption.ARITHMETIC_AVG: + return ArithmeticAverage() + else: + raise ValueError("Given cost model option not implemented.") diff --git a/PrometheusExporters/query_cost_exporter/requirements.txt b/PrometheusExporters/query_cost_exporter/requirements.txt new file mode 100644 index 0000000..022a5ad --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/requirements.txt @@ -0,0 +1,2 @@ +loguru==0.7.3 +prometheus_client==0.22.1 diff --git a/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py b/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py new file mode 100644 index 0000000..766e42a --- /dev/null +++ b/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py @@ -0,0 +1,175 
@@ +from loguru import logger +from prometheus_client import Gauge, start_http_server + + +class QueryLatencyExporter: + + @staticmethod + def _IP_valid(addr): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if addr is None: + raise TypeError("IP address cannot be None") + elif not isinstance(addr, str): + raise TypeError("IP address must be a string") + elif addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + return + + @staticmethod + def _port_valid(port): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if port is None: + raise TypeError("Port cannot be None") + elif not isinstance(port, int): + raise TypeError("Port must be an integer") + elif port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + return + + def __init__(self, addr: str, port: int): + self.logger = logger.bind(module="query_latency_exporter") + self.port = port + self.addr = addr + + self.http_server = None + self.server_thread = None + + try: + QueryLatencyExporter._IP_valid(self.addr) + QueryLatencyExporter._port_valid(self.port) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryLatencyExporter: {str(e)}") + raise e + + self.latencies_metric = Gauge( + "query_latencies", "Query latencies", labelnames=["query_index", "server"] + ) + self.cumulative_latencies_metric = Gauge( + "cumulative_query_latencies", + "Query cumulative latencies", + labelnames=["query_index", "server"], + ) + 
self.logger.info("QueryLatencyExporter object created") + + def __enter__(self): + return self + + def __exit__(self, *args): + self.shutdown() + + def launch(self): + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Latency exporter failed to launch: exporter IP is None") + elif self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError( + "Latency exporter failed to launch: exporter port is None" + ) + + self.logger.info(f"Launching latency exporter at {self.addr}: {self.port}") + + try: + self.http_server, self.server_thread = start_http_server( + addr=self.addr, port=self.port + ) + except Exception as e: + self.logger.error(f"Failed to start http server due to exception: {str(e)}") + e.add_note("Latency exporter failed to launch") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}: {self.port}") + print(f"Exporter running at {self.addr}: {self.port}") + + return + + def shutdown(self): + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down latency exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + e.add_note("Attempt to shutdown exporter http_server failed.") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server 
thread: {str(e)}") + e.add_note("Attempt to join exporter's server thread failed.") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Exporter server thread is None") + + print("Exporter shut down successfully") + return + + def export_repetition(self, repetition_idx: int, result): + """ + Exports a single repetition result for all queries + """ + if not isinstance(repetition_idx, int): + self.logger.error("Given non-integer repetition_idx") + raise TypeError("Repetition index must be an integer") + + self.logger.trace(f"Updating metrics for repetition no.{repetition_idx}") + + if result is None: + self.logger.error("Repetition result is None") + raise TypeError("Repetition result is None") + + for server_name in result: + for query_idx in result[server_name]: + query_result_across_time = result[server_name][query_idx] + query_rep_result = query_result_across_time.query_results[ + repetition_idx + ] + latency = query_rep_result.latency + cumulative_latency = query_rep_result.cumulative_latency + + if latency is not None: + self.latencies_metric.labels( + query_index=str(query_idx), server=server_name + ).set(latency) + + if cumulative_latency is not None: + self.cumulative_latencies_metric.labels( + query_index=str(query_idx), server=server_name + ).set(cumulative_latency) + + return diff --git a/PrometheusExporters/query_latency_exporter/requirements.txt b/PrometheusExporters/query_latency_exporter/requirements.txt new file mode 100644 index 0000000..022a5ad --- /dev/null +++ b/PrometheusExporters/query_latency_exporter/requirements.txt @@ -0,0 +1,2 @@ +loguru==0.7.3 +prometheus_client==0.22.1 diff --git a/QueryEngineRust/.cargo/config.toml b/QueryEngineRust/.cargo/config.toml new file mode 100644 index 0000000..c91c3f3 --- /dev/null +++ b/QueryEngineRust/.cargo/config.toml @@ -0,0 +1,2 @@ +[net] +git-fetch-with-cli = true diff --git a/QueryEngineRust/.gitignore 
b/QueryEngineRust/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/QueryEngineRust/.gitignore @@ -0,0 +1 @@ +target diff --git a/QueryEngineRust/Cargo.toml b/QueryEngineRust/Cargo.toml new file mode 100644 index 0000000..567bcc0 --- /dev/null +++ b/QueryEngineRust/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "query_engine_rust" +version = "0.1.0" +edition = "2021" + +[dependencies] +sketch-core = { path = "../sketch-core" } +form_urlencoded = "1.2" +promql_utilities = { path = "../CommonDependencies/dependencies/rs/promql_utilities" } +sql_utilities = { path = "../CommonDependencies/dependencies/rs/sql_utilities" } +sketch_db_common = { path = "../CommonDependencies/dependencies/rs/sketch_db_common" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" +rmp-serde = "1.1" +tokio = { version = "1.0", features = ["full"] } +axum = "0.7" +rdkafka = "0.34" +rusqlite = { version = "0.31", features = ["bundled"] } +clap = { version = "4.0", features = ["derive"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" +thiserror = "1.0" +anyhow = "1.0" +bincode = "1.3" +dashmap = "5.5" +uuid = { version = "1.0", features = ["v4"] } +chrono = { version = "0.4", features = ["serde"] } +structopt = "0.3" +urlencoding = "2.1" +flate2 = "1.0" +async-trait = "0.1" +promql-parser = "0.5.0" +reqwest = { version = "0.11", features = ["json"] } +xxhash-rust = { version = "0.8", features = ["xxh32", "xxh64"] } +dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" } +base64 = "0.21" +hex = "0.4" +sqlparser = "0.59.0" +datafusion = "43" +arrow = "53.4.1" +datafusion_summary_library = { path = "../CommonDependencies/dependencies/rs/datafusion_summary_library" } +futures = "0.3" +prost = "0.13" +snap = "1" +regex = "1" +prometheus = "0.13" +lazy_static = "1.4" +zstd = "0.13" + +[dev-dependencies] +tempfile = "3.20.0" + +[features] +#default = 
["lock_profiling", "extra_debugging"] +default = [] +# Enable lock profiling instrumentation +lock_profiling = [] +# Enable extra debugging output +extra_debugging = [] diff --git a/QueryEngineRust/Dockerfile b/QueryEngineRust/Dockerfile new file mode 100644 index 0000000..c43d757 --- /dev/null +++ b/QueryEngineRust/Dockerfile @@ -0,0 +1,68 @@ +# QueryEngine Rust/Dockerfile +# Multi-stage build for Rust application + +FROM rust:1.89 AS builder + +LABEL maintainer="SketchDB Team" +LABEL description="QueryEngine Rust service for SketchDB" + +WORKDIR /code + +# Copy the CommonDependencies directory +COPY CommonDependencies ./CommonDependencies + +# Copy path dependencies of QueryEngineRust +COPY sketch-core ./sketch-core + +COPY Cargo.toml ./ +COPY Cargo.lock ./ +COPY QueryEngineRust/Cargo.toml ./QueryEngineRust/ +# COPY QueryEngineRust/.cargo ./QueryEngineRust/.cargo + +# Create a dummy main.rs to build dependencies +RUN mkdir -p QueryEngineRust/src && echo "fn main() {}" > QueryEngineRust/src/main.rs + +# Build dependencies (this layer will be cached) +# Uses BuildKit secret mount to pass git credentials without baking into a layer +WORKDIR /code/QueryEngineRust +RUN cargo build --release && rm -rf src/ +# RUN --mount=type=secret,id=git_token \ +# if [ -f /run/secrets/git_token ]; then \ +# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ +# fi && \ +# cargo build --release && rm -rf src/ + +# Copy source code +COPY QueryEngineRust/src ./src + +# Build the actual application +RUN touch src/main.rs && cargo build --release +# RUN --mount=type=secret,id=git_token \ +# if [ -f /run/secrets/git_token ]; then \ +# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ +# fi && \ +# touch src/main.rs && cargo build --release + +# Runtime stage with Ubuntu 24.04 (has newer glibc/libstdc++) +FROM ubuntu:24.04 + +WORKDIR /app + 
+# Install minimal runtime dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + zlib1g \ + && rm -rf /var/lib/apt/lists/* + +# Copy the built binary +COPY --from=builder /code/target/release/query_engine_rust /usr/local/bin/query_engine_rust + +# Expose the HTTP server port +EXPOSE 8088 + +# Note: Running as root to match Python QueryEngine behavior +# This allows writing to mounted volumes without permission issues + +# Use ENTRYPOINT to allow passing command line arguments +ENTRYPOINT ["query_engine_rust"] diff --git a/QueryEngineRust/LICENSE b/QueryEngineRust/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/QueryEngineRust/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/QueryEngineRust/docker-compose.yml.j2 b/QueryEngineRust/docker-compose.yml.j2 new file mode 100644 index 0000000..43fd7e1 --- /dev/null +++ b/QueryEngineRust/docker-compose.yml.j2 @@ -0,0 +1,43 @@ +# QueryEngine Rust Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +version: '3.8' + +services: + queryengine-rust: + image: sketchdb-queryengine-rust:latest + container_name: {{ container_name }} + environment: + - RUST_LOG={{ log_level }} + - RUST_BACKTRACE=1 + ports: + - "{{ http_port }}:8088" + network_mode: "host" + volumes: + # Mount output directory for experiment results + - "{{ experiment_output_dir }}:/app/outputs" + # Mount controller output directory for configuration files (read-only) + - "{{ controller_remote_output_dir }}:/app/controller_output:ro" + command: [ + "--kafka-topic", "{{ kafka_topic }}", + "--kafka-broker", "{{ kafka_host }}:9092", + "--input-format", "{{ input_format }}", + "--config", "/app/controller_output/inference_config.yaml", + "--streaming-config", "/app/controller_output/streaming_config.yaml", + "--prometheus-server", "http://{{ prometheus_host }}:{{ prometheus_port }}", + "--prometheus-scrape-interval", "{{ prometheus_scrape_interval }}", + "--delete-existing-db", + "--log-level", "{{ log_level }}", + "--output-dir", "/app/outputs", + "--streaming-engine", "{{ streaming_engine }}", + "--query-language", "{{ query_language }}", + "--lock-strategy", "{{ lock_strategy }}"{% if compress_json %}, + "--decompress-json"{% endif %}{% if profile_query_engine %}, + "--do-profiling"{% endif %}{% if forward_unsupported_queries %}, + "--forward-unsupported-queries"{% endif %}{% if dump_precomputes %}, + "--dump-precomputes"{% endif %} + ] + extra_hosts: + - "kafka:{{ kafka_host }}" + - "prometheus:{{ prometheus_host }}" + restart: no diff --git a/QueryEngineRust/docs/README.md b/QueryEngineRust/docs/README.md new file mode 100644 index 0000000..1fc7ce7 --- /dev/null +++ 
b/QueryEngineRust/docs/README.md @@ -0,0 +1,114 @@ +# QueryEngineRust Developer Documentation + +Welcome to the QueryEngineRust developer documentation! This directory contains guides for extending the system with new components. + +## Architecture Overview + +QueryEngineRust is organized into clear, extensible layers: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Client Applications │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Protocol Servers (HTTP, etc.) │ +│ - Parse protocol-specific requests │ +│ - Route to appropriate adapter │ +│ - Handle protocol-specific endpoints │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Protocol Adapters (Prometheus, etc.) │ +│ - Parse query language (PromQL, SQL, etc.) │ +│ - Format responses for protocol │ +│ - Determine if query is supported │ +└─────────────────────────────────────────────────────────┘ + │ + ┌──────┴──────┐ + ▼ ▼ + ┌─────────────────┐ ┌──────────────────┐ + │ Query Engine │ │ Fallback Client │ + │ - Execute │ │ - Forward │ + │ queries │ │ unsupported │ + │ - Return │ │ queries │ + │ results │ │ │ + └────────┬────────┘ └──────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Store │ + │ - Data storage │ + │ - Sketches │ + └─────────────────┘ + ▲ + │ + ┌────────┴────────┐ + │ Ingest Drivers │ + │ - Kafka, etc. │ + └─────────────────┘ +``` + +## Directory Structure + +``` +src/drivers/ +├── ingest/ # Data ingestion (Kafka, etc.) +├── query/ +│ ├── adapters/ # Protocol adapters (Prometheus HTTP, etc.) +│ ├── fallback/ # Fallback backends (Prometheus, ClickHouse, etc.) +│ └── servers/ # Protocol servers (HTTP, Flight SQL, etc.) 
+``` + +## Extension Guides + +- **[Adding a Protocol Adapter](./adding-protocol-adapter.md)** - Add support for new query protocols (e.g., ClickHouse HTTP API) +- **[Adding a Fallback Backend](./adding-fallback-backend.md)** - Add new fallback query backends (e.g., DuckDB, Elasticsearch) +- **[Adding a Protocol Server](./adding-protocol-server.md)** - Add new protocol servers (e.g., Flight SQL, gRPC) + +## Key Concepts + +### Protocol Adapter +Handles protocol-specific request/response formatting and query parsing. Examples: Prometheus HTTP API, ClickHouse HTTP API. + +### Fallback Backend +External query system to forward unsupported queries to. Examples: Prometheus, ClickHouse, DuckDB. + +### Protocol Server +Handles network communication for a specific protocol. Examples: HTTP server, Flight SQL server. + +## Quick Reference + +### Adding a Protocol Adapter +1. Create `src/drivers/query/adapters/my_adapter.rs` +2. Implement `HttpProtocolAdapter` trait +3. Add to factory in `factory.rs` +4. Update `QueryProtocol` enum + +### Adding a Fallback Backend +1. Create `src/drivers/query/fallback/my_backend.rs` +2. Implement `FallbackClient` trait +3. Export from `fallback/mod.rs` + +### Adding a Protocol Server +1. Create `src/drivers/query/servers/my_server.rs` +2. Implement server logic with appropriate adapter +3. Export from `servers/mod.rs` + +## Testing + +Each component should include: +- Unit tests in the same file +- Integration tests in `src/tests/` +- Example usage in documentation + +## Contributing + +When adding new components: +1. Follow existing naming conventions +2. Add comprehensive documentation +3. Include tests +4. Update this documentation +5. 
Keep backward compatibility diff --git a/QueryEngineRust/docs/adding-fallback-backend.md b/QueryEngineRust/docs/adding-fallback-backend.md new file mode 100644 index 0000000..847a899 --- /dev/null +++ b/QueryEngineRust/docs/adding-fallback-backend.md @@ -0,0 +1,117 @@ +# Adding a Fallback Backend + +Fallback backends allow forwarding unsupported queries to external systems. This guide shows how to add support for a new fallback backend. + +## Overview + +A fallback backend: +- Accepts queries in a specific language (SQL, PromQL, etc.) +- Makes HTTP/gRPC/native calls to external system +- Returns results in a generic format +- Optionally provides runtime/health information + +## Example: Adding DuckDB HTTP Fallback + +### Step 1: Create the Fallback Client + +Create `src/drivers/query/fallback/duckdb.rs`: + +```rust +/// Fallback client for DuckDB HTTP API +pub struct DuckDBHttpFallback { + client: Client, + base_url: String, +} + +impl DuckDBHttpFallback { + pub fn new(base_url: String) -> Self { + Self { + client: Client::new(), + base_url, + } + } +} + +#[derive(Debug, Deserialize)] +struct DuckDBResponse { + success: bool, + data: Option>>, + columns: Option>, + error: Option, +} + +#[async_trait] +impl FallbackClient for DuckDBHttpFallback { + async fn execute_query( + &self, + request: &ParsedQueryRequest, + ) -> Result, StatusCode> { + ... + } + + async fn get_runtime_info(&self) -> Result { + ... 
+ } +} +``` + +### Step 2: Export from Module + +Update `src/drivers/query/fallback/mod.rs`: + +```rust +mod duckdb; +pub use duckdb::DuckDBHttpFallback; +``` + +### Step 3: Use in Configuration + +The fallback client can now be used in adapter configuration: + +```rust +use crate::drivers::query::adapters::AdapterConfig; +use crate::drivers::query::fallback::DuckDBHttpFallback; +use std::sync::Arc; + +// Create adapter config with DuckDB fallback +let fallback = Some(Arc::new( + DuckDBHttpFallback::new("http://localhost:8080".to_string()) +) as Arc); + +let config = AdapterConfig::new( + QueryProtocol::PrometheusHttp, // Protocol for incoming queries + QueryLanguage::sql, // Query language + fallback, // DuckDB fallback +); +``` + +### Step 4: Add Tests + +Add tests in `duckdb.rs`: + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_duckdb_fallback_creation() { + ... + } + + // Mock DuckDB server test would go here +} +``` + +## FallbackClient Trait Methods + +### Required: `execute_query()` +- Accepts a `ParsedQueryRequest` (query string + time) +- Makes external call to backend +- Returns `Json` response +- Should handle all error cases gracefully + +### Optional: `get_runtime_info()` +- Returns health/status information from backend +- Has default implementation (returns empty JSON) +- Override if backend has health endpoint diff --git a/QueryEngineRust/docs/adding-protocol-adapter.md b/QueryEngineRust/docs/adding-protocol-adapter.md new file mode 100644 index 0000000..b2ce68f --- /dev/null +++ b/QueryEngineRust/docs/adding-protocol-adapter.md @@ -0,0 +1,133 @@ +# Adding a Protocol Adapter + +Protocol adapters handle protocol-specific request/response formatting and query language parsing. This guide shows how to add support for a new query protocol. + +## Overview + +A protocol adapter: +- Parses incoming requests (GET/POST parameters, headers, etc.) 
+- Translates queries to internal format +- Formats query results for the protocol +- Defines protocol-specific endpoints + +## Example: Adding ClickHouse HTTP Adapter + +### Step 1: Create the Adapter File + +Create `src/drivers/query/adapters/clickhouse_http.rs`: + +```rust + +/// ClickHouse HTTP protocol adapter +pub struct ClickHouseHttpAdapter { + config: AdapterConfig, +} + +impl ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl QueryRequestAdapter for ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl QueryResponseAdapter for ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl HttpProtocolAdapter for ClickHouseHttpAdapter { + ... +} +``` + +### Step 2: Add Protocol Enum Variant + +Update `src/data_model/enums.rs` to add the new protocol: + +```rust +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum QueryProtocol { + ... + ClickHouseHttp, // Add this +} +``` + +### Step 3: Export from Module + +Update `src/drivers/query/adapters/mod.rs`: + +```rust +pub mod clickhouse_http; +pub use clickhouse_http::ClickHouseHttpAdapter; +``` + +### Step 4: Add to Factory + +Update `src/drivers/query/adapters/factory.rs`: + +```rust +pub fn create_http_adapter(config: AdapterConfig) -> Arc { + match config.protocol { + ... + QueryProtocol::ClickHouseHttp => { // Add this + Arc::new(ClickHouseHttpAdapter::new(config)) + } + } +} +``` + +### Step 5: Add Convenience Constructor (Optional) + +Update `src/drivers/query/adapters/config.rs`: + +```rust +impl AdapterConfig { + pub fn clickhouse_http(fallback_url: String, forward_unsupported: bool) -> Self { + ... + } +} +``` + +### Step 6: Test the Adapter + +Add tests in `clickhouse_http.rs`: + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_parse_get_request() { + ... 
+ } +} +``` + +## Key Traits to Implement + +### Required: `QueryRequestAdapter` +- `parse_get_request()` - Parse GET requests +- `parse_post_request()` - Parse POST requests +- `get_query_endpoint()` - Return endpoint path + +### Required: `QueryResponseAdapter` +- `format_success_response()` - Format successful query results +- `format_error_response()` - Format errors +- `format_unsupported_query_response()` - Format unsupported query errors + +### Required: `HttpProtocolAdapter` +- `adapter_name()` - Return adapter name for logging +- `get_runtime_info_path()` - Return health/status endpoint path +- `handle_runtime_info()` - Handle health/status requests + +## Common Gotchas + +- Don't implement query execution in the adapter - that's the engine's job +- Don't hard-code URLs or configuration - use `AdapterConfig` +- Handle both GET and POST requests appropriately +- Return protocol-specific error formats +- Use existing types from `traits.rs` (`ParsedQueryRequest`, `QueryExecutionResult`) diff --git a/QueryEngineRust/docs/adding-protocol-server.md b/QueryEngineRust/docs/adding-protocol-server.md new file mode 100644 index 0000000..ded7ed6 --- /dev/null +++ b/QueryEngineRust/docs/adding-protocol-server.md @@ -0,0 +1,105 @@ +# Adding a Protocol Server + +Protocol servers handle network communication for specific protocols. This guide shows how to add a new protocol server (like Flight SQL, gRPC, etc.). + +## Overview + +A protocol server: +- Listens on a network port +- Handles protocol-specific requests +- Uses adapters to process queries +- Returns protocol-specific responses + +## Example: Adding Flight SQL Server + +Flight SQL is Apache Arrow's SQL protocol over gRPC. 
Here's how to add it: + +### Step 1: Create the Server + +Create `src/drivers/query/servers/flight_sql.rs`: + +```rust +#[derive(Debug, Clone)] +pub struct FlightSqlServerConfig { + pub port: u16, + pub adapter_config: AdapterConfig, +} + +pub struct FlightSqlServer { + config: FlightSqlServerConfig, + query_engine: Arc, + store: Arc, +} + +impl FlightSqlServer { + pub fn new( + config: FlightSqlServerConfig, + query_engine: Arc, + store: Arc, + ) -> Self { + Self { + config, + query_engine, + store, + } + } + + pub async fn run(self) -> Result<(), Box> { + ... + } +} +``` + +### Step 3: Export from Module + +Update `src/drivers/query/servers/mod.rs`: + +```rust +pub mod flight_sql; +pub use flight_sql::{FlightSqlServer, FlightSqlServerConfig}; +``` + +### Step 4: Update Main Binary + +Update `src/main.rs` to support choosing the server: + +```rust +#[derive(Parser, Debug)] +struct Args { + // ... existing args ... + + /// Server protocol to use (http, flight_sql) + #[arg(long, default_value = "http")] + server_protocol: String, + + // ... rest of args ... +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + // ... setup engine, store, etc. ... + + match args.server_protocol.as_str() { + "http" => { + let server = HttpServer::new(http_config, engine, store); + server.run().await?; + } + "flight_sql" => { + let flight_config = FlightSqlServerConfig { + port: args.http_port, + adapter_config, + }; + let server = FlightSqlServer::new(flight_config, engine, store); + server.run().await?; + } + _ => { + eprintln!("Unknown server protocol: {}", args.server_protocol); + std::process::exit(1); + } + } + + Ok(()) +} +``` diff --git a/QueryEngineRust/docs/promsketch-integration.md b/QueryEngineRust/docs/promsketch-integration.md new file mode 100644 index 0000000..ad0ba8f --- /dev/null +++ b/QueryEngineRust/docs/promsketch-integration.md @@ -0,0 +1,144 @@ +# PromSketch Integration — Multi-Path Ingestion Architecture + +## 1. 
Overview + +QueryEngine supports two parallel data ingestion paths: + +1. **Precomputed pipeline**: A Kafka topic carrying pre-aggregated sketch buckets is consumed by `KafkaConsumer`, stored in `SimpleMapStore`, and served through the standard query path. +2. **Raw sample pipeline (Prometheus Remote Write)**: A standalone HTTP endpoint (`/api/v1/write`) accepts standard Prometheus remote write requests (Snappy-compressed protobuf). Decoded samples are inserted into `PromSketchStore` (which maintains live EHUniv, EHKLL, and USampling sketch instances per series) and served through the sketch query path. + +When a query arrives, the engine tries the sketch path first, then falls through to the precomputed path, and finally (optionally) to a remote Prometheus server. + +## 2. Data Flow Diagram + +``` +Raw Samples Path (Prometheus Remote Write): + Prometheus / Agent --> POST /api/v1/write --> PrometheusRemoteWriteServer --> PromSketchStore + (Snappy + protobuf) decode & insert + | + sketch_insert() + (EHUniv, EHKLL, USampling) + +Precomputed Path: + Prometheus --> PrecomputeEngine --> Kafka [precomputed] --> KafkaConsumer --> SimpleMapStore + +Query Path: + HTTP Request --> SimpleEngine + |-- (1) handle_sketch_query_promql() --> PromSketchStore.eval_matching() + |-- (2) precomputed pipeline (SimpleMapStore) + +-- (3) fallback --> Prometheus server +``` + +## 3. Query Routing + +When a PromQL query arrives, `SimpleEngine` dispatches it as follows: + +1. **PromSketch path** — `handle_sketch_query_promql()` parses the query (AST first, regex fallback for custom functions). If the function name is in `promsketch_func_map` and the `PromSketchStore` has matching series data, results are returned immediately. +2. **Precomputed path** — If the sketch path returns `None` (function not sketch-backed, no store configured, or no matching series), the query falls through to `SimpleMapStore`. +3. 
**Prometheus fallback** — If `--forward-unsupported-queries` is set and the precomputed path also misses, the query is forwarded to the remote Prometheus server. + +### Sketch-Backed Functions (13 total) + +These functions are routed to `PromSketchStore` first, with fallthrough to precomputed on miss: + +| Function | Sketch Type | Standard PromQL? | Description | +|-------------------------|-------------|-------------------|--------------------------------------------------| +| `entropy_over_time` | EHUniv | No (custom) | Shannon entropy of the sample distribution | +| `distinct_over_time` | EHUniv | No (custom) | Estimated number of distinct values | +| `l1_over_time` | EHUniv | No (custom) | L1 norm of the value vector | +| `l2_over_time` | EHUniv | No (custom) | L2 norm of the value vector | +| `quantile_over_time` | EHKLL | Yes | Approximate quantile (e.g., p50, p99) | +| `min_over_time` | EHKLL | Yes | Minimum value over the range | +| `max_over_time` | EHKLL | Yes | Maximum value over the range | +| `avg_over_time` | USampling | Yes | Average of sampled values | +| `count_over_time` | USampling | Yes | Count of sampled data points | +| `sum_over_time` | USampling | Yes | Sum of sampled values | +| `sum2_over_time` | USampling | No (custom) | Sum of squared values | +| `stddev_over_time` | USampling | Yes | Standard deviation over the range | +| `stdvar_over_time` | USampling | Yes | Variance over the range | + +### Non-Sketch Functions + +These functions always go directly to the precomputed pipeline (not in `promsketch_func_map`): + +| Function | Description | +|-------------|--------------------------------------| +| `rate` | Per-second rate of increase | +| `increase` | Total increase over the range | + +## 4. 
Configuration Reference + +### CLI Arguments + +| Argument | Description | Default | +|-------------------------------------|-------------------------------------------------------------------|-----------------| +| `--enable-prometheus-remote-write` | Enable the Prometheus remote write ingest endpoint | `false` | +| `--prometheus-remote-write-port` | Port for the Prometheus remote write HTTP server | `9090` | +| `--auto-init-sketches` | Auto-initialize all 3 sketch types for every new series | `true` | +| `--promsketch-config` | Path to a sketch configuration YAML file (optional) | (none) | + +### Sketch Config YAML + +All fields are optional; defaults are shown below. + +```yaml +eh_univ: + k: 50 # EH buckets for UnivMon + time_window: 1000000 # milliseconds + +eh_kll: + k: 50 # EH buckets for KLL + kll_k: 256 # KLL accuracy parameter + time_window: 1000000 + +sampling: + sample_rate: 0.2 # fraction of data points to sample + time_window: 1000000 +``` + +## 5. Deployment Checklist + +### Start QueryEngine with remote write enabled: + +```bash +./query_engine \ + --enable-prometheus-remote-write \ + --prometheus-remote-write-port 9090 \ + --promsketch-config promsketch_config.yaml # optional +``` + +### Configure Prometheus (or any remote write sender) to write to the endpoint: + +```yaml +# prometheus.yml +remote_write: + - url: "http://:9090/api/v1/write" +``` + +### Verify ingestion + +Check logs for `"Received N samples"` messages. + +### Verify queries + +```bash +curl 'http://localhost:8088/api/v1/query?query=quantile_over_time(0.5,metric[1m])&time=...' +``` + +### Monitor + +Use the `/metrics` endpoint for Prometheus counters (see section 6). + +## 6. `/metrics` Endpoint + +Exposed at `GET /metrics` in Prometheus exposition format. 
Key metrics: + +| Metric | Type | Description | +|-------------------------------------------------|-----------|-------------------------------------------------------| +| `promsketch_series_total` | Gauge | Number of live series currently tracked | +| `promsketch_samples_ingested_total` | Counter | Total raw samples ingested | +| `promsketch_ingest_errors_total` | Counter | Total ingestion errors (parse failures, etc.) | +| `promsketch_ingest_batch_duration_seconds` | Histogram | Time spent processing each ingestion batch | +| `promsketch_sketch_queries_total{result="hit"}` | Counter | Sketch queries that returned data | +| `promsketch_sketch_queries_total{result="miss"}`| Counter | Sketch queries that fell through (no matching series) | +| `promsketch_sketch_query_duration_seconds` | Histogram | End-to-end latency of sketch query evaluation | diff --git a/QueryEngineRust/examples/promql/inference_config.yaml b/QueryEngineRust/examples/promql/inference_config.yaml new file mode 100644 index 0000000..4aedd01 --- /dev/null +++ b/QueryEngineRust/examples/promql/inference_config.yaml @@ -0,0 +1,13 @@ +metrics: + fake_metric: + - instance + - job + - label_0 + - label_1 +cleanup_policy: + name: read_based +queries: +- aggregations: + - aggregation_id: 1 + read_count_threshold: 1 + query: quantile by (label_0) (0.99, fake_metric) diff --git a/QueryEngineRust/examples/promql/sketch_config.yaml b/QueryEngineRust/examples/promql/sketch_config.yaml new file mode 100644 index 0000000..e5ccc74 --- /dev/null +++ b/QueryEngineRust/examples/promql/sketch_config.yaml @@ -0,0 +1,10 @@ +eh_univ: + k: 50 + time_window: 1000000 +eh_kll: + k: 50 + kll_k: 256 + time_window: 1000000 +sampling: + sample_rate: 0.2 + time_window: 1000000 diff --git a/QueryEngineRust/examples/promql/streaming_config.yaml b/QueryEngineRust/examples/promql/streaming_config.yaml new file mode 100644 index 0000000..1c7fac1 --- /dev/null +++ b/QueryEngineRust/examples/promql/streaming_config.yaml @@ -0,0 +1,21 
@@ +aggregations: +- aggregationId: 1 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [label_0] + rollup: [instance, job, label_1] + aggregated: [] + metric: fake_metric + parameters: + K: 20 + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling + spatialFilter: '' +metrics: + fake_metric: + - instance + - job + - label_0 + - label_1 diff --git a/QueryEngineRust/examples/sql/inference_config.yaml b/QueryEngineRust/examples/sql/inference_config.yaml new file mode 100644 index 0000000..378c6f6 --- /dev/null +++ b/QueryEngineRust/examples/sql/inference_config.yaml @@ -0,0 +1,16 @@ +tables: + - name: metrics_table + time_column: time + metadata_columns: [hostname, datacenter] + value_columns: [cpu_usage, memory_usage] +cleanup_policy: + name: read_based +queries: +- aggregations: + - aggregation_id: 1 + read_count_threshold: 1 + query: | + SELECT datacenter, quantile(0.99)(cpu_usage) as p99 + FROM metrics_table + GROUP BY datacenter + WHERE time BETWEEN DATEADD(s, -11, NOW()) AND DATEADD(s, -10, NOW()) diff --git a/QueryEngineRust/examples/sql/streaming_config.yaml b/QueryEngineRust/examples/sql/streaming_config.yaml new file mode 100644 index 0000000..d36c54c --- /dev/null +++ b/QueryEngineRust/examples/sql/streaming_config.yaml @@ -0,0 +1,21 @@ +tables: + - name: metrics_table + time_column: time + metadata_columns: [hostname, datacenter] + value_columns: [cpu_usage, memory_usage] +aggregations: +- aggregationId: 1 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [datacenter] + rollup: [hostname] + aggregated: [] + table_name: metrics_table + value_column: cpu_usage + parameters: + K: 20 + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling + spatialFilter: '' diff --git a/QueryEngineRust/installation/install.sh b/QueryEngineRust/installation/install.sh new file mode 100755 index 0000000..dfce4dc --- /dev/null +++ b/QueryEngineRust/installation/install.sh @@ -0,0 +1,18 @@ 
+#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +source "$HOME/.cargo/env" + +echo "Building QueryEngine Rust binary..." +cd "$PARENT_DIR" +cargo build --release + +echo "Building QueryEngine Rust Docker image..." +cd "$(dirname "$PARENT_DIR")" +docker build . -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest + +echo "QueryEngine Rust Docker image built successfully: sketchdb-queryengine-rust:latest" diff --git a/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 b/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 new file mode 100644 index 0000000..64be341 --- /dev/null +++ b/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 @@ -0,0 +1,50 @@ +# QueryEngine Rust Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + queryengine-rust: + image: sketchdb-queryengine-rust:latest # Need to change image name to 'asap' prefix + container_name: asap-queryengine-rust + hostname: queryengine-rust + networks: + - asap-network + ports: + - "{{ http_port | default('8088') }}:8088" + environment: + - RUST_LOG={{ log_level | default('info') }} + - RUST_BACKTRACE=1 + volumes: + - "{{ experiment_output_dir }}:/app/outputs" + - "{{ controller_remote_output_dir }}:/app/controller_output:ro" + command: [ + "--kafka-topic", "{{ kafka_topic }}", + "--kafka-broker", "kafka:9092", # Uses kafka container service name instead of IP + "--input-format", "{{ input_format }}", + "--config", "/app/controller_output/inference_config.yaml", + "--streaming-config", "/app/controller_output/streaming_config.yaml", + "--prometheus-server", "http://prometheus:9090", + "--prometheus-scrape-interval", "{{ prometheus_scrape_interval }}", + "--delete-existing-db", + "--log-level", "{{ log_level }}", + "--output-dir", "/app/outputs", + "--streaming-engine", "{{ streaming_engine }}", + "--query-language", "{{ query_language }}", + "--lock-strategy", 
"{{ lock_strategy }}"{% if compress_json %}, + "--decompress-json"{% endif %}{% if profile_query_engine %}, + "--do-profiling"{% endif %}{% if forward_unsupported_queries %}, + "--forward-unsupported-queries"{% endif %}{% if dump_precomputes %}, + "--dump-precomputes"{% endif %} + ] + depends_on: + kafka: + condition: service_healthy + kafka-init: + condition: service_completed_successfully # Wait for Kafka topics to be created + prometheus: + condition: service_healthy + controller: + condition: service_completed_successfully # Wait for controller to generate configs + arroyosketch: + condition: service_completed_successfully # Wait for pipeline configuration + restart: no + # Add healthcheck diff --git a/QueryEngineRust/rustfmt.toml b/QueryEngineRust/rustfmt.toml new file mode 100644 index 0000000..88c5c0f --- /dev/null +++ b/QueryEngineRust/rustfmt.toml @@ -0,0 +1,4 @@ +# Optional: customize rustfmt behavior +max_width = 100 +hard_tabs = false +tab_spaces = 4 diff --git a/QueryEngineRust/src/bin/run.sh b/QueryEngineRust/src/bin/run.sh new file mode 100644 index 0000000..57b41a6 --- /dev/null +++ b/QueryEngineRust/src/bin/run.sh @@ -0,0 +1 @@ +cargo run --bin test_offline_precomputes -- --input-file --mode merge --pattern-type temporal --aggregation-type DatasketchesKLL --window-size 90 --iterations 10 --slide-step 5 diff --git a/QueryEngineRust/src/bin/show_logical_plans.rs b/QueryEngineRust/src/bin/show_logical_plans.rs new file mode 100644 index 0000000..f5210cc --- /dev/null +++ b/QueryEngineRust/src/bin/show_logical_plans.rs @@ -0,0 +1,527 @@ +//! Standalone binary that constructs diverse QueryExecutionContext structures, +//! converts each to a DataFusion logical plan, and prints each plan along with +//! the schema of every edge and key internal variables. +//! +//! Covers 4 queries x multiple accumulator configurations = 10 test cases: +//! +//! 1. sum by (host) (data) — spatial sum +//! 2. quantile by (host) (0.5, data) — spatial quantile +//! 3. 
sum_over_time(data[1m]) — temporal sum +//! 4. quantile_over_time(0.5, data[1m]) — temporal quantile +//! +//! data has columns: host, service, region + +use datafusion::logical_expr::LogicalPlan; +use datafusion_summary_library::{PrecomputedSummaryRead, SummaryInfer, SummaryMergeMultiple}; +use promql_utilities::data_model::KeyByLabelNames; +use promql_utilities::query_logics::enums::Statistic; +use query_engine_rust::engines::simple_engine::{ + AggregationIdInfo, QueryExecutionContext, QueryMetadata, StoreQueryParams, StoreQueryPlan, +}; +use std::collections::HashMap; + +// ============================================================================ +// Context builders +// ============================================================================ + +/// Build a QueryExecutionContext with full control over all parameters. +#[allow(clippy::too_many_arguments)] +fn build_context( + metric: &str, + statistic: Statistic, + query_output_labels: Vec<&str>, + grouping_labels: Vec<&str>, + aggregated_labels: Vec<&str>, + agg_type_value: &str, + agg_type_key: &str, + agg_id_value: u64, + agg_id_key: u64, + keys_query: Option, + do_merge: bool, + is_exact_query: bool, + kwargs: HashMap, +) -> QueryExecutionContext { + QueryExecutionContext { + metric: metric.to_string(), + metadata: QueryMetadata { + query_output_labels: KeyByLabelNames { + labels: query_output_labels.into_iter().map(String::from).collect(), + }, + statistic_to_compute: statistic, + query_kwargs: kwargs, + }, + store_plan: StoreQueryPlan { + values_query: StoreQueryParams { + metric: metric.to_string(), + aggregation_id: agg_id_value, + start_timestamp: if do_merge { 1000 } else { 2000 }, + end_timestamp: 2000, + is_exact_query, + }, + keys_query, + }, + agg_info: AggregationIdInfo { + aggregation_id_for_key: agg_id_key, + aggregation_id_for_value: agg_id_value, + aggregation_type_for_key: agg_type_key.to_string(), + aggregation_type_for_value: agg_type_value.to_string(), + }, + do_merge, + 
spatial_filter: String::new(), + query_time: 2000, + grouping_labels: KeyByLabelNames { + labels: grouping_labels.into_iter().map(String::from).collect(), + }, + aggregated_labels: KeyByLabelNames { + labels: aggregated_labels.into_iter().map(String::from).collect(), + }, + } +} + +fn make_keys_query(metric: &str, agg_id: u64) -> StoreQueryParams { + StoreQueryParams { + metric: metric.to_string(), + aggregation_id: agg_id, + start_timestamp: 0, // DeltaSetAggregator reads from beginning of time + end_timestamp: 2000, + is_exact_query: false, // keys are always range queries + } +} + +// ============================================================================ +// Plan printing utilities +// ============================================================================ + +/// Recursively print the plan tree with indentation, showing each node's +/// explain text and output schema. +fn print_plan_tree(plan: &LogicalPlan, indent: usize) { + let prefix = " ".repeat(indent); + let connector = if indent > 0 { "└─► " } else { "" }; + + match plan { + LogicalPlan::Extension(ext) => { + // Print node name and explain text + println!("{prefix}{connector}{}", ext.node.name()); + + // Print detailed properties by downcasting each node + print_node_details(plan, indent + 2); + + // Print output schema + let schema = ext.node.schema(); + print!("{} schema: [", prefix); + for (i, field) in schema.fields().iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{}:{}", field.name(), field.data_type()); + } + println!("]"); + + // Recurse into inputs + let inputs = ext.node.inputs(); + for (i, input) in inputs.iter().enumerate() { + if inputs.len() > 1 { + println!("{} input {}:", prefix, i); + } + print_plan_tree(input, indent + 2); + } + } + _ => { + println!("{prefix}{connector}Unknown: {:?}", plan); + } + } +} + +/// Print detailed properties of a plan node by downcasting. 
+fn print_node_details(plan: &LogicalPlan, indent: usize) { + let prefix = " ".repeat(indent); + if let LogicalPlan::Extension(ext) = plan { + if let Some(infer) = ext.node.as_any().downcast_ref::() { + println!( + "{prefix}operations: {:?}", + infer + .operations + .iter() + .map(|op| format!("{}", op)) + .collect::>() + ); + println!("{prefix}output_names: {:?}", infer.output_names); + println!("{prefix}group_key_columns: {:?}", infer.group_key_columns); + println!("{prefix}has_keys_input: {}", infer.keys_input.is_some()); + } else if let Some(merge) = ext.node.as_any().downcast_ref::() { + println!("{prefix}group_by: {:?}", merge.group_by()); + println!("{prefix}sketch_column: {:?}", merge.sketch_column()); + println!("{prefix}summary_type: {}", merge.summary_type()); + } else if let Some(read) = ext.node.as_any().downcast_ref::() { + println!("{prefix}metric: {:?}", read.metric()); + println!("{prefix}aggregation_id: {}", read.aggregation_id()); + println!( + "{prefix}range: [{}, {}]", + read.start_timestamp(), + read.end_timestamp() + ); + println!("{prefix}is_exact_query: {}", read.is_exact_query()); + println!("{prefix}summary_type: {}", read.summary_type()); + println!("{prefix}output_labels: {:?}", read.output_labels()); + } + } +} + +/// Print key internal variables about a QueryExecutionContext. 
+fn print_context_variables(ctx: &QueryExecutionContext) { + let has_separate_keys = ctx.store_plan.keys_query.is_some() + && ctx.agg_info.aggregation_id_for_key != ctx.agg_info.aggregation_id_for_value; + let has_aggregated_labels = !ctx.aggregated_labels.labels.is_empty(); + + println!(" Internal variables:"); + println!(" has_separate_keys (dual input): {}", has_separate_keys); + println!( + " has_aggregated_labels (multi-population): {}", + has_aggregated_labels + ); + println!(" do_merge (temporal): {}", ctx.do_merge); + println!( + " keys_included: {}", + has_separate_keys || has_aggregated_labels + ); + println!( + " value_agg: {} (id={})", + ctx.agg_info.aggregation_type_for_value, ctx.agg_info.aggregation_id_for_value + ); + println!( + " key_agg: {} (id={})", + ctx.agg_info.aggregation_type_for_key, ctx.agg_info.aggregation_id_for_key + ); + println!( + " query_output_labels: {:?}", + ctx.metadata.query_output_labels.labels + ); + println!(" grouping_labels: {:?}", ctx.grouping_labels.labels); + println!(" aggregated_labels: {:?}", ctx.aggregated_labels.labels); + println!(" statistic: {:?}", ctx.metadata.statistic_to_compute); + if !ctx.metadata.query_kwargs.is_empty() { + println!(" query_kwargs: {:?}", ctx.metadata.query_kwargs); + } +} + +// ============================================================================ +// Test case definitions +// ============================================================================ + +struct TestCase { + title: String, + query: String, + description: String, + context: QueryExecutionContext, +} + +fn build_all_test_cases() -> Vec { + let metric = "data"; + let mut cases = Vec::new(); + + // ======================================================================== + // Query 1: sum by (host) (data) + // ======================================================================== + + // Case 1a: SumAccumulator only + // Simple single-population. Store groups by host, one Sum per host. 
+ cases.push(TestCase { + title: "sum by (host) — SumAccumulator".into(), + query: "sum by (host) (data)".into(), + description: + "Single-population exact sum. Store groups by [host], one scalar sum per group key." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], // query_output_labels + vec!["host"], // grouping_labels (store GROUP BY) + vec![], // aggregated_labels (none) + "SumAccumulator", // value accumulator + "SumAccumulator", // key accumulator (same = single) + 42, + 42, // same agg_id + None, // no keys_query + false, // not temporal + true, // exact (sliding window) + HashMap::new(), + ), + }); + + // Case 1b: MultipleSumAccumulator only (self-keyed) + // The accumulator internally tracks sums for each host value. + // Store doesn't group by host; the accumulator maps host -> sum. + cases.push(TestCase { + title: "sum by (host) — MultipleSumAccumulator (self-keyed)".into(), + query: "sum by (host) (data)".into(), + description: "Self-keyed multi-population. Store groups by [] (no spatial grouping). \ + MultipleSumAccumulator internally maps host -> sum." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], // query_output_labels + vec![], // grouping_labels (no store grouping) + vec!["host"], // aggregated_labels (host tracked internally) + "MultipleSumAccumulator", + "MultipleSumAccumulator", // same type = single agg_id + 42, + 42, + None, + false, + true, + HashMap::new(), + ), + }); + + // Case 1c: CountMinSketch + DeltaSetAggregator (dual-input) + // CountMinSketch estimates frequency per key; DeltaSetAggregator enumerates keys. + cases.push(TestCase { + title: "sum by (host) — CountMinSketch + DeltaSetAggregator (dual-input)".into(), + query: "sum by (host) (data)".into(), + description: "Dual-input plan. CountMinSketch for value estimation per host key, \ + DeltaSetAggregator enumerates which hosts exist." 
+ .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], + vec![], // grouping_labels (no store grouping) + vec!["host"], // aggregated_labels + "CountMinSketch", + "DeltaSetAggregator", + 42, + 99, // different agg_ids + Some(make_keys_query(metric, 99)), + false, + true, + HashMap::new(), + ), + }); + + // ======================================================================== + // Query 2: quantile by (host) (0.5, data) + // ======================================================================== + + let mut q_kwargs = HashMap::new(); + q_kwargs.insert("quantile".to_string(), "0.5".to_string()); + + // Case 2a: KLL only + cases.push(TestCase { + title: "quantile by (host) (0.5) — KLL".into(), + query: "quantile by (host) (0.5, data)".into(), + description: + "Single-population quantile. Store groups by [host], one KLL sketch per group key." + .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host"], + vec!["host"], + vec![], + "KLL", + "KLL", + 42, + 42, + None, + false, + true, + q_kwargs.clone(), + ), + }); + + // Case 2b: HydraKLL + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "quantile by (host) (0.5) — HydraKLL + DeltaSetAggregator (dual-input)".into(), + query: "quantile by (host) (0.5, data)".into(), + description: "Dual-input quantile. HydraKLL has per-host KLL sketches internally. \ + DeltaSetAggregator enumerates which hosts exist." 
+ .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host"], + vec![], // no store grouping + vec!["host"], // host tracked internally + "HydraKLL", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + false, + true, + q_kwargs.clone(), + ), + }); + + // ======================================================================== + // Query 3: sum_over_time(data[1m]) + // Temporal — all labels preserved, do_merge=true + // ======================================================================== + + // Case 3a: SumAccumulator only + cases.push(TestCase { + title: "sum_over_time(data[1m]) — SumAccumulator".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, single-population. All labels preserved. \ + do_merge=true to merge tumbling windows across the 1m range." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host", "service", "region"], + vec![], + "SumAccumulator", + "SumAccumulator", + 42, + 42, + None, + true, // temporal merge + false, // tumbling window (range query) + HashMap::new(), + ), + }); + + // Case 3b: MultipleSumAccumulator only (self-keyed) + cases.push(TestCase { + title: "sum_over_time(data[1m]) — MultipleSumAccumulator (self-keyed)".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, self-keyed multi-population. Store groups by [host]. \ + MultipleSumAccumulator internally maps (service, region) -> sum." 
+ .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host"], // store groups by host only + vec!["service", "region"], // rest tracked internally + "MultipleSumAccumulator", + "MultipleSumAccumulator", + 42, + 42, + None, + true, + false, + HashMap::new(), + ), + }); + + // Case 3c: CountMinSketch + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "sum_over_time(data[1m]) — CountMinSketch + DeltaSetAggregator (dual-input)".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, dual-input. Store groups by [host]. \ + CountMinSketch estimates per (service, region). \ + DeltaSetAggregator enumerates (service, region) keys." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host"], + vec!["service", "region"], + "CountMinSketch", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + true, + false, + HashMap::new(), + ), + }); + + // ======================================================================== + // Query 4: quantile_over_time(0.5, data[1m]) + // Temporal — all labels preserved, do_merge=true + // ======================================================================== + + // Case 4a: KLL only + cases.push(TestCase { + title: "quantile_over_time(0.5, data[1m]) — KLL".into(), + query: "quantile_over_time(0.5, data[1m])".into(), + description: "Temporal quantile, single-population. All labels preserved. \ + One KLL sketch per (host, service, region) group." 
+ .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host", "service", "region"], + vec!["host", "service", "region"], + vec![], + "KLL", + "KLL", + 42, + 42, + None, + true, + false, + q_kwargs.clone(), + ), + }); + + // Case 4b: HydraKLL + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "quantile_over_time(0.5, data[1m]) — HydraKLL + DeltaSetAggregator (dual-input)" + .into(), + query: "quantile_over_time(0.5, data[1m])".into(), + description: "Temporal quantile, dual-input. Store groups by [host]. \ + HydraKLL has per-(service, region) KLL sketches. \ + DeltaSetAggregator enumerates (service, region) keys." + .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host", "service", "region"], + vec!["host"], + vec!["service", "region"], + "HydraKLL", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + true, + false, + q_kwargs.clone(), + ), + }); + + cases +} + +// ============================================================================ +// Main +// ============================================================================ + +fn main() { + let cases = build_all_test_cases(); + + for (i, case) in cases.iter().enumerate() { + println!("╔══════════════════════════════════════════════════════════════════════"); + println!("║ Case {}: {}", i + 1, case.title); + println!("║ Query: {}", case.query); + println!("║ {}", case.description); + println!("╚══════════════════════════════════════════════════════════════════════"); + println!(); + + // Print key internal variables + print_context_variables(&case.context); + println!(); + + // Convert to logical plan + match case.context.to_logical_plan() { + Ok(plan) => { + println!(" Logical Plan Tree:"); + println!(" ──────────────────"); + print_plan_tree(&plan, 2); + } + Err(e) => { + println!(" ERROR converting to logical plan: {}", e); + } + } + + println!(); + println!(); + } +} diff --git 
a/QueryEngineRust/src/bin/test_offline_precomputes.rs b/QueryEngineRust/src/bin/test_offline_precomputes.rs new file mode 100644 index 0000000..fa25822 --- /dev/null +++ b/QueryEngineRust/src/bin/test_offline_precomputes.rs @@ -0,0 +1,916 @@ +// Standard library +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufReader, Read}; +use std::path::PathBuf; + +// External crates +use clap::Parser; +use serde::Deserialize; + +// Internal imports from QueryEngineRust +use promql_utilities::query_logics::enums::{QueryPatternType, Statistic}; +use query_engine_rust::data_model::{AggregateCore, KeyByLabelValues, PrecomputedOutput}; +use query_engine_rust::precompute_operators::*; + +/// CLI Arguments +#[derive(Parser, Debug)] +#[command(name = "test_offline_precomputes")] +#[command(about = "Test offline precomputes for SimpleEngine functionality")] +struct Args { + /// Path to the dumped precomputes file (.msgpack) + #[arg(short, long)] + input_file: PathBuf, + + /// Test mode: "merge", "query", or "both" + #[arg(short, long, default_value = "both")] + mode: String, + + /// Query pattern type for testing: "temporal", "spatial", or "temporal_spatial" + #[arg(short, long, default_value = "temporal")] + pattern_type: String, + + /// Aggregation type for merging (e.g., "Sum", "DatasketchesKLL", "DeltaSetAggregator") + #[arg(short, long, default_value = "Sum")] + aggregation_type: String, + + /// Statistic to query: "sum", "count", "avg", "min", "max", "quantile", etc. 
+ #[arg(short, long, default_value = "sum")] + statistic: String, + + /// Optional quantile parameter (for quantile queries) + #[arg(long)] + quantile: Option, + + /// Maximum number of precomputes to load (for testing) + #[arg(long)] + max_records: Option, + + /// Verbose logging + #[arg(short, long)] + verbose: bool, + + /// Window size for sliding window merges (number of precomputes per window) + #[arg(long)] + window_size: Option, + + /// Number of sliding window iterations (default: 1 if window_size set) + #[arg(long)] + iterations: Option, + + /// Step size for sliding (defaults to window_size for tumbling windows) + #[arg(long)] + slide_step: Option, + + /// Keep only last merged result for query testing (default: true) + #[arg(long, default_value = "true")] + keep_last_only: bool, +} + +/// Represents a single loaded precompute dump from the file +struct LoadedPrecompute { + metadata: PrecomputedOutput, + accumulator: Box, +} + +/// Deserializable version matching the dump format +/// This must match the PrecomputeDump struct in src/utils/precompute_dumper.rs +#[derive(Deserialize, Debug)] +struct PrecomputeDumpRaw { + #[allow(dead_code)] + timestamp: u64, + metadata: PrecomputedOutput, + accumulator_type: String, + accumulator_data_bytes: Vec, +} + +/// Type alias for merged precomputes result +type MergedPrecomputes = HashMap, Box>; + +/// Statistics for analysis +#[derive(Debug, Default)] +struct LoadStatistics { + total_records: usize, + records_by_type: HashMap, + records_by_aggregation_id: HashMap, + time_range: (u64, u64), // (min_start, max_end) +} + +/// Window configuration for sliding window merges +#[derive(Debug, Clone)] +struct WindowConfig { + window_size: usize, + iterations: usize, + slide_step: usize, + keep_last_only: bool, +} + +impl WindowConfig { + fn from_args(args: &Args) -> Option { + args.window_size.map(|window_size| Self { + window_size, + iterations: args.iterations.unwrap_or(1), + slide_step: 
args.slide_step.unwrap_or(window_size), + keep_last_only: args.keep_last_only, + }) + } + + /// Calculate window boundaries for given total precomputes + fn calculate_windows(&self, total: usize) -> Vec<(usize, usize)> { + let mut windows = Vec::new(); + for i in 0..self.iterations { + let start = i * self.slide_step; + if start >= total { + break; + } + let end = std::cmp::min(start + self.window_size, total); + if start < end { + windows.push((start, end)); + } + } + windows + } +} + +/// Statistics for a single window merge operation +#[derive(Debug, Clone)] +struct WindowStats { + precompute_count: usize, + merge_time: std::time::Duration, +} + +/// Statistics for windowed merge operations +#[derive(Debug, Default)] +struct WindowMergeStatistics { + window_stats: HashMap, Vec>, + total_windows: usize, + total_merges: usize, + total_merge_time: std::time::Duration, +} + +impl WindowMergeStatistics { + fn new() -> Self { + Self::default() + } + + fn add_window_stat(&mut self, key: Option, stat: WindowStats) { + self.total_windows += 1; + self.total_merges += stat.precompute_count; + self.total_merge_time += stat.merge_time; + + self.window_stats.entry(key).or_default().push(stat); + } +} + +/// Validate window-related CLI arguments +fn validate_window_args(args: &Args) -> Result<(), Box> { + if let Some(ws) = args.window_size { + if ws == 0 { + return Err("window_size must be greater than 0".into()); + } + if let Some(step) = args.slide_step { + if step == 0 { + return Err("slide_step must be greater than 0".into()); + } + } + } else { + // Ensure window-related args not used without window_size + if args.iterations.is_some() { + return Err("iterations requires window_size".into()); + } + if args.slide_step.is_some() { + return Err("slide_step requires window_size".into()); + } + } + Ok(()) +} + +fn main() -> Result<(), Box> { + // 1. Parse CLI arguments + let args = Args::parse(); + + // 2. Validate window arguments + validate_window_args(&args)?; + + // 3. 
Initialize logging + init_logging(args.verbose); + + // 4. Parse window configuration + let window_config = WindowConfig::from_args(&args); + + // 5. Load precomputes from file + println!("Loading precomputes from: {:?}", args.input_file); + let (precomputes, stats) = load_precomputes_from_file(&args.input_file, args.max_records)?; + + // 6. Display load statistics + print_load_statistics(&stats); + + // 7. Group precomputes by key for testing + let grouped_precomputes = group_precomputes_by_key(precomputes); + + // 8. Run tests based on mode + match args.mode.as_str() { + "merge" => { + println!("\n=== TESTING MERGE FUNCTIONALITY ===\n"); + test_merge_functionality( + &grouped_precomputes, + parse_pattern_type(&args.pattern_type), + &args.aggregation_type, + window_config, + )?; + } + "query" => { + if window_config.is_some() { + println!("Warning: window parameters ignored in 'query' mode"); + } + println!("\n=== TESTING QUERY FUNCTIONALITY ===\n"); + test_query_functionality( + &grouped_precomputes, + parse_statistic(&args.statistic)?, + build_query_kwargs(&args), + )?; + } + "both" => { + println!("\n=== TESTING MERGE FUNCTIONALITY ===\n"); + let merged = test_merge_functionality( + &grouped_precomputes, + parse_pattern_type(&args.pattern_type), + &args.aggregation_type, + window_config, + )?; + + println!("\n=== TESTING QUERY FUNCTIONALITY ===\n"); + test_query_on_merged( + &merged, + parse_statistic(&args.statistic)?, + build_query_kwargs(&args), + )?; + } + _ => { + return Err(format!("Invalid mode: {}", args.mode).into()); + } + } + + println!("\n=== TESTING COMPLETE ==="); + Ok(()) +} + +/// Load precomputes from a MessagePack dump file +/// +/// File format (from precompute_dumper.rs): +/// - 4 bytes: length prefix (u32, little-endian) +/// - N bytes: MessagePack-serialized PrecomputeDumpRaw +/// - Repeat... 
+fn load_precomputes_from_file( + file_path: &PathBuf, + max_records: Option, +) -> Result<(Vec, LoadStatistics), Box> { + let file = File::open(file_path)?; + let mut reader = BufReader::new(file); + let mut precomputes = Vec::new(); + let mut stats = LoadStatistics::default(); + + let mut count = 0; + loop { + // Check if we've reached max_records + if let Some(max) = max_records { + if count >= max { + println!("Reached max_records limit: {}", max); + break; + } + } + + // Read length prefix (4 bytes, little-endian) + let mut length_bytes = [0u8; 4]; + match reader.read_exact(&mut length_bytes) { + Ok(_) => {} + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + // End of file reached + break; + } + Err(e) => return Err(e.into()), + } + + let length = u32::from_le_bytes(length_bytes) as usize; + + // Read the serialized data + let mut data_bytes = vec![0u8; length]; + reader.read_exact(&mut data_bytes)?; + + // Deserialize from MessagePack + let dump: PrecomputeDumpRaw = rmp_serde::from_slice(&data_bytes) + .map_err(|e| format!("Failed to deserialize record {}: {}", count, e))?; + + // Deserialize accumulator from bytes + let accumulator = + deserialize_accumulator(&dump.accumulator_type, &dump.accumulator_data_bytes)?; + + // Update statistics + update_statistics(&mut stats, &dump); + + // Create loaded precompute + precomputes.push(LoadedPrecompute { + metadata: dump.metadata, + accumulator, + }); + + count += 1; + if count % 10000 == 0 { + println!("Loaded {} precomputes...", count); + } + } + + stats.total_records = count; + println!("Total precomputes loaded: {}", count); + + Ok((precomputes, stats)) +} + +/// Deserialize accumulator from bytes based on type +/// Only supports accumulators with deserialize_from_bytes_arroyo method +fn deserialize_accumulator( + accumulator_type: &str, + bytes: &[u8], +) -> Result, Box> { + match accumulator_type { + "SumAccumulator" => Ok(Box::new( + 
sum_accumulator::SumAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "MultipleIncreaseAccumulator" => Ok(Box::new( + multiple_increase_accumulator::MultipleIncreaseAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "CountMinSketchAccumulator" => Ok(Box::new( + count_min_sketch_accumulator::CountMinSketchAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "CountMinSketchWithHeapAccumulator" => Ok(Box::new( + count_min_sketch_with_heap_accumulator::CountMinSketchWithHeapAccumulator::deserialize_from_bytes_arroyo( + bytes, + )?, + )), + "DatasketchesKLLAccumulator" => Ok(Box::new( + datasketches_kll_accumulator::DatasketchesKLLAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "DeltaSetAggregatorAccumulator" => Ok(Box::new( + delta_set_aggregator_accumulator::DeltaSetAggregatorAccumulator::deserialize_from_bytes_arroyo( + bytes, + )?, + )), + "SetAggregatorAccumulator" => Ok(Box::new( + set_aggregator_accumulator::SetAggregatorAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + _ => Err(format!("Unsupported accumulator type: {} (only Arroyo-based accumulators supported)", accumulator_type).into()), + } +} + +/// Group precomputes by their key to prepare for merging +/// +/// This simulates how simple_engine.rs groups precomputes from the store +/// into HashMap, Vec>> +/// +/// Returns: HashMap, Vec>> +fn group_precomputes_by_key( + precomputes: Vec, +) -> HashMap, Vec>> { + let mut grouped: HashMap, Vec>> = + HashMap::new(); + + for precompute in precomputes { + grouped + .entry(precompute.metadata.key.clone()) + .or_default() + .push(precompute.accumulator); + } + + println!( + "Grouped {} precomputes into {} unique keys", + grouped.values().map(|v| v.len()).sum::(), + grouped.len() + ); + + grouped +} + +/// Test merge_precomputed_outputs functionality with optional windowing +/// This replicates the logic from simple_engine.rs:1334-1409 +/// +/// Reference: SimpleEngine::merge_precomputed_outputs +fn 
test_merge_functionality( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, + window_config: Option, +) -> Result> { + println!("Testing merge with pattern type: {:?}", query_pattern_type); + println!("Aggregation type: {}", aggregation_type); + + if let Some(ref config) = window_config { + println!("\n=== WINDOWED MERGE CONFIGURATION ==="); + println!("Window size: {}", config.window_size); + println!("Iterations: {}", config.iterations); + println!("Slide step: {}", config.slide_step); + + test_merge_with_windows( + grouped_precomputes, + query_pattern_type, + aggregation_type, + config, + ) + } else { + println!("Mode: Standard (merge all)"); + test_merge_all(grouped_precomputes, query_pattern_type, aggregation_type) + } +} + +/// Merge all precomputes for each key (standard mode) +fn test_merge_all( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, +) -> Result> { + let mut merged_results = HashMap::new(); + let mut merge_times = Vec::new(); + + for (key, precomputes) in grouped_precomputes.iter() { + if precomputes.is_empty() { + continue; + } + + let start = std::time::Instant::now(); + + let merged = if should_merge(query_pattern_type, aggregation_type) { + merge_accumulators(precomputes)? 
+ } else { + println!(" No merge needed, taking single precompute"); + assert_eq!( + precomputes.len(), + 1, + "Expected exactly 1 precompute for spatial query without DeltaSetAggregator" + ); + precomputes[0].clone() + }; + + let elapsed = start.elapsed(); + merge_times.push(elapsed); + + println!( + " Merge completed in {:.2}ms", + elapsed.as_secs_f64() * 1000.0 + ); + println!(" Result type: {}", merged.get_accumulator_type()); + + merged_results.insert(key.clone(), merged); + } + + // Print statistics + if !merge_times.is_empty() { + let total: std::time::Duration = merge_times.iter().sum(); + let avg = total / merge_times.len() as u32; + println!("\n=== Merge Statistics ==="); + println!("Total merges: {}", merge_times.len()); + println!("Total time: {:.2}ms", total.as_secs_f64() * 1000.0); + println!("Average time: {:.2}ms", avg.as_secs_f64() * 1000.0); + } + + Ok(merged_results) +} + +/// Merge precomputes using sliding windows +fn test_merge_with_windows( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, + config: &WindowConfig, +) -> Result> { + let mut final_results = HashMap::new(); + let mut window_stats = WindowMergeStatistics::new(); + + println!( + "\nProcessing {} keys with sliding window merge", + grouped_precomputes.len() + ); + + for (key, precomputes) in grouped_precomputes.iter() { + if precomputes.is_empty() { + continue; + } + + //println!("\nKey: {:?}", key); + //println!("Total precomputes: {}", precomputes.len()); + + // Calculate window boundaries + let windows = config.calculate_windows(precomputes.len()); + + if windows.is_empty() { + println!(" Warning: No valid windows for this key"); + continue; + } + + //println!(" Windows to process: {}", windows.len()); + + // Process each window + let mut window_results = Vec::new(); + + for (window_idx, (start_idx, end_idx)) in windows.iter().enumerate() { + //println!(" Window {}/{}: merging precomputes [{}..{}] ({} items)", + // 
window_idx + 1, windows.len(), start_idx, end_idx, end_idx - start_idx); + + let window_start = std::time::Instant::now(); + + // Extract window slice + let window_slice = &precomputes[*start_idx..*end_idx]; + + // Perform merge if needed + let merged = if should_merge(query_pattern_type, aggregation_type) { + merge_accumulators(window_slice)? + } else { + // For spatial queries without DeltaSetAggregator + if window_slice.len() != 1 { + println!(" Warning: Expected 1 precompute for spatial query, got {}. Taking first.", window_slice.len()); + } + window_slice[0].clone() + }; + + let window_elapsed = window_start.elapsed(); + + // Record statistics + let stat = WindowStats { + precompute_count: end_idx - start_idx, + merge_time: window_elapsed, + }; + + window_stats.add_window_stat(key.clone(), stat); + + //println!(" Window merge time: {:.2}ms", window_elapsed.as_secs_f64() * 1000.0); + //println!(" Result type: {}", merged.get_accumulator_type()); + + // Store result + if config.keep_last_only { + // Only keep the last window's result + if window_idx == windows.len() - 1 { + window_results.push(merged); + } + } else { + // Keep all window results + window_results.push(merged); + } + } + + // Store final result for this key + if !window_results.is_empty() { + // For query testing, we use the last result + let final_result = window_results.into_iter().last().unwrap(); + final_results.insert(key.clone(), final_result); + } + } + + // Print comprehensive statistics + print_window_merge_statistics(&window_stats); + + Ok(final_results) +} + +/// Print detailed statistics for windowed merge operations +fn print_window_merge_statistics(stats: &WindowMergeStatistics) { + println!("\n=== WINDOWED MERGE STATISTICS ==="); + println!("Total windows processed: {}", stats.total_windows); + println!("Total precomputes merged: {}", stats.total_merges); + println!( + "Total merge time: {:.2}ms", + stats.total_merge_time.as_secs_f64() * 1000.0 + ); + + if stats.total_windows > 0 { 
+ let avg_window_time = stats.total_merge_time / stats.total_windows as u32; + println!( + "Average time per window: {:.2}ms", + avg_window_time.as_secs_f64() * 1000.0 + ); + } + + // Per-key breakdown + println!("\n=== PER-KEY STATISTICS ==="); + for key_stats in stats.window_stats.values() { + //println!("\nKey: {:?}", key); + //println!(" Windows: {}", key_stats.len()); + + let total_precomputes: usize = key_stats.iter().map(|s| s.precompute_count).sum(); + let total_time: std::time::Duration = key_stats.iter().map(|s| s.merge_time).sum(); + + println!(" Total precomputes: {}", total_precomputes); + println!(" Total time: {:.2}ms", total_time.as_secs_f64() * 1000.0); + + if !key_stats.is_empty() { + let avg_time = total_time / key_stats.len() as u32; + println!( + " Average time per window: {:.2}ms", + avg_time.as_secs_f64() * 1000.0 + ); + } + } +} + +/// Determine if merging should happen based on pattern type and aggregation type +/// Mirrors logic from simple_engine.rs:1360-1395 +fn should_merge(pattern_type: QueryPatternType, aggregation_type: &str) -> bool { + match pattern_type { + QueryPatternType::OnlyTemporal | QueryPatternType::OneTemporalOneSpatial => true, + QueryPatternType::OnlySpatial => aggregation_type == "DeltaSetAggregator", + } +} + +/// Merge multiple accumulators +/// This replicates simple_engine.rs:1413-1441 +/// +/// Reference: SimpleEngine::merge_accumulators +fn merge_accumulators( + accumulators: &[Box], +) -> Result, Box> { + if accumulators.is_empty() { + return Err("No accumulators to merge".into()); + } + + if accumulators.len() == 1 { + return Ok(accumulators[0].clone()); + } + + let mut result = accumulators[0].clone(); + + for (i, accumulator) in accumulators[1..].iter().enumerate() { + //println!(" Merging accumulator {} of {}", i + 2, accumulators.len()); + match result.merge_with(accumulator.as_ref()) { + Ok(merged) => { + result = merged; + } + Err(e) => { + eprintln!(" Warning: Failed to merge accumulator {}: {}", i + 2, 
e); + // Continue with current result + } + } + } + + Ok(result) +} + +/// Test query_precompute_for_statistic functionality on merged results +fn test_query_on_merged( + merged_precomputes: &HashMap, Box>, + statistic: Statistic, + query_kwargs: HashMap, +) -> Result<(), Box> { + println!("Testing query with statistic: {:?}", statistic); + println!("Query kwargs: {:?}", query_kwargs); + + let mut query_results = Vec::new(); + + for (idx, (key, precompute)) in merged_precomputes.iter().enumerate() { + println!( + "\n--- Querying key {} of {} ---", + idx + 1, + merged_precomputes.len() + ); + println!("Key: {:?}", key); + println!("Accumulator type: {}", precompute.get_accumulator_type()); + + let start = std::time::Instant::now(); + + let result = + query_precompute_for_statistic(precompute.as_ref(), &statistic, key, &query_kwargs)?; + + let elapsed = start.elapsed(); + + println!(" Query result: {}", result); + println!(" Query time: {:.2}μs", elapsed.as_micros()); + + query_results.push((key.clone(), result)); + } + + // Print summary + println!("\n=== Query Results Summary ==="); + println!("Total results: {}", query_results.len()); + for (key, value) in &query_results { + println!(" {:?} => {}", key, value); + } + + Ok(()) +} + +/// Also test querying functionality on ungrouped precomputes +fn test_query_functionality( + grouped_precomputes: &HashMap, Vec>>, + statistic: Statistic, + query_kwargs: HashMap, +) -> Result<(), Box> { + println!("Testing query on individual (unmerged) precomputes"); + println!("Statistic: {:?}", statistic); + + for (key, precomputes) in grouped_precomputes { + println!( + "\n--- Key: {:?} ({} precomputes) ---", + key, + precomputes.len() + ); + + for (i, precompute) in precomputes.iter().enumerate() { + println!( + " Precompute {}: type = {}", + i, + precompute.get_accumulator_type() + ); + + let result = query_precompute_for_statistic( + precompute.as_ref(), + &statistic, + key, + &query_kwargs, + )?; + + println!(" Result: {}", 
result); + } + } + + Ok(()) +} + +/// Query a precompute for a specific statistic +/// Only supports Arroyo-based accumulators +fn query_precompute_for_statistic( + precompute: &dyn AggregateCore, + statistic: &Statistic, + key: &Option, + query_kwargs: &HashMap, +) -> Result> { + match precompute.get_accumulator_type() { + "SumAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to SumAccumulator")?; + use query_engine_rust::data_model::SingleSubpopulationAggregate; + acc.query(*statistic, None) + .map_err(|e| format!("{}", e).into()) + } + "MultipleIncreaseAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to MultipleIncreaseAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for MultipleIncreaseAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "CountMinSketchAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to CountMinSketchAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for CountMinSketchAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "CountMinSketchWithHeapAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to CountMinSketchWithHeapAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for CountMinSketchWithHeapAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "DatasketchesKLLAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to DatasketchesKLLAccumulator")?; 
+ use query_engine_rust::data_model::SingleSubpopulationAggregate; + acc.query(*statistic, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "DeltaSetAggregatorAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to DeltaSetAggregatorAccumulator")?; + if let Some(key_val) = key { + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } else { + Ok((acc.added.union(&acc.removed).count()) as f64) + } + } + "SetAggregatorAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to SetAggregatorAccumulator")?; + if let Some(key_val) = key { + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } else { + Ok(acc.added.len() as f64) + } + } + _ => Err(format!( + "Unsupported accumulator type: {}", + precompute.get_accumulator_type() + ) + .into()), + } +} + +/// Initialize logging based on verbosity +fn init_logging(verbose: bool) { + use tracing_subscriber; + + let level = if verbose { + tracing::Level::DEBUG + } else { + tracing::Level::INFO + }; + + tracing_subscriber::fmt().with_max_level(level).init(); +} + +/// Update statistics during loading +fn update_statistics(stats: &mut LoadStatistics, dump: &PrecomputeDumpRaw) { + // Count by type + *stats + .records_by_type + .entry(dump.accumulator_type.clone()) + .or_insert(0) += 1; + + // Count by aggregation_id + *stats + .records_by_aggregation_id + .entry(dump.metadata.aggregation_id) + .or_insert(0) += 1; + + // Track time range + if stats.time_range.0 == 0 || dump.metadata.start_timestamp < stats.time_range.0 { + stats.time_range.0 = dump.metadata.start_timestamp; + } + if dump.metadata.end_timestamp > stats.time_range.1 { + stats.time_range.1 = dump.metadata.end_timestamp; + } +} + +/// Print 
load statistics +fn print_load_statistics(stats: &LoadStatistics) { + println!("\n=== Load Statistics ==="); + println!("Total records: {}", stats.total_records); + + println!("\nRecords by accumulator type:"); + for (acc_type, count) in &stats.records_by_type { + println!(" {}: {}", acc_type, count); + } + + println!("\nRecords by aggregation ID:"); + for (agg_id, count) in &stats.records_by_aggregation_id { + println!(" Aggregation {}: {}", agg_id, count); + } + + println!("\nTime range:"); + println!(" Start: {}", stats.time_range.0); + println!(" End: {}", stats.time_range.1); + println!(" Duration: {} ms", stats.time_range.1 - stats.time_range.0); +} + +/// Parse pattern type string to enum +fn parse_pattern_type(s: &str) -> QueryPatternType { + match s.to_lowercase().as_str() { + "temporal" | "only_temporal" => QueryPatternType::OnlyTemporal, + "spatial" | "only_spatial" => QueryPatternType::OnlySpatial, + "temporal_spatial" | "one_temporal_one_spatial" => QueryPatternType::OneTemporalOneSpatial, + _ => { + eprintln!("Unknown pattern type '{}', defaulting to OnlyTemporal", s); + QueryPatternType::OnlyTemporal + } + } +} + +/// Parse statistic string to enum +fn parse_statistic(s: &str) -> Result> { + s.parse::() + .map_err(|_| format!("Invalid statistic: {}", s).into()) +} + +/// Build query kwargs from CLI args +fn build_query_kwargs(args: &Args) -> HashMap { + let mut kwargs = HashMap::new(); + + if let Some(ref quantile) = args.quantile { + kwargs.insert("quantile".to_string(), quantile.clone()); + } + + kwargs +} diff --git a/QueryEngineRust/src/commenting_out_flink_diff b/QueryEngineRust/src/commenting_out_flink_diff new file mode 100644 index 0000000..50c1763 --- /dev/null +++ b/QueryEngineRust/src/commenting_out_flink_diff @@ -0,0 +1,1032 @@ +diff --git a/src/data_model/enums.rs b/src/data_model/enums.rs +index b04e3bf..c04e9c7 100644 +--- a/src/data_model/enums.rs ++++ b/src/data_model/enums.rs +@@ -6,6 +6,6 @@ pub enum InputFormat { + + 
#[derive(clap::ValueEnum, Clone, Debug)] + pub enum StreamingEngine { +- Flink, ++ // Flink, + Arroyo, + } +diff --git a/src/data_model/precomputed_output.rs b/src/data_model/precomputed_output.rs +index f4ca3dd..b0f749f 100644 +--- a/src/data_model/precomputed_output.rs ++++ b/src/data_model/precomputed_output.rs +@@ -254,61 +254,61 @@ impl PrecomputedOutput { + streaming_engine: &str, + ) -> Result> { + match streaming_engine { +- "flink" => Self::deserialize_from_json_flink(data, streaming_config), ++ // "flink" => Self::deserialize_from_json_flink(data, streaming_config), + "arroyo" => Self::deserialize_from_json_arroyo(data, streaming_config), + _ => Err(format!("Unknown streaming engine: {streaming_engine}").into()), + } + } + +- /// Deserialization for Flink streaming engine +- pub fn deserialize_from_json_flink( +- data: &serde_json::Value, +- streaming_config: &HashMap, +- ) -> Result> { +- let aggregation_id = data +- .get("aggregation_id") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'aggregation_id' field")?; +- +- let start_timestamp = data +- .get("start_timestamp") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'start_timestamp' field")?; +- +- let end_timestamp = data +- .get("end_timestamp") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'end_timestamp' field")?; +- +- let key = if let Some(key_data) = data.get("key") { +- if key_data.is_null() { +- None +- } else { +- Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( +- |e| -> Box { +- format!("Failed to deserialize key: {e}").into() +- }, +- )?) +- } +- } else { +- None +- }; +- +- // Get aggregation type from streaming config lookup +- let config = streaming_config +- .get(&aggregation_id) +- .ok_or_else(|| { +- format!("Aggregation ID {aggregation_id} not found in streaming config") +- })? 
+- .clone(); +- +- Ok(Self { +- start_timestamp, +- end_timestamp, +- key, +- config, +- }) +- } ++ // /// Deserialization for Flink streaming engine ++ // pub fn deserialize_from_json_flink( ++ // data: &serde_json::Value, ++ // streaming_config: &HashMap, ++ // ) -> Result> { ++ // let aggregation_id = data ++ // .get("aggregation_id") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'aggregation_id' field")?; ++ ++ // let start_timestamp = data ++ // .get("start_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'start_timestamp' field")?; ++ ++ // let end_timestamp = data ++ // .get("end_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'end_timestamp' field")?; ++ ++ // let key = if let Some(key_data) = data.get("key") { ++ // if key_data.is_null() { ++ // None ++ // } else { ++ // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( ++ // |e| -> Box { ++ // format!("Failed to deserialize key: {e}").into() ++ // }, ++ // )?) ++ // } ++ // } else { ++ // None ++ // }; ++ ++ // // Get aggregation type from streaming config lookup ++ // let config = streaming_config ++ // .get(&aggregation_id) ++ // .ok_or_else(|| { ++ // format!("Aggregation ID {aggregation_id} not found in streaming config") ++ // })? 
++ // .clone(); ++ ++ // Ok(Self { ++ // start_timestamp, ++ // end_timestamp, ++ // key, ++ // config, ++ // }) ++ // } + + /// Deserialization for Arroyo streaming engine + pub fn deserialize_from_json_arroyo( +diff --git a/src/drivers/kafka_consumer.rs b/src/drivers/kafka_consumer.rs +index c2d090f..06f1651 100644 +--- a/src/drivers/kafka_consumer.rs ++++ b/src/drivers/kafka_consumer.rs +@@ -224,105 +224,105 @@ impl KafkaConsumer { + InputFormat::Json => { + // Handle streaming engine specific logic + match self.config.streaming_engine { +- StreamingEngine::Flink => { +- debug!("Received message of length: {}", payload.len()); ++ // StreamingEngine::Flink => { ++ // debug!("Received message of length: {}", payload.len()); + +- let json_data = if self.config.decompress_json { +- // Decompress using gzip +- let mut decoder = GzDecoder::new(payload); +- let mut decompressed = Vec::new(); +- match decoder.read_to_end(&mut decompressed) { +- Ok(_) => { +- debug!( +- "Decompressed JSON message of length: {}", +- decompressed.len() +- ); +- decompressed +- } +- Err(e) => { +- error!("Error decompressing gzip data: {}", e); +- return Err(format!("Gzip decompression error: {e}").into()); +- } +- } +- } else { +- payload.to_vec() +- }; ++ // let json_data = if self.config.decompress_json { ++ // // Decompress using gzip ++ // let mut decoder = GzDecoder::new(payload); ++ // let mut decompressed = Vec::new(); ++ // match decoder.read_to_end(&mut decompressed) { ++ // Ok(_) => { ++ // debug!( ++ // "Decompressed JSON message of length: {}", ++ // decompressed.len() ++ // ); ++ // decompressed ++ // } ++ // Err(e) => { ++ // error!("Error decompressing gzip data: {}", e); ++ // return Err(format!("Gzip decompression error: {e}").into()); ++ // } ++ // } ++ // } else { ++ // payload.to_vec() ++ // }; + +- let json_str = match String::from_utf8(json_data) { +- Ok(s) => s, +- Err(e) => { +- error!("Error converting bytes to UTF-8: {}", e); +- return Err(format!("UTF-8 
conversion error: {e}").into()); +- } +- }; ++ // let json_str = match String::from_utf8(json_data) { ++ // Ok(s) => s, ++ // Err(e) => { ++ // error!("Error converting bytes to UTF-8: {}", e); ++ // return Err(format!("UTF-8 conversion error: {e}").into()); ++ // } ++ // }; + +- let json_parse_start_time = Instant::now(); ++ // let json_parse_start_time = Instant::now(); + +- let json_dict: serde_json::Value = match serde_json::from_str(&json_str) { +- Ok(dict) => { +- let json_parse_duration = json_parse_start_time.elapsed(); +- debug!( +- "JSON parsing took: {:.2}ms", +- json_parse_duration.as_secs_f64() * 1000.0 +- ); +- dict +- } +- Err(e) => { +- error!("Error parsing JSON: {}", e); +- debug!("JSON content: {}", json_str); +- return Err(format!("JSON parsing error: {e}").into()); +- } +- }; ++ // let json_dict: serde_json::Value = match serde_json::from_str(&json_str) { ++ // Ok(dict) => { ++ // let json_parse_duration = json_parse_start_time.elapsed(); ++ // debug!( ++ // "JSON parsing took: {:.2}ms", ++ // json_parse_duration.as_secs_f64() * 1000.0 ++ // ); ++ // dict ++ // } ++ // Err(e) => { ++ // error!("Error parsing JSON: {}", e); ++ // debug!("JSON content: {}", json_str); ++ // return Err(format!("JSON parsing error: {e}").into()); ++ // } ++ // }; + +- debug!( +- "Deserializing JSON message: {}, {}, {}", +- json_dict +- .get("aggregation_id") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0), +- json_dict +- .get("start_timestamp") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0), +- json_dict +- .get("end_timestamp") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0) +- ); ++ // debug!( ++ // "Deserializing JSON message: {}, {}, {}", ++ // json_dict ++ // .get("aggregation_id") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0), ++ // json_dict ++ // .get("start_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0), ++ // json_dict ++ // .get("end_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0) ++ // ); + +- let 
deserialize_start_time = Instant::now(); ++ // let deserialize_start_time = Instant::now(); + +- match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) { +- Ok((output, precompute)) => { +- let deserialize_duration = deserialize_start_time.elapsed(); +- debug!( +- "Deserialization took: {:.2}ms", +- deserialize_duration.as_secs_f64() * 1000.0 +- ); +- debug!( +- "Deserialized item: {}, {}, {}", +- output.config.aggregation_id, +- output.start_timestamp, +- output.end_timestamp +- ); +- debug!("Successfully deserialized Flink JSON message with precompute data"); +- let total_message_duration = message_start_time.elapsed(); +- debug!( +- "Total message processing took: {:.2}ms", +- total_message_duration.as_secs_f64() * 1000.0 +- ); +- Ok(Some((output, precompute))) +- } +- Err(e) => { +- error!( +- "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}", +- e +- ); +- debug!("JSON content: {}", json_str); +- Err(e) +- } +- } +- } ++ // match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) { ++ // Ok((output, precompute)) => { ++ // let deserialize_duration = deserialize_start_time.elapsed(); ++ // debug!( ++ // "Deserialization took: {:.2}ms", ++ // deserialize_duration.as_secs_f64() * 1000.0 ++ // ); ++ // debug!( ++ // "Deserialized item: {}, {}, {}", ++ // output.config.aggregation_id, ++ // output.start_timestamp, ++ // output.end_timestamp ++ // ); ++ // debug!("Successfully deserialized Flink JSON message with precompute data"); ++ // let total_message_duration = message_start_time.elapsed(); ++ // debug!( ++ // "Total message processing took: {:.2}ms", ++ // total_message_duration.as_secs_f64() * 1000.0 ++ // ); ++ // Ok(Some((output, precompute))) ++ // } ++ // Err(e) => { ++ // error!( ++ // "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}", ++ // e ++ // ); ++ // debug!("JSON content: {}", json_str); ++ // Err(e) ++ // } ++ // } ++ // } + StreamingEngine::Arroyo => { + 
// Arroyo messages - gzip decompression is applied at precompute level, not message level + let json_str = match String::from_utf8(payload.to_vec()) { +diff --git a/src/precompute_operators/count_min_sketch_accumulator.rs b/src/precompute_operators/count_min_sketch_accumulator.rs +index bcd98e4..658999e 100644 +--- a/src/precompute_operators/count_min_sketch_accumulator.rs ++++ b/src/precompute_operators/count_min_sketch_accumulator.rs +@@ -14,12 +14,12 @@ use promql_utilities::query_logics::enums::Statistic; + pub struct CountMinSketchAccumulator { + pub row_num: usize, + pub col_num: usize, +- pub sketch: Vec>, ++ pub sketch: Vec>, + } + + impl CountMinSketchAccumulator { + pub fn new(row_num: usize, col_num: usize) -> Self { +- let sketch = vec![vec![0; col_num]; row_num]; ++ let sketch = vec![vec![0.0; col_num]; row_num]; + Self { + row_num, + col_num, +@@ -44,7 +44,7 @@ impl CountMinSketchAccumulator { + for i in 0..self.row_num { + let hash_value = xxh32(key_bytes, i as u32); + let col_index = (hash_value as usize) % self.col_num; +- self.sketch[i][col_index] += value as i32; ++ self.sketch[i][col_index] += value; + } + } + +@@ -61,7 +61,7 @@ impl CountMinSketchAccumulator { + let key_str = key_values.join(";"); + let key_bytes = key_str.as_bytes(); + +- let mut min_value = i32::MAX; ++ let mut min_value = f64::MAX; + + // Query each row and take the minimum + for i in 0..self.row_num { +@@ -70,44 +70,44 @@ impl CountMinSketchAccumulator { + min_value = min_value.min(self.sketch[i][col_index]); + } + +- min_value as f64 ++ min_value + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let row_num = data["row_num"] +- .as_u64() +- .ok_or("Missing or invalid 'row_num' field")? as usize; +- let col_num = data["col_num"] +- .as_u64() +- .ok_or("Missing or invalid 'col_num' field")? 
as usize; +- +- let sketch_data = data["sketch"] +- .as_array() +- .ok_or("Missing or invalid 'sketch' field")?; +- +- let mut sketch = Vec::new(); +- for row in sketch_data { +- let row_array = row.as_array().ok_or("Invalid row in sketch data")?; +- let mut sketch_row = Vec::new(); +- for cell in row_array { +- let value = cell.as_i64().ok_or("Invalid cell value in sketch data")? as i32; +- sketch_row.push(value); +- } +- sketch.push(sketch_row); +- } +- +- Ok(Self { +- row_num, +- col_num, +- sketch, +- }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let row_num = data["row_num"] ++ // .as_f64() ++ // .ok_or("Missing or invalid 'row_num' field")? as usize; ++ // let col_num = data["col_num"] ++ // .as_f64() ++ // .ok_or("Missing or invalid 'col_num' field")? as usize; ++ ++ // let sketch_data = data["sketch"] ++ // .as_array() ++ // .ok_or("Missing or invalid 'sketch' field")?; ++ ++ // let mut sketch = Vec::new(); ++ // for row in sketch_data { ++ // let row_array = row.as_array().ok_or("Invalid row in sketch data")?; ++ // let mut sketch_row = Vec::new(); ++ // for cell in row_array { ++ // let value = cell.as_f64().ok_or("Invalid cell value in sketch data")?; ++ // sketch_row.push(value); ++ // } ++ // sketch.push(sketch_row); ++ // } ++ ++ // Ok(Self { ++ // row_num, ++ // col_num, ++ // sketch, ++ // }) ++ // } + + pub fn deserialize_from_bytes_arroyo( + buffer: &[u8], + ) -> Result> { + // Arroyo uses MessagePack format: [sketch_counters, col_num, row_num] +- let precompute: (Vec>, usize, usize) = rmp_serde::from_slice(buffer) ++ let precompute: (Vec>, usize, usize) = rmp_serde::from_slice(buffer) + .map_err(|e| format!("Failed to deserialize CountMinSketch from MessagePack: {e}"))?; + + let (sketch_counters, col_num, row_num) = precompute; +@@ -118,43 +118,46 @@ impl CountMinSketchAccumulator { + }) + } + +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- if buffer.len() < 8 { +- return Err("Buffer too short for 
row_num and col_num".into()); +- } +- +- let row_num = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; +- let col_num = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; +- +- let expected_size = 8 + (row_num * col_num * 4); +- if buffer.len() < expected_size { +- return Err("Buffer too short for sketch data".into()); +- } +- +- let mut sketch = Vec::new(); +- let mut offset = 8; +- +- for _ in 0..row_num { +- let mut row = Vec::new(); +- for _ in 0..col_num { +- let value = i32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]); +- row.push(value); +- offset += 4; +- } +- sketch.push(row); +- } +- +- Ok(Self { +- row_num, +- col_num, +- sketch, +- }) +- } ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // if buffer.len() < 8 { ++ // return Err("Buffer too short for row_num and col_num".into()); ++ // } ++ ++ // TODO: this logic will need to be checked for i32 -> f64 ++ // Github Issue #11 ++ ++ // let row_num = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; ++ // let col_num = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; ++ ++ // let expected_size = 8 + (row_num * col_num * 4); ++ // if buffer.len() < expected_size { ++ // return Err("Buffer too short for sketch data".into()); ++ // } ++ ++ // let mut sketch = Vec::new(); ++ // let mut offset = 8; ++ ++ // for _ in 0..row_num { ++ // let mut row = Vec::new(); ++ // for _ in 0..col_num { ++ // let value = i32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]); ++ // row.push(value); ++ // offset += 4; ++ // } ++ // sketch.push(row); ++ // } ++ ++ // Ok(Self { ++ // row_num, ++ // col_num, ++ // sketch, ++ // }) ++ // } + } + + impl SerializableToSink for CountMinSketchAccumulator { +@@ -319,7 +322,7 @@ mod tests { + // Check all values are initialized to 0 
+ for row in &cms.sketch { + for &value in row { +- assert_eq!(value, 0); ++ assert_eq!(value, 0.0); + } + } + } +@@ -356,17 +359,17 @@ mod tests { + let mut cms2 = CountMinSketchAccumulator::new(2, 3); + + // Set some values +- cms1.sketch[0][0] = 5; +- cms1.sketch[1][2] = 10; ++ cms1.sketch[0][0] = 5.0; ++ cms1.sketch[1][2] = 10.0; + +- cms2.sketch[0][0] = 3; +- cms2.sketch[0][1] = 7; ++ cms2.sketch[0][0] = 3.0; ++ cms2.sketch[0][1] = 7.0; + + let merged = CountMinSketchAccumulator::merge_accumulators(vec![cms1, cms2]).unwrap(); + +- assert_eq!(merged.sketch[0][0], 8); // 5 + 3 +- assert_eq!(merged.sketch[0][1], 7); // 0 + 7 +- assert_eq!(merged.sketch[1][2], 10); // 10 + 0 ++ assert_eq!(merged.sketch[0][0], 8.0); // 5 + 3 ++ assert_eq!(merged.sketch[0][1], 7.0); // 0 + 7 ++ assert_eq!(merged.sketch[1][2], 10.0); // 10 + 0 + } + + #[test] +@@ -378,30 +381,30 @@ mod tests { + assert!(result.is_err()); + } + +- #[test] +- fn test_count_min_sketch_serialization() { +- let mut cms = CountMinSketchAccumulator::new(2, 3); +- cms.sketch[0][1] = 42; +- cms.sketch[1][2] = 100; +- +- // Test JSON serialization +- let json_value = cms.serialize_to_json(); +- let deserialized = CountMinSketchAccumulator::deserialize_from_json(&json_value).unwrap(); +- +- assert_eq!(deserialized.row_num, 2); +- assert_eq!(deserialized.col_num, 3); +- assert_eq!(deserialized.sketch[0][1], 42); +- assert_eq!(deserialized.sketch[1][2], 100); +- +- // Test binary serialization +- let bytes = cms.serialize_to_bytes(); +- let deserialized_bytes = CountMinSketchAccumulator::deserialize_from_bytes(&bytes).unwrap(); +- +- assert_eq!(deserialized_bytes.row_num, 2); +- assert_eq!(deserialized_bytes.col_num, 3); +- assert_eq!(deserialized_bytes.sketch[0][1], 42); +- assert_eq!(deserialized_bytes.sketch[1][2], 100); +- } ++ // #[test] ++ // fn test_count_min_sketch_serialization() { ++ // let mut cms = CountMinSketchAccumulator::new(2, 3); ++ // cms.sketch[0][1] = 42.0; ++ // cms.sketch[1][2] = 100.0; ++ 
++ // // Test JSON serialization ++ // let json_value = cms.serialize_to_json(); ++ // let deserialized = CountMinSketchAccumulator::deserialize_from_json(&json_value).unwrap(); ++ ++ // assert_eq!(deserialized.row_num, 2); ++ // assert_eq!(deserialized.col_num, 3); ++ // assert_eq!(deserialized.sketch[0][1], 42.0); ++ // assert_eq!(deserialized.sketch[1][2], 100.0); ++ ++ // // Test binary serialization ++ // let bytes = cms.serialize_to_bytes(); ++ // let deserialized_bytes = CountMinSketchAccumulator::deserialize_from_bytes(&bytes).unwrap(); ++ ++ // assert_eq!(deserialized_bytes.row_num, 2); ++ // assert_eq!(deserialized_bytes.col_num, 3); ++ // assert_eq!(deserialized_bytes.sketch[0][1], 42.0); ++ // assert_eq!(deserialized_bytes.sketch[1][2], 100.0); ++ // } + + #[test] + fn test_count_min_sketch_as_aggregate_core() { +diff --git a/src/precompute_operators/datasketches_kll_accumulator.rs b/src/precompute_operators/datasketches_kll_accumulator.rs +index c72b700..3635f8e 100644 +--- a/src/precompute_operators/datasketches_kll_accumulator.rs ++++ b/src/precompute_operators/datasketches_kll_accumulator.rs +@@ -68,62 +68,62 @@ impl DatasketchesKLLAccumulator { + sorted_values[index] + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let max_capacity = data["max_capacity"].as_u64().unwrap_or(1000) as usize; +- +- let values = if let Some(values_array) = data["values"].as_array() { +- values_array +- .iter() +- .map(|v| v.as_f64().unwrap_or(0.0)) +- .collect() +- } else { +- Vec::new() +- }; +- +- Ok(Self { +- values, +- max_capacity, +- }) +- } +- +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- if buffer.len() < 8 { +- return Err("Buffer too short for max_capacity and values_count".into()); +- } +- +- let max_capacity = +- u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; +- let values_count = +- u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; +- +- let expected_size = 8 + 
(values_count * 8); +- if buffer.len() < expected_size { +- return Err("Buffer too short for values data".into()); +- } +- +- let mut values = Vec::new(); +- let mut offset = 8; +- +- for _ in 0..values_count { +- let value = f64::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- buffer[offset + 4], +- buffer[offset + 5], +- buffer[offset + 6], +- buffer[offset + 7], +- ]); +- values.push(value); +- offset += 8; +- } +- +- Ok(Self { +- values, +- max_capacity, +- }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let max_capacity = data["max_capacity"].as_u64().unwrap_or(1000) as usize; ++ ++ // let values = if let Some(values_array) = data["values"].as_array() { ++ // values_array ++ // .iter() ++ // .map(|v| v.as_f64().unwrap_or(0.0)) ++ // .collect() ++ // } else { ++ // Vec::new() ++ // }; ++ ++ // Ok(Self { ++ // values, ++ // max_capacity, ++ // }) ++ // } ++ ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // if buffer.len() < 8 { ++ // return Err("Buffer too short for max_capacity and values_count".into()); ++ // } ++ ++ // let max_capacity = ++ // u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; ++ // let values_count = ++ // u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; ++ ++ // let expected_size = 8 + (values_count * 8); ++ // if buffer.len() < expected_size { ++ // return Err("Buffer too short for values data".into()); ++ // } ++ ++ // let mut values = Vec::new(); ++ // let mut offset = 8; ++ ++ // for _ in 0..values_count { ++ // let value = f64::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // buffer[offset + 4], ++ // buffer[offset + 5], ++ // buffer[offset + 6], ++ // buffer[offset + 7], ++ // ]); ++ // values.push(value); ++ // offset += 8; ++ // } ++ ++ // Ok(Self { ++ // values, ++ // max_capacity, ++ // }) ++ // } + + pub 
fn deserialize_from_bytes_arroyo( + buffer: &[u8], +diff --git a/src/precompute_operators/delta_set_aggregator_accumulator.rs b/src/precompute_operators/delta_set_aggregator_accumulator.rs +index 2248748..46a853b 100644 +--- a/src/precompute_operators/delta_set_aggregator_accumulator.rs ++++ b/src/precompute_operators/delta_set_aggregator_accumulator.rs +@@ -43,114 +43,114 @@ impl DeltaSetAggregatorAccumulator { + self.removed.insert(key); + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let mut added = HashSet::new(); +- let mut removed = HashSet::new(); +- +- if let Some(added_array) = data["added"].as_array() { +- for item in added_array { +- // Handle nested structure with "values" key +- let key_data = if let Some(values) = item.get("values") { +- values +- } else { +- item +- }; +- let key = KeyByLabelValues::deserialize_from_json(key_data)?; +- added.insert(key); +- } +- } +- +- if let Some(removed_array) = data["removed"].as_array() { +- for item in removed_array { +- // Handle nested structure with "values" key +- let key_data = if let Some(values) = item.get("values") { +- values +- } else { +- item +- }; +- let key = KeyByLabelValues::deserialize_from_json(key_data)?; +- removed.insert(key); +- } +- } +- +- Ok(Self { added, removed }) +- } +- +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- let mut offset = 0; +- let mut added = HashSet::new(); +- let mut removed = HashSet::new(); +- +- // Read added set +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for added set size".into()); +- } +- let added_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- for _ in 0..added_size { +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for added item size".into()); +- } +- let item_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) 
as usize; +- offset += 4; +- +- if offset + item_size > buffer.len() { +- return Err("Buffer too short for added item data".into()); +- } +- let key = +- KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; +- offset += item_size; +- +- added.insert(key); +- } +- +- // Read removed set +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for removed set size".into()); +- } +- let removed_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- for _ in 0..removed_size { +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for removed item size".into()); +- } +- let item_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- if offset + item_size > buffer.len() { +- return Err("Buffer too short for removed item data".into()); +- } +- let key = +- KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; +- offset += item_size; +- +- removed.insert(key); +- } +- +- Ok(Self { added, removed }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let mut added = HashSet::new(); ++ // let mut removed = HashSet::new(); ++ ++ // if let Some(added_array) = data["added"].as_array() { ++ // for item in added_array { ++ // // Handle nested structure with "values" key ++ // let key_data = if let Some(values) = item.get("values") { ++ // values ++ // } else { ++ // item ++ // }; ++ // let key = KeyByLabelValues::deserialize_from_json(key_data)?; ++ // added.insert(key); ++ // } ++ // } ++ ++ // if let Some(removed_array) = data["removed"].as_array() { ++ // for item in removed_array { ++ // // Handle nested structure with "values" key ++ // let key_data = if let Some(values) = item.get("values") { ++ // values ++ // } else { ++ // item ++ // }; ++ // let key = 
KeyByLabelValues::deserialize_from_json(key_data)?; ++ // removed.insert(key); ++ // } ++ // } ++ ++ // Ok(Self { added, removed }) ++ // } ++ ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // let mut offset = 0; ++ // let mut added = HashSet::new(); ++ // let mut removed = HashSet::new(); ++ ++ // // Read added set ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for added set size".into()); ++ // } ++ // let added_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // for _ in 0..added_size { ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for added item size".into()); ++ // } ++ // let item_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // if offset + item_size > buffer.len() { ++ // return Err("Buffer too short for added item data".into()); ++ // } ++ // let key = ++ // KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; ++ // offset += item_size; ++ ++ // added.insert(key); ++ // } ++ ++ // // Read removed set ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for removed set size".into()); ++ // } ++ // let removed_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // for _ in 0..removed_size { ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for removed item size".into()); ++ // } ++ // let item_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // if offset + item_size > buffer.len() { ++ // return Err("Buffer too short for removed item 
data".into()); ++ // } ++ // let key = ++ // KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; ++ // offset += item_size; ++ ++ // removed.insert(key); ++ // } ++ ++ // Ok(Self { added, removed }) ++ // } + + pub fn deserialize_from_bytes_arroyo( + buffer: &[u8], +@@ -412,36 +412,35 @@ mod tests { + } + + #[test] +- fn test_delta_set_aggregator_serialization() { +- let mut acc = DeltaSetAggregatorAccumulator::new(); +- +- let key1 = create_test_key("web"); +- let key2 = create_test_key("api"); +- +- acc.add_key(key1.clone()); +- acc.remove_key(key2.clone()); +- +- // Test JSON serialization +- let json_value = acc.serialize_to_json(); +- let deserialized = +- DeltaSetAggregatorAccumulator::deserialize_from_json(&json_value).unwrap(); +- +- assert_eq!(deserialized.added.len(), 1); +- assert_eq!(deserialized.removed.len(), 1); +- assert!(deserialized.added.contains(&key1)); +- assert!(deserialized.removed.contains(&key2)); +- +- // Test binary serialization +- let bytes = acc.serialize_to_bytes(); +- let deserialized_bytes = +- DeltaSetAggregatorAccumulator::deserialize_from_bytes(&bytes).unwrap(); +- +- assert_eq!(deserialized_bytes.added.len(), 1); +- assert_eq!(deserialized_bytes.removed.len(), 1); +- assert!(deserialized_bytes.added.contains(&key1)); +- assert!(deserialized_bytes.removed.contains(&key2)); +- } +- ++ // fn test_delta_set_aggregator_serialization() { ++ // let mut acc = DeltaSetAggregatorAccumulator::new(); ++ ++ // let key1 = create_test_key("web"); ++ // let key2 = create_test_key("api"); ++ ++ // acc.add_key(key1.clone()); ++ // acc.remove_key(key2.clone()); ++ ++ // // Test JSON serialization ++ // let json_value = acc.serialize_to_json(); ++ // let deserialized = ++ // DeltaSetAggregatorAccumulator::deserialize_from_json(&json_value).unwrap(); ++ ++ // assert_eq!(deserialized.added.len(), 1); ++ // assert_eq!(deserialized.removed.len(), 1); ++ // assert!(deserialized.added.contains(&key1)); ++ // 
assert!(deserialized.removed.contains(&key2)); ++ ++ // // Test binary serialization ++ // let bytes = acc.serialize_to_bytes(); ++ // let deserialized_bytes = ++ // DeltaSetAggregatorAccumulator::deserialize_from_bytes(&bytes).unwrap(); ++ ++ // assert_eq!(deserialized_bytes.added.len(), 1); ++ // assert_eq!(deserialized_bytes.removed.len(), 1); ++ // assert!(deserialized_bytes.added.contains(&key1)); ++ // assert!(deserialized_bytes.removed.contains(&key2)); ++ // } + #[test] + fn test_delta_set_aggregator_query() { + let acc = DeltaSetAggregatorAccumulator::new(); diff --git a/QueryEngineRust/src/data_model/aggregation_config.rs b/QueryEngineRust/src/data_model/aggregation_config.rs new file mode 100644 index 0000000..3480c45 --- /dev/null +++ b/QueryEngineRust/src/data_model/aggregation_config.rs @@ -0,0 +1 @@ +pub use sketch_db_common::aggregation_config::*; diff --git a/QueryEngineRust/src/data_model/aggregation_reference.rs b/QueryEngineRust/src/data_model/aggregation_reference.rs new file mode 100644 index 0000000..ccd18c5 --- /dev/null +++ b/QueryEngineRust/src/data_model/aggregation_reference.rs @@ -0,0 +1,33 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationReference { + pub aggregation_id: u64, + /// For circular_buffer policy: keep this many most recent aggregates + #[serde(skip_serializing_if = "Option::is_none")] + pub num_aggregates_to_retain: Option, + /// For read_based policy: remove aggregate after this many reads + #[serde(skip_serializing_if = "Option::is_none")] + pub read_count_threshold: Option, +} + +impl AggregationReference { + pub fn new(aggregation_id: u64, num_aggregates_to_retain: Option) -> Self { + Self { + aggregation_id, + num_aggregates_to_retain, + read_count_threshold: None, + } + } + + pub fn with_read_count_threshold( + aggregation_id: u64, + read_count_threshold: Option, + ) -> Self { + Self { + aggregation_id, + num_aggregates_to_retain: None, + 
read_count_threshold, + } + } +} diff --git a/QueryEngineRust/src/data_model/enums.rs b/QueryEngineRust/src/data_model/enums.rs new file mode 100644 index 0000000..92c0d13 --- /dev/null +++ b/QueryEngineRust/src/data_model/enums.rs @@ -0,0 +1,32 @@ +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum InputFormat { + Json, + Byte, +} + +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum StreamingEngine { + Flink, + Arroyo, +} + +pub use sketch_db_common::enums::{CleanupPolicy, QueryLanguage}; + +#[derive(clap::ValueEnum, Clone, Debug, PartialEq)] +pub enum QueryProtocol { + #[value(alias = "PROMETHEUS_HTTP")] + PrometheusHttp, + #[value(alias = "CLICKHOUSE_HTTP")] + ClickHouseHttp, + #[value(alias = "ELASTIC_HTTP")] + ElasticHttp, + // Future: DuckDbHttp, etc. +} + +#[derive(clap::ValueEnum, Clone, Debug, Copy, PartialEq)] +pub enum LockStrategy { + #[value(name = "global")] + Global, + #[value(name = "per-key")] + PerKey, +} diff --git a/QueryEngineRust/src/data_model/inference_config.rs b/QueryEngineRust/src/data_model/inference_config.rs new file mode 100644 index 0000000..0626fef --- /dev/null +++ b/QueryEngineRust/src/data_model/inference_config.rs @@ -0,0 +1,249 @@ +use anyhow::Result; +use serde_yaml::Value; +use std::collections::HashSet; +use std::fs::File; +use std::io::BufReader; + +use crate::data_model::{CleanupPolicy, PromQLSchema, QueryConfig, QueryLanguage}; +use promql_utilities::data_model::KeyByLabelNames; +use sql_utilities::sqlhelper::{SQLSchema, Table}; + +/// Schema configuration that can be either PromQL or SQL format +#[derive(Debug, Clone)] +pub enum SchemaConfig { + PromQL(PromQLSchema), + SQL(SQLSchema), + ElasticQueryDSL, + ElasticSQL, +} + +#[derive(Debug, Clone)] +pub struct InferenceConfig { + pub schema: SchemaConfig, + pub query_configs: Vec, + pub cleanup_policy: CleanupPolicy, +} + +impl InferenceConfig { + pub fn new(query_language: QueryLanguage, cleanup_policy: CleanupPolicy) -> Self { + let schema = match query_language { + 
QueryLanguage::promql => SchemaConfig::PromQL(PromQLSchema::new()), + QueryLanguage::sql => SchemaConfig::SQL(SQLSchema::new(Vec::new())), + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, // Placeholder for QueryDSL + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, + }; + Self { + schema, + query_configs: Vec::new(), + cleanup_policy, + } + } + + pub fn from_yaml_file(yaml_file: &str, query_language: QueryLanguage) -> Result { + let file = File::open(yaml_file)?; + let reader = BufReader::new(file); + let data: Value = serde_yaml::from_reader(reader)?; + + Self::from_yaml_data(&data, query_language) + } + + pub fn from_yaml_data(data: &Value, query_language: QueryLanguage) -> Result { + let schema = match query_language { + QueryLanguage::promql => { + let promql_schema = Self::parse_promql_schema(data)?; + SchemaConfig::PromQL(promql_schema) + } + QueryLanguage::sql => { + let sql_schema = Self::parse_sql_schema(data)?; + SchemaConfig::SQL(sql_schema) + } + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, + }; + + let cleanup_policy = Self::parse_cleanup_policy(data)?; + let query_configs = Self::parse_query_configs(data, cleanup_policy)?; + + Ok(Self { + schema, + query_configs, + cleanup_policy, + }) + } + + /// Parse PromQL schema from YAML data (metrics: key) + fn parse_promql_schema(data: &Value) -> Result { + let mut promql_schema = PromQLSchema::new(); + if let Some(metrics) = data.get("metrics") { + if let Some(metrics_map) = metrics.as_mapping() { + for (metric_name_val, labels_val) in metrics_map { + if let (Some(metric_name), Some(labels_seq)) = + (metric_name_val.as_str(), labels_val.as_sequence()) + { + let labels: Vec = labels_seq + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + let key_by_label_names = KeyByLabelNames::new(labels); + promql_schema = + promql_schema.add_metric(metric_name.to_string(), 
key_by_label_names); + } + } + } + } + Ok(promql_schema) + } + + /// Parse SQL schema from YAML data (tables: key at top level, matching ArroyoSketch format) + fn parse_sql_schema(data: &Value) -> Result { + let tables_data = data + .get("tables") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| { + anyhow::anyhow!("Missing or invalid tables field for SQL query language") + })?; + + let mut tables = Vec::new(); + for table_data in tables_data { + let name = table_data + .get("name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing name field in table"))? + .to_string(); + + let time_column = table_data + .get("time_column") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing time_column field in table {}", name))? + .to_string(); + + let value_columns: HashSet = table_data + .get("value_columns") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| anyhow::anyhow!("Missing value_columns field in table {}", name))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + + let metadata_columns: HashSet = table_data + .get("metadata_columns") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| anyhow::anyhow!("Missing metadata_columns field in table {}", name))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + + tables.push(Table::new( + name, + time_column, + value_columns, + metadata_columns, + )); + } + + Ok(SQLSchema::new(tables)) + } + + /// Parse cleanup policy from YAML data. Errors if not specified. + fn parse_cleanup_policy(data: &Value) -> Result { + let cleanup_policy_data = data.get("cleanup_policy").ok_or_else(|| { + anyhow::anyhow!( + "Missing cleanup_policy section in inference_config.yaml. \ + Must specify cleanup_policy.name as one of: circular_buffer, read_based, no_cleanup" + ) + })?; + + let name = cleanup_policy_data + .get("name") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + anyhow::anyhow!( + "Missing cleanup_policy.name in inference_config.yaml. 
\ + Must be one of: circular_buffer, read_based, no_cleanup" + ) + })?; + + match name { + "circular_buffer" => Ok(CleanupPolicy::CircularBuffer), + "read_based" => Ok(CleanupPolicy::ReadBased), + "no_cleanup" => Ok(CleanupPolicy::NoCleanup), + _ => Err(anyhow::anyhow!( + "Invalid cleanup policy: '{}'. Valid options: circular_buffer, read_based, no_cleanup", + name + )), + } + } + + fn parse_query_configs( + data: &Value, + cleanup_policy: CleanupPolicy, + ) -> Result> { + // Handle queries field -> query_configs + let query_configs = if let Some(queries) = data.get("queries").and_then(|v| v.as_sequence()) + { + let mut configs = Vec::new(); + for query_data in queries { + let query = query_data + .get("query") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing query field"))? + .to_string(); + + // Parse aggregations if present + let aggregations = if let Some(aggregations_data) = + query_data.get("aggregations").and_then(|v| v.as_sequence()) + { + let mut agg_refs = Vec::new(); + for agg_data in aggregations_data { + let aggregation_id = agg_data + .get("aggregation_id") + .and_then(|v| v.as_u64()) + .ok_or_else(|| { + anyhow::anyhow!("Missing aggregation_id in aggregation") + })?; + + // Parse the appropriate parameter based on cleanup policy + let agg_ref = match cleanup_policy { + CleanupPolicy::CircularBuffer => { + let num_aggregates_to_retain = agg_data + .get("num_aggregates_to_retain") + .and_then(|v| v.as_u64()); + crate::data_model::AggregationReference::new( + aggregation_id, + num_aggregates_to_retain, + ) + } + CleanupPolicy::ReadBased => { + let read_count_threshold = agg_data + .get("read_count_threshold") + .and_then(|v| v.as_u64()); + crate::data_model::AggregationReference::with_read_count_threshold( + aggregation_id, + read_count_threshold, + ) + } + CleanupPolicy::NoCleanup => { + // No cleanup parameters needed + crate::data_model::AggregationReference::new(aggregation_id, None) + } + }; + agg_refs.push(agg_ref); + } + 
agg_refs + } else { + Vec::new() + }; + + let config = QueryConfig::new(query).with_aggregations(aggregations); + configs.push(config); + } + configs + } else { + Vec::new() + }; + Ok(query_configs) + } +} diff --git a/QueryEngineRust/src/data_model/key_by_label_values.rs b/QueryEngineRust/src/data_model/key_by_label_values.rs new file mode 100644 index 0000000..9b282bf --- /dev/null +++ b/QueryEngineRust/src/data_model/key_by_label_values.rs @@ -0,0 +1,163 @@ +use serde::{Deserialize, Serialize}; +// use std::collections::HashMap; +use std::hash::{Hash, Hasher}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct KeyByLabelValues { + // pub labels: HashMap, + pub labels: Vec, +} + +impl KeyByLabelValues { + pub fn new() -> Self { + Self { labels: Vec::new() } + } + + pub fn new_with_labels(labels: Vec) -> Self { + Self { labels } + } + + pub fn insert(&mut self, value: String) { + self.labels.push(value); + } + + pub fn get(&self, index: usize) -> Option<&String> { + self.labels.get(index) + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::to_value(&self.labels).unwrap_or(serde_json::Value::Null) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let labels: Vec = serde_json::from_value(data.clone())?; + Ok(Self { labels }) + } + + pub fn serialize_to_bytes(&self) -> Vec { + bincode::serialize(&self.labels).unwrap_or_default() + } + + pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { + let labels: Vec = bincode::deserialize(buffer)?; + Ok(Self { labels }) + } + + /// Encode labels as a semicolon-joined string — the canonical key format used + /// for all sketch hashing (CountMinSketch, HydraKLL, SetAggregator, DeltaSet). + pub fn to_semicolon_str(&self) -> String { + self.labels.join(";") + } + + /// Decode a semicolon-joined string back into a KeyByLabelValues. 
+ pub fn from_semicolon_str(s: &str) -> Self { + Self { + labels: s.split(';').map(|s| s.to_string()).collect(), + } + } + + pub fn is_empty(&self) -> bool { + self.labels.is_empty() + } + + pub fn len(&self) -> usize { + self.labels.len() + } +} + +impl Hash for KeyByLabelValues { + fn hash(&self, state: &mut H) { + // Create a sorted vector of key-value pairs for consistent hashing + let mut sorted_pairs: Vec<_> = self.labels.iter().collect(); + sorted_pairs.sort(); + + for value in sorted_pairs { + value.hash(state); + } + } +} + +impl Default for KeyByLabelValues { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Display for KeyByLabelValues { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{")?; + let mut first = true; + for value in &self.labels { + if !first { + write!(f, ", ")?; + } + write!(f, "{value}")?; + first = false; + } + write!(f, "}}") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_key_by_label_values() { + let mut key = KeyByLabelValues::new(); + key.insert("localhost:8080".to_string()); + key.insert("prometheus".to_string()); + + assert_eq!(key.len(), 2); + assert_eq!(key.get(0), Some(&"localhost:8080".to_string())); + assert_eq!(key.get(1), Some(&"prometheus".to_string())); + } + + #[test] + fn test_serialization() { + let mut key = KeyByLabelValues::new(); + key.insert("test".to_string()); + + let json = key.serialize_to_json(); + let deserialized = KeyByLabelValues::deserialize_from_json(&json).unwrap(); + assert_eq!(key, deserialized); + } + + #[test] + fn test_byte_serialization() { + let mut key = KeyByLabelValues::new(); + key.insert("test".to_string()); + + let bytes = key.serialize_to_bytes(); + let deserialized = KeyByLabelValues::deserialize_from_bytes(&bytes).unwrap(); + assert_eq!(key, deserialized); + } + + #[test] + fn test_semicolon_roundtrip() { + let key = KeyByLabelValues::new_with_labels(vec!["web".to_string(), "prod".to_string()]); + 
assert_eq!(key.to_semicolon_str(), "web;prod"); + let roundtripped = KeyByLabelValues::from_semicolon_str("web;prod"); + assert_eq!(roundtripped, key); + } + + #[test] + fn test_hash_consistency() { + let mut key1 = KeyByLabelValues::new(); + key1.insert("a".to_string()); + key1.insert("b".to_string()); + + let mut key2 = KeyByLabelValues::new(); + key2.insert("b".to_string()); + key2.insert("a".to_string()); + + // Should hash to the same value regardless of insertion order + let mut hasher1 = std::collections::hash_map::DefaultHasher::new(); + let mut hasher2 = std::collections::hash_map::DefaultHasher::new(); + + key1.hash(&mut hasher1); + key2.hash(&mut hasher2); + + assert_eq!(hasher1.finish(), hasher2.finish()); + } +} diff --git a/QueryEngineRust/src/data_model/measurement.rs b/QueryEngineRust/src/data_model/measurement.rs new file mode 100644 index 0000000..0fe1abc --- /dev/null +++ b/QueryEngineRust/src/data_model/measurement.rs @@ -0,0 +1,94 @@ +use serde::{Deserialize, Serialize}; +use std::ops::Add; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Measurement { + pub value: f64, +} + +impl Measurement { + pub fn new(value: f64) -> Self { + Self { value } + } + + pub fn serialize_to_bytes(&self) -> Vec { + self.value.to_le_bytes().to_vec() + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::json!({ + "value": self.value + }) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let value = data["value"].as_f64().ok_or_else(|| { + serde_json::Error::io(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Missing or invalid 'value' field", + )) + })?; + Ok(Self::new(value)) + } + + pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { + if buffer.len() < 8 { + return Err("Buffer too short for f64".into()); + } + let value = f64::from_le_bytes([ + buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7], + ]); + Ok(Self::new(value)) + } +} + 
+impl Add for Measurement { + type Output = Measurement; + + fn add(self, other: Measurement) -> Measurement { + Measurement::new(self.value + other.value) + } +} + +impl Add for &Measurement { + type Output = Measurement; + + fn add(self, other: &Measurement) -> Measurement { + Measurement::new(self.value + other.value) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_measurement_creation() { + let measurement = Measurement::new(42.5); + assert_eq!(measurement.value, 42.5); + } + + #[test] + fn test_measurement_addition() { + let m1 = Measurement::new(10.0); + let m2 = Measurement::new(20.0); + let result = m1 + m2; + assert_eq!(result.value, 30.0); + } + + #[test] + fn test_serialization() { + let measurement = Measurement::new(42.5); + let json = measurement.serialize_to_json(); + let deserialized = Measurement::deserialize_from_json(&json).unwrap(); + assert_eq!(measurement, deserialized); + } + + #[test] + fn test_byte_serialization() { + let measurement = Measurement::new(42.5); + let bytes = measurement.serialize_to_bytes(); + let deserialized = Measurement::deserialize_from_bytes(&bytes).unwrap(); + assert_eq!(measurement, deserialized); + } +} diff --git a/QueryEngineRust/src/data_model/mod.rs b/QueryEngineRust/src/data_model/mod.rs new file mode 100644 index 0000000..ce8a6d6 --- /dev/null +++ b/QueryEngineRust/src/data_model/mod.rs @@ -0,0 +1,23 @@ +pub mod aggregation_config; +pub mod aggregation_reference; +pub mod enums; +pub mod inference_config; +pub mod key_by_label_values; +pub mod measurement; +pub mod precomputed_output; +pub mod promql_schema; +pub mod query_config; +pub mod streaming_config; +pub mod traits; + +pub use aggregation_config::*; +pub use aggregation_reference::*; +pub use enums::*; +pub use inference_config::*; +pub use key_by_label_values::*; +pub use measurement::*; +pub use precomputed_output::*; +pub use promql_schema::*; +pub use query_config::*; +pub use streaming_config::*; +pub use traits::*; diff 
--git a/QueryEngineRust/src/data_model/precomputed_output.rs b/QueryEngineRust/src/data_model/precomputed_output.rs new file mode 100644 index 0000000..c5ebab5 --- /dev/null +++ b/QueryEngineRust/src/data_model/precomputed_output.rs @@ -0,0 +1,690 @@ +use chrono::DateTime; +use flate2::read::GzDecoder; +use serde::{Deserialize, Serialize}; +use std::io::Read as _; +use tracing::error; + +use crate::data_model::traits::SerializableToSink; +use crate::data_model::{KeyByLabelValues, StreamingConfig}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PrecomputedOutput { + pub start_timestamp: u64, + pub end_timestamp: u64, + pub key: Option, + pub aggregation_id: u64, + // pub config: AggregationConfig, + // Note: precompute will be handled separately as it's a trait object +} + +impl PrecomputedOutput { + pub fn new( + start_timestamp: u64, + end_timestamp: u64, + key: Option, + aggregation_id: u64, + // TODO: we should remove AggregationConfig from here. Configs should only be accessed from the StreamingConfig read in main.rs + // config: AggregationConfig, + ) -> Self { + Self { + start_timestamp, + end_timestamp, + key, + aggregation_id, + // config, + } + } + + pub fn get_freshness_debug_string(&self) -> String { + // Match Python implementation more closely + let current_time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let freshness = current_time.saturating_sub(self.end_timestamp); + format!( + "end_timestamp: {}, current_time: {}, freshness: {}", + self.end_timestamp, current_time, freshness + ) + } + + // /// Serialize PrecomputedOutput with precompute data to match Python JSON format + // pub fn serialize_to_json_with_precompute( + // &self, + // precompute: &dyn crate::data_model::AggregateCore, + // ) -> serde_json::Value { + // serde_json::json!({ + // // "config": self.config.serialize_to_json(), + // "start_timestamp": self.start_timestamp, + // "end_timestamp": 
self.end_timestamp, + // "key": self.key.as_ref().map(|k| k.serialize_to_json()), + // "precompute": precompute.serialize_to_json() + // }) + // } + + /// Deserialize from bytes using Python-compatible format + pub fn deserialize_from_bytes_with_precompute( + _data: &[u8], + ) -> Result<(Self, Vec), Box> { + error!("Not implemented: deserialize_from_bytes_with_precompute"); + Err(("Not implemented: deserialize_from_bytes_with_precompute").into()) + } + + // /// Simple deserialization from bytes (compatibility method for Kafka consumer) + // /// This doesn't include precompute data and is primarily for compatibility + // pub fn deserialize_from_bytes( + // data: &[u8], + // ) -> Result> { + // // Try to deserialize as JSON first (common case) + // if let Ok(json_str) = String::from_utf8(data.to_vec()) { + // if let Ok(json_value) = serde_json::from_str::(&json_str) { + // return Self::deserialize_from_json(&json_value); + // } + // } + + // // If JSON fails, try binary format + // let (output, _precompute_bytes) = Self::deserialize_from_bytes_with_precompute(data) + // .map_err(|e| -> Box { + // format!("Failed to deserialize from bytes: {e}").into() + // })?; + // Ok(output) + // } + + // /// Legacy deserialization method for backward compatibility + // pub fn deserialize_from_json( + // data: &serde_json::Value, + // ) -> Result> { + // // Extract required fields + // let config_data = data.get("config").ok_or("Missing 'config' field in JSON")?; + // // Use custom deserialization for the config + // let config = AggregationConfig::deserialize_from_json(config_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize config: {e}").into() + // }, + // )?; + + // let start_timestamp = data + // .get("start_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'start_timestamp' field")?; + + // let end_timestamp = data + // .get("end_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'end_timestamp' field")?; + + 
// let key = if let Some(key_data) = data.get("key") { + // if key_data.is_null() { + // None + // } else { + // // Use the custom deserialize_from_json method which expects the direct HashMap format + // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize key: {e}").into() + // }, + // )?) + // } + // } else { + // None + // }; + + // // For now, we create a PrecomputedOutput without precompute data + // // In a full implementation, we would deserialize the precompute field as well + // Ok(Self { + // start_timestamp, + // end_timestamp, + // key, + // config, + // }) + // } + + // /// Deserialization for Flink streaming engine + // pub fn deserialize_from_json_flink( + // data: &serde_json::Value, + // streaming_config: &HashMap, + // ) -> Result> { + // let aggregation_id = data + // .get("aggregation_id") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'aggregation_id' field")?; + + // let start_timestamp = data + // .get("start_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'start_timestamp' field")?; + + // let end_timestamp = data + // .get("end_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'end_timestamp' field")?; + + // let key = if let Some(key_data) = data.get("key") { + // if key_data.is_null() { + // None + // } else { + // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize key: {e}").into() + // }, + // )?) + // } + // } else { + // None + // }; + + // // Get aggregation type from streaming config lookup + // let config = streaming_config + // .get(&aggregation_id) + // .ok_or_else(|| { + // format!("Aggregation ID {aggregation_id} not found in streaming config") + // })? 
+ // .clone(); + + // Ok(Self { + // start_timestamp, + // end_timestamp, + // key, + // config, + // }) + // } + + /// Deserialization for Arroyo streaming engine + pub fn deserialize_from_json_arroyo( + data: &serde_json::Value, + // streaming_config: &HashMap, + streaming_config: &StreamingConfig, + ) -> Result< + (Self, Box), + Box, + > { + let aggregation_id = data + .get("aggregation_id") + .and_then(|v| v.as_u64()) + .ok_or("Missing or invalid 'aggregation_id' field")?; + + // Parse window timestamps from Arroyo format + let window = data + .get("window") + .ok_or("Missing 'window' field in Arroyo data")?; + let start_str = window + .get("start") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'start' field in window")?; + let end_str = window + .get("end") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'end' field in window")?; + + // Parse timestamps with Z suffix - convert to milliseconds + let start_timestamp = (DateTime::parse_from_rfc3339(&format!("{start_str}Z")) + .map_err(|e| format!("Failed to parse start timestamp: {e}"))? + .timestamp() as u64) + * 1000; + let end_timestamp = (DateTime::parse_from_rfc3339(&format!("{end_str}Z")) + .map_err(|e| format!("Failed to parse end timestamp: {e}"))? + .timestamp() as u64) + * 1000; + + // Parse key from semicolon-separated format - always create KeyByLabelValues (even if empty) + let key_str = data.get("key").and_then(|v| v.as_str()).unwrap_or(""); + let labels: Vec = key_str.split(';').map(|s| s.to_string()).collect(); + // let key = Some(KeyByLabelValues::new_with_labels( + // labels + // .into_iter() + // .enumerate() + // .map(|(i, v)| (format!("label_{i}"), v)) + // .collect(), + // )); + let key = Some(KeyByLabelValues::new_with_labels(labels)); + + // Get aggregation type from streaming config lookup + let config = streaming_config + .get_aggregation_config(aggregation_id) + .ok_or_else(|| { + format!("Aggregation ID {aggregation_id} not found in streaming config") + })? 
+ .clone(); + + let precomputed_output = Self { + start_timestamp, + end_timestamp, + key, + aggregation_id, + }; + + // data["precompute"] has been compressed using the following logic + // fn gzip_compress(data: &[u8]) -> Option> { + // let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + // encoder.write_all(&data).ok()?; + // encoder.finish().ok() + // } + + // Extract and decompress precompute data + // Equivalent python code: + // precompute_bytes = bytes.fromhex(data["precompute"]) + // precompute_bytes = gzip.decompress(precompute_bytes) + let precompute_hex = data + .get("precompute") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'precompute' field")?; + + // NOTE: Check if hex decoding is actually needed - might depend on Arroyo's JSON serialization + let compressed_bytes = hex::decode(precompute_hex) + .map_err(|e| format!("Failed to decode hex precompute data: {e}"))?; + + // Decompress gzip data + + let mut decoder = GzDecoder::new(&compressed_bytes[..]); + let mut precompute_bytes = Vec::new(); + decoder + .read_to_end(&mut precompute_bytes) + .map_err(|e| format!("Failed to decompress precompute data: {e}"))?; + + let precompute = Self::create_precompute_from_bytes( + &config.aggregation_type, + Vec::as_slice(&precompute_bytes), + "arroyo", + )?; + + Ok((precomputed_output, precompute)) + } + + // /// Deserialize from JSON and extract precompute data following Python implementation + // /// This is the public method that should be used by Kafka consumer + // pub fn deserialize_from_json_with_precompute( + // data: &serde_json::Value, + // ) -> Result< + // (Self, Box), + // Box, + // > { + // debug!("Deserializing PrecomputedOutput with precompute from JSON: {data}"); + // // First get the metadata + // let precomputed_output = Self::deserialize_from_json(data)?; + // debug!( + // "Deserialized PrecomputedOutput metadata: {:?}", + // precomputed_output + // ); + + // // Then deserialize the precompute data based on 
aggregation type + // let precompute_data = data + // .get("precompute") + // .ok_or("Missing 'precompute' field in JSON")?; + // let precompute = Self::create_precompute_from_json( + // &precomputed_output.config.aggregation_type, + // precompute_data, + // )?; + + // Ok((precomputed_output, precompute)) + // } + + // /// Deserialize from bytes and extract precompute data following Python implementation + // /// This is the public method that should be used by Kafka consumer + // pub fn deserialize_from_bytes_with_precompute_and_type( + // data: &[u8], + // aggregation_type: &str, + // ) -> Result< + // (Self, Box), + // Box, + // > { + // // First get the metadata and precompute bytes + // let (precomputed_output, precompute_bytes) = Self::deserialize_from_bytes_with_precompute( + // data, + // ) + // .map_err(|e| -> Box { + // format!("Failed to deserialize from bytes: {e}").into() + // })?; + + // // Then create the accumulator from the precompute bytes + // let precompute = + // Self::create_precompute_from_bytes(aggregation_type, &precompute_bytes, "flink")?; + + // Ok((precomputed_output, precompute)) + // } + + // /// Factory method to create precompute accumulator from JSON data + // fn create_precompute_from_json( + // precompute_type: &str, + // data: &serde_json::Value, + // ) -> Result, Box> + // { + // use crate::precompute_operators::*; + + // match precompute_type { + // "Sum" | "sum" => { + // let accumulator = SumAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize SumAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MinMax" => { + // let accumulator = MinMaxAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MinMaxAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "Increase" => { + // let accumulator = IncreaseAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize IncreaseAccumulator: {e}"))?; + // 
Ok(Box::new(accumulator)) + // } + // "MultipleSum" => { + // let accumulator = MultipleSumAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MultipleSumAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MultipleMinMax" => { + // // Extract sub_type from data + // let _sub_type = data + // .get("sub_type") + // .and_then(|v| v.as_str()) + // .unwrap_or("min") + // .to_string(); + // let accumulator = MultipleMinMaxAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MultipleMinMaxAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MultipleIncrease" => { + // let accumulator = MultipleIncreaseAccumulator::deserialize_from_json(data) + // .map_err(|e| { + // format!("Failed to deserialize MultipleIncreaseAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // "CountMinSketch" => { + // let accumulator = CountMinSketchAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize CountMinSketchAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "DatasketchesKLL" => { + // let accumulator = + // DatasketchesKLLAccumulator::deserialize_from_json(data).map_err(|e| { + // format!("Failed to deserialize DatasketchesKLLAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // "DeltaSetAggregator" => { + // let accumulator = DeltaSetAggregatorAccumulator::deserialize_from_json(data) + // .map_err(|e| { + // format!("Failed to deserialize DeltaSetAggregatorAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // _ => Err(format!("Unknown precompute type: {precompute_type}").into()), + // } + // } + + /// Factory method to create precompute accumulator from bytes + fn create_precompute_from_bytes( + precompute_type: &str, + buffer: &[u8], + streaming_engine: &str, + ) -> Result, Box> + { + use crate::precompute_operators::*; + + // TODO: add arroyo methods in each operator + // TODO: remove 
flink methods + + match precompute_type { + "Sum" | "sum" => { + let accumulator = if streaming_engine == "flink" { + SumAccumulator::deserialize_from_bytes(buffer) + } else { + SumAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize SumAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MinMax" => { + let accumulator = MinMaxAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize MinMaxAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "Increase" => { + let accumulator = IncreaseAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize IncreaseAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MultipleSum" => { + let accumulator = MultipleSumAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize MultipleSumAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MultipleMinMax" => { + let accumulator = + MultipleMinMaxAccumulator::deserialize_from_bytes(buffer, "min".to_string()) + .map_err(|e| { + format!("Failed to deserialize MultipleMinMaxAccumulator: {e}") + })?; + Ok(Box::new(accumulator)) + } + "MultipleIncrease" => { + let accumulator = if streaming_engine == "flink" { + MultipleIncreaseAccumulator::deserialize_from_bytes(buffer) + } else { + MultipleIncreaseAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize MultipleIncreaseAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "CountMinSketch" => { + let accumulator = if streaming_engine == "flink" { + CountMinSketchAccumulator::deserialize_from_bytes(buffer) + } else { + CountMinSketchAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize CountMinSketchAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "CountMinSketchWithHeap" => { + let accumulator = if streaming_engine == "flink" { + 
CountMinSketchWithHeapAccumulator::deserialize_from_bytes(buffer) + } else { + CountMinSketchWithHeapAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| { + format!("Failed to deserialize CountMinSketchWithHeapAccumulator: {e}") + })?; + Ok(Box::new(accumulator)) + } + "DatasketchesKLL" => { + let accumulator = if streaming_engine == "flink" { + DatasketchesKLLAccumulator::deserialize_from_bytes(buffer) + } else { + DatasketchesKLLAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize DatasketchesKLLAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "HydraKLL" => { + let accumulator = if streaming_engine == "flink" { + return Err("HydraKLL not supported for Flink".into()); + } else { + HydraKllSketchAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize HydraKllSketchAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "DeltaSetAggregator" => { + let accumulator = if streaming_engine == "flink" { + DeltaSetAggregatorAccumulator::deserialize_from_bytes(buffer) + } else { + DeltaSetAggregatorAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize DeltaSetAggregatorAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + _ => Err(format!("Unknown precompute type: {precompute_type}").into()), + } + } +} + +impl SerializableToSink for PrecomputedOutput { + fn serialize_to_json(&self) -> serde_json::Value { + // Default implementation without precompute data for backward compatibility + serde_json::json!({ + // "config": self.config.serialize_to_json(), + "start_timestamp": self.start_timestamp, + "end_timestamp": self.end_timestamp, + "key": self.key.as_ref().map(|k| k.serialize_to_json()) + }) + } + + fn serialize_to_bytes(&self) -> Vec { + // Default implementation without precompute data for backward compatibility + serde_json::to_vec(self).unwrap_or_else(|_| Vec::new()) + } +} + +// #[cfg(test)] +// mod 
tests { +// use super::*; + +// #[test] +// fn test_aggregation_config_creation() { +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string(), "job".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// assert_eq!(config.aggregation_id, 1); +// assert_eq!(config.metric, "cpu_usage"); +// assert_eq!(config.aggregation_type, "sum"); +// assert_eq!(config.tumbling_window_size, 10); +// } + +// #[test] +// fn test_query_config_builder() { +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let aggregation = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let query_config = QueryConfig::new("sum_over_time(cpu_usage[5m])".to_string()) +// .add_aggregation(aggregation); + +// assert_eq!(query_config.query, "sum_over_time(cpu_usage[5m])"); +// assert_eq!(query_config.aggregations.len(), 1); +// } + +// #[test] +// fn test_precomputed_output_json_serialization_with_precompute() { +// // Test Issue 9: PrecomputedOutput JSON serialization alignment with Python behavior +// use crate::precompute_operators::SumAccumulator; +// use std::collections::BTreeMap; + +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let mut key_labels = BTreeMap::new(); +// key_labels.insert("instance".to_string(), "server1".to_string()); +// let key = 
Some(KeyByLabelValues::new_with_labels(key_labels)); + +// let precomputed_output = PrecomputedOutput::new( +// 1000, // start_timestamp +// 2000, // end_timestamp +// key.clone(), +// config.clone(), +// ); + +// let accumulator = SumAccumulator::with_sum(42.5); + +// // Test JSON serialization with precompute data (matching Python format) +// let json_with_precompute = +// precomputed_output.serialize_to_json_with_precompute(&accumulator); + +// // Verify the JSON structure matches Python implementation +// assert!(json_with_precompute["config"].is_object()); +// assert_eq!(json_with_precompute["start_timestamp"], 1000); +// assert_eq!(json_with_precompute["end_timestamp"], 2000); +// assert!(json_with_precompute["key"].is_object()); +// assert!(json_with_precompute["precompute"].is_object()); + +// // Verify precompute data is included (this is the key difference from default serialization) +// assert_eq!(json_with_precompute["precompute"]["sum"], 42.5); + +// // Test default JSON serialization without precompute data +// let json_default = precomputed_output.serialize_to_json(); + +// // Verify default serialization does NOT include precompute data +// assert!( +// json_default["precompute"].is_null() +// || !json_default.as_object().unwrap().contains_key("precompute") +// ); +// assert_eq!(json_default["start_timestamp"], 1000); +// assert_eq!(json_default["end_timestamp"], 2000); +// } + +// #[test] +// fn test_precomputed_output_byte_serialization_with_precompute() { +// // Test Issue 9: PrecomputedOutput byte serialization alignment with Python behavior +// use crate::precompute_operators::SumAccumulator; + +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let precomputed_output = 
PrecomputedOutput::new( +// 1000, // start_timestamp +// 2000, // end_timestamp +// None, // key +// config, +// ); + +// let accumulator = SumAccumulator::with_sum(42.5); + +// // Test byte serialization with precompute data (matching Python format) +// let bytes_with_precompute = +// precomputed_output.serialize_to_bytes_with_precompute(&accumulator); + +// // Test round-trip: serialize then deserialize +// let (deserialized_output, precompute_bytes) = +// PrecomputedOutput::deserialize_from_bytes_with_precompute(&bytes_with_precompute) +// .unwrap(); + +// // Verify round-trip works correctly +// assert_eq!(deserialized_output.start_timestamp, 1000); +// assert_eq!(deserialized_output.end_timestamp, 2000); +// assert!(deserialized_output.key.is_none()); +// assert_eq!(deserialized_output.config.aggregation_id, 1); +// assert_eq!(deserialized_output.config.metric, "cpu_usage"); + +// // Verify precompute data can be deserialized back to SumAccumulator +// let deserialized_accumulator = +// SumAccumulator::deserialize_from_bytes(&precompute_bytes).unwrap(); +// assert_eq!(deserialized_accumulator.sum, 42.5); +// } +// } diff --git a/QueryEngineRust/src/data_model/promql_schema.rs b/QueryEngineRust/src/data_model/promql_schema.rs new file mode 100644 index 0000000..1cd3859 --- /dev/null +++ b/QueryEngineRust/src/data_model/promql_schema.rs @@ -0,0 +1,32 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use promql_utilities::data_model::KeyByLabelNames; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PromQLSchema { + pub config: HashMap, +} + +impl PromQLSchema { + pub fn new() -> Self { + Self { + config: HashMap::new(), + } + } + + pub fn add_metric(mut self, metric: String, labels: KeyByLabelNames) -> Self { + self.config.insert(metric, labels); + self + } + + pub fn get_labels(&self, metric: &str) -> Option<&KeyByLabelNames> { + self.config.get(metric) + } +} + +impl Default for PromQLSchema { + fn default() -> Self { + 
Self::new() + } +} diff --git a/QueryEngineRust/src/data_model/query_config.rs b/QueryEngineRust/src/data_model/query_config.rs new file mode 100644 index 0000000..cb05328 --- /dev/null +++ b/QueryEngineRust/src/data_model/query_config.rs @@ -0,0 +1,28 @@ +use serde::{Deserialize, Serialize}; + +use crate::data_model::AggregationReference; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryConfig { + pub query: String, + pub aggregations: Vec, +} + +impl QueryConfig { + pub fn new(query: String) -> Self { + Self { + query, + aggregations: Vec::new(), + } + } + + pub fn add_aggregation(mut self, aggregation: AggregationReference) -> Self { + self.aggregations.push(aggregation); + self + } + + pub fn with_aggregations(mut self, aggregations: Vec) -> Self { + self.aggregations = aggregations; + self + } +} diff --git a/QueryEngineRust/src/data_model/streaming_config.rs b/QueryEngineRust/src/data_model/streaming_config.rs new file mode 100644 index 0000000..746a632 --- /dev/null +++ b/QueryEngineRust/src/data_model/streaming_config.rs @@ -0,0 +1,114 @@ +use anyhow::Result; +use core::panic; +use serde::{Deserialize, Serialize}; +use serde_yaml::Value; +use std::collections::HashMap; +use std::fs::File; +use std::io::BufReader; +use std::ops::Index; + +use crate::data_model::aggregation_config::AggregationConfig; +use crate::data_model::enums::QueryLanguage; +use crate::data_model::inference_config::{InferenceConfig, SchemaConfig}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StreamingConfig { + pub aggregation_configs: HashMap, +} + +impl StreamingConfig { + pub fn new(aggregation_configs: HashMap) -> Self { + Self { + aggregation_configs, + } + } + + pub fn get_aggregation_config(&self, aggregation_id: u64) -> Option<&AggregationConfig> { + self.aggregation_configs.get(&aggregation_id) + } + + pub fn get_all_aggregation_configs(&self) -> &HashMap { + &self.aggregation_configs + } + + pub fn contains(&self, aggregation_id: u64) -> 
bool { + self.aggregation_configs.contains_key(&aggregation_id) + } + + pub fn from_yaml_file(yaml_file: &str) -> Result { + let file = File::open(yaml_file)?; + let reader = BufReader::new(file); + let data: Value = serde_yaml::from_reader(reader)?; + + Self::from_yaml_data(&data, None) + } + + pub fn from_yaml_data( + data: &Value, + inference_config: Option<&InferenceConfig>, + ) -> Result { + let mut retention_map: HashMap = HashMap::new(); + let mut read_count_threshold_map: HashMap = HashMap::new(); + + if let Some(inference_config) = inference_config { + for query_config in &inference_config.query_configs { + for aggregation in &query_config.aggregations { + let aggregation_id = aggregation.aggregation_id; + if let Some(num_aggregates) = aggregation.num_aggregates_to_retain { + // OLD: Keep last value only (for backwards compatibility) + retention_map.insert(aggregation_id, num_aggregates); + + // NEW: Sum up num_aggregates_to_retain across all queries + *read_count_threshold_map.entry(aggregation_id).or_insert(0) += + num_aggregates; + } + } + } + } + + // Derive query_language from inference_config schema + let query_language = inference_config + .map(|ic| match &ic.schema { + SchemaConfig::PromQL(_) => QueryLanguage::promql, + SchemaConfig::SQL(_) => QueryLanguage::sql, + SchemaConfig::ElasticQueryDSL => QueryLanguage::elastic_querydsl, + SchemaConfig::ElasticSQL => QueryLanguage::elastic_sql, + }) + .unwrap_or(QueryLanguage::promql); // Default to promql if no inference_config + + let mut aggregation_configs: HashMap = HashMap::new(); + + if let Some(aggregations) = data.get("aggregations").and_then(|v| v.as_sequence()) { + for aggregation_data in aggregations { + if let Some(aggregation_id) = aggregation_data.get("aggregationId") { + let aggregation_id_u64 = aggregation_id.as_u64().or_else(|| panic!()).unwrap(); + let num_aggregates_to_retain = retention_map.get(&aggregation_id_u64); + let read_count_threshold = 
read_count_threshold_map.get(&aggregation_id_u64); + let config = AggregationConfig::from_yaml_data( + aggregation_data, + num_aggregates_to_retain.copied(), + read_count_threshold.copied(), + query_language, + )?; + aggregation_configs.insert(aggregation_id_u64, config); + } + } + } + + Ok(Self::new(aggregation_configs)) + } +} + +impl Index for StreamingConfig { + type Output = AggregationConfig; + + fn index(&self, aggregation_id: u64) -> &Self::Output { + &self.aggregation_configs[&aggregation_id] + } +} + +impl Default for StreamingConfig { + fn default() -> Self { + Self::new(HashMap::new()) + } +} diff --git a/QueryEngineRust/src/data_model/traits.rs b/QueryEngineRust/src/data_model/traits.rs new file mode 100644 index 0000000..83183d6 --- /dev/null +++ b/QueryEngineRust/src/data_model/traits.rs @@ -0,0 +1,122 @@ +use crate::data_model::KeyByLabelValues; +use serde_json::Value; +use std::collections::HashMap; + +use promql_utilities::query_logics::enums::Statistic; + +pub use sketch_db_common::traits::SerializableToSink; + +/// Core trait for all aggregates containing shared functionality +/// This trait provides common operations like serialization, cloning, and type identification +pub trait AggregateCore: SerializableToSink + Send + Sync { + /// Clone this accumulator into a boxed trait object + fn clone_boxed_core(&self) -> Box; + + /// Get the type name of this accumulator + fn type_name(&self) -> &'static str; + + /// Downcast to Any for type checking + fn as_any(&self) -> &dyn std::any::Any; + + /// Merge this accumulator with another accumulator of the same type + /// Returns a new merged accumulator, leaving the original unchanged + fn merge_with( + &self, + other: &dyn AggregateCore, + ) -> Result, Box>; + + /// Get the accumulator type identifier for merge compatibility checking + fn get_accumulator_type(&self) -> &'static str; + + /// Get all keys stored in this accumulator + fn get_keys(&self) -> Option>; +} + +/// Trait for accumulators that 
support a single subpopulation +/// These accumulators store a single aggregate value (e.g., Sum, Increase) +pub trait SingleSubpopulationAggregate: AggregateCore { + /// Query the accumulator for a specific statistic + fn query( + &self, + statistic: Statistic, + query_kwargs: Option<&HashMap>, + ) -> Result>; + + /// Clone this accumulator into a boxed trait object + fn clone_boxed(&self) -> Box; +} + +/// Trait for accumulators that support multiple subpopulations identified by keys +/// These accumulators store separate values for different label combinations +pub trait MultipleSubpopulationAggregate: AggregateCore { + /// Query the accumulator for a specific statistic and key + fn query( + &self, + statistic: Statistic, + key: &KeyByLabelValues, + query_kwargs: Option<&HashMap>, + ) -> Result>; + + /// Clone this accumulator into a boxed trait object + fn clone_boxed(&self) -> Box; +} + +/// Factory traits for creating and merging accumulators (object-safe) +pub trait SingleSubpopulationAggregateFactory { + fn merge_accumulators( + &self, + accumulators: Vec>, + ) -> Result, Box>; + fn create_default(&self) -> Box; +} + +pub trait MultipleSubpopulationAggregateFactory { + fn merge_accumulators( + &self, + accumulators: Vec>, + ) -> Result, Box>; + fn create_default(&self) -> Box; +} + +/// Trait for merging multiple accumulators of the same type +pub trait MergeableAccumulator { + fn merge_accumulators( + accumulators: Vec, + ) -> Result> + where + T: Sized; +} + +// Implement Clone for the new trait objects +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed_core() + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed() + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed() + } +} + +/// Factory trait for creating accumulators from serialized data +pub trait AccumulatorFactory { + fn create_from_json( + accumulator_type: &str, + data: &Value, + ) -> Result, Box>; + fn create_from_bytes( + 
accumulator_type: &str, + buffer: &[u8], + ) -> Result, Box>; +} + +#[cfg(test)] +mod tests {} diff --git a/QueryEngineRust/src/drivers/ingest/kafka.rs b/QueryEngineRust/src/drivers/ingest/kafka.rs new file mode 100644 index 0000000..e2b9f03 --- /dev/null +++ b/QueryEngineRust/src/drivers/ingest/kafka.rs @@ -0,0 +1,444 @@ +use rdkafka::config::ClientConfig; +use rdkafka::consumer::{Consumer, StreamConsumer}; +use rdkafka::Message; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tracing::{debug, error, info, warn}; + +use crate::data_model::enums::{InputFormat, StreamingEngine}; +use crate::data_model::traits::SerializableToSink; +use crate::data_model::PrecomputedOutput; +use crate::data_model::StreamingConfig; +use crate::stores::Store; +use crate::utils::PrecomputeDumper; + +#[derive(Debug, Clone)] +pub struct KafkaConsumerConfig { + pub broker: String, + pub topic: String, + pub group_id: String, + pub auto_offset_reset: String, + pub input_format: InputFormat, + pub decompress_json: bool, + pub batch_size: usize, + pub poll_timeout_ms: u64, + pub streaming_engine: StreamingEngine, + pub dump_precomputes: bool, + pub dump_output_dir: Option, +} + +pub struct KafkaConsumer { + config: KafkaConsumerConfig, + store: Arc, + consumer: StreamConsumer, + streaming_config: Arc, + previous_consume_time: Option, + precompute_dumper: Option, +} + +impl KafkaConsumer { + pub fn new( + config: KafkaConsumerConfig, + store: Arc, + streaming_config: Arc, + ) -> Result> { + let consumer: StreamConsumer = ClientConfig::new() + .set("bootstrap.servers", &config.broker) + .set("group.id", &config.group_id) + .set("auto.offset.reset", &config.auto_offset_reset) + .set("enable.partition.eof", "false") + .set("session.timeout.ms", "6000") + .set("enable.auto.commit", "true") + .create()?; + + // Subscribe to the topic + consumer.subscribe(&[&config.topic])?; + + // Initialize precompute dumper if enabled + let precompute_dumper = if config.dump_precomputes { + match 
&config.dump_output_dir {
+                Some(output_dir) => match PrecomputeDumper::new(output_dir) {
+                    Ok(dumper) => {
+                        info!("Precompute dumping enabled to: {}", dumper.get_file_path());
+                        Some(dumper)
+                    }
+                    Err(e) => {
+                        // Dumping is best-effort: a failed dumper is logged and disabled,
+                        // it does not abort consumer construction.
+                        error!("Failed to create precompute dumper: {}", e);
+                        info!("Continuing without precompute dumping");
+                        None
+                    }
+                },
+                None => {
+                    warn!("Precompute dumping requested but no output directory provided");
+                    None
+                }
+            }
+        } else {
+            None
+        };
+
+        Ok(Self {
+            config,
+            store,
+            consumer,
+            streaming_config,
+            previous_consume_time: None,
+            precompute_dumper,
+        })
+    }
+
+    // NOTE(review): several generic parameters in this file appear truncated in this
+    // dump (e.g. `Box>`, `downcast_ref::()`), most likely `Box<dyn std::error::Error>`
+    // and similar — confirm against the original source in VCS before building.
+
+    /// Main consume loop: awaits Kafka messages (with a per-poll timeout),
+    /// deserializes each into a (precomputed output, accumulator) pair via
+    /// `process_message`, and flushes accumulated pairs to the store in
+    /// batches of `config.batch_size`. A poll timeout flushes whatever has
+    /// accumulated. Only returns on a fatal Kafka or store error; otherwise
+    /// it loops forever.
+    pub async fn run(&mut self) -> Result<(), Box> {
+        info!(
+            "Starting Kafka consumer for topic: {} on broker: {}",
+            self.config.topic, self.config.broker
+        );
+
+        let mut batch = Vec::new();
+
+        loop {
+            // Collect messages into batches like Python implementation
+            let timeout_duration = Duration::from_millis(self.config.poll_timeout_ms);
+
+            // StreamConsumer uses recv() for async message reception
+            match tokio::time::timeout(timeout_duration, self.consumer.recv()).await {
+                Ok(Ok(message)) => {
+                    // Add timing debug similar to Python
+                    let current_consume_time = Instant::now();
+                    if let Some(previous_time) = self.previous_consume_time {
+                        let elapsed = current_consume_time.duration_since(previous_time);
+                        debug!(
+                            "Time since last consume: {:.2} seconds",
+                            elapsed.as_secs_f64()
+                        );
+                    }
+                    self.previous_consume_time = Some(current_consume_time);
+                    // Process single message and add to batch
+                    match self.process_message(&message) {
+                        Ok(Some((precomputed_output, precompute_accumulator))) => {
+                            // Check if this is an empty DeltaSetAggregator and skip it
+                            // NOTE(review): the turbofish type parameter below was lost in
+                            // this dump; the log message suggests it was
+                            // DeltaSetAggregatorAccumulator — confirm against VCS.
+                            if let Some(delta_acc) = precompute_accumulator
+                                .as_any()
+                                .downcast_ref::()
+                            {
+                                if delta_acc.is_empty() {
+                                    debug!("Skipping empty DeltaSetAggregatorAccumulator");
+                                    continue;
+                                }
+                            }
+
+                            // Dump precompute if enabled (best-effort: failures are logged,
+                            // the message is still batched and stored).
+                            if let Some(ref mut dumper) = self.precompute_dumper {
+                                if let Err(e) = dumper.dump_precompute(
+                                    &precomputed_output,
+                                    precompute_accumulator.as_ref(),
+                                ) {
+                                    error!("Failed to dump precompute: {}", e);
+                                }
+                            }
+
+                            // Store both the metadata and the real accumulator data
+                            batch.push((precomputed_output, precompute_accumulator));
+                        }
+                        Ok(None) => {
+                            debug!("Message processed but no precomputed output produced");
+                        }
+                        Err(e) => {
+                            // Per-message errors are non-fatal: log and keep consuming.
+                            error!("Error processing message: {e}");
+                            continue; // Skip this message and continue
+                        }
+                    }
+
+                    // Process batch when we reach batch_size or periodically
+                    if batch.len() >= self.config.batch_size {
+                        self.process_batch(&mut batch).await?;
+                    }
+                }
+                Ok(Err(kafka_err)) => {
+                    // PartitionEOF is informational (end of partition reached), not an error.
+                    if kafka_err.rdkafka_error_code()
+                        == Some(rdkafka::types::RDKafkaErrorCode::PartitionEOF)
+                    {
+                        debug!("Reached end of partition");
+                        continue;
+                    } else {
+                        error!("Kafka error: {kafka_err}");
+                        return Err(Box::new(kafka_err));
+                    }
+                }
+                Err(_) => {
+                    // Timeout occurred - process any accumulated batch
+                    if !batch.is_empty() {
+                        debug!(
+                            "Poll timeout, processing accumulated batch of {} items",
+                            batch.len()
+                        );
+                        self.process_batch(&mut batch).await?;
+                    } else {
+                        debug!("Poll timeout, no messages to process");
+                    }
+                }
+            }
+        }
+    }
+
+    /// Inserts the accumulated (output, accumulator) pairs into the store as a
+    /// single batch, logs insert/total timings at debug level, and clears
+    /// `batch` on success. An empty batch is a no-op; a store error is
+    /// propagated to the caller (the batch is left intact in that case).
+    async fn process_batch(
+        &self,
+        batch: &mut Vec<(PrecomputedOutput, Box)>,
+    ) -> Result<(), Box> {
+        if batch.is_empty() {
+            return Ok(());
+        }
+
+        let batch_start_time = Instant::now();
+        debug!("Processing batch of {} messages", batch.len());
+
+        // Batch insert with real precompute data like Python implementation
+        // NOTE(review): `to_vec()` clones the entire batch for the insert call —
+        // consider draining instead if the store API allows.
+        let store_insert_start_time = Instant::now();
+        match self.store.insert_precomputed_output_batch(batch.to_vec()) {
+            Ok(_) => {
+                let store_insert_duration = store_insert_start_time.elapsed();
+                debug!(
+                    "Store batch insert took: {:.2}ms",
+                    store_insert_duration.as_secs_f64() * 1000.0
+                );
+                debug!("{}", batch[0].0.get_freshness_debug_string());
+                for (item, _) in batch.iter() {
+                    debug!(
+                        "Received message: {} with aggregation_id: {}",
+                        serde_json::to_string(&item.serialize_to_json())
+                            .unwrap_or_else(|_| "failed to serialize".to_string()),
+                        item.aggregation_id
+                    );
+                }
+            }
+            Err(e) => {
+                error!("Error inserting precomputed output batch: {}", e);
+                return Err(e);
+            }
+        }
+
+        batch.clear();
+        let total_batch_duration = batch_start_time.elapsed();
+        debug!(
+            "Total batch processing took: {:.2}ms",
+            total_batch_duration.as_secs_f64() * 1000.0
+        );
+        Ok(())
+    }
+
+    /// Decodes a single Kafka message according to `config.input_format` and
+    /// `config.streaming_engine`. Returns `Ok(None)` for messages with no
+    /// payload. Only JSON + Arroyo is currently implemented; the Byte and
+    /// JSON + Flink paths are stubbed out (commented code retained for
+    /// reference) and return an error.
+    #[allow(clippy::type_complexity)]
+    fn process_message(
+        &self,
+        message: &rdkafka::message::BorrowedMessage<'_>,
+    ) -> Result<
+        Option<(PrecomputedOutput, Box)>,
+        Box,
+    > {
+        let message_start_time = Instant::now();
+        let payload = match message.payload() {
+            Some(payload) => payload,
+            None => {
+                warn!("Received message with no payload");
+                return Ok(None);
+            }
+        };
+
+        match self.config.input_format {
+            InputFormat::Byte => {
+                // For binary format, we need to first extract metadata to get aggregation_type
+                // Then use it to create the proper accumulator
+                // let (metadata, _precompute_bytes) =
+                //     match PrecomputedOutput::deserialize_from_bytes_with_precompute(payload) {
+                //         Ok(result) => result,
+                //         Err(e) => {
+                //             error!("Error deserializing binary message metadata: {}", e);
+                //             return Err(format!("Binary deserialization error: {e}").into());
+                //         }
+                //     };
+
+                // // Now deserialize with the correct accumulator type
+                // match PrecomputedOutput::deserialize_from_bytes_with_precompute_and_type(
+                //     payload,
+                //     &metadata.config.aggregation_type,
+                // ) {
+                //     Ok((output, precompute)) => {
+                //         debug!("Successfully deserialized binary message with precompute data");
+                //         Ok(Some((output, precompute)))
+                //     }
+                //     Err(e) => {
+                //         error!("Error deserializing binary message with precompute: {}", e);
+                //         Err(e)
+                //     }
+                // }
+                error!("Binary input format with precompute not implemented");
+                Err("Binary input format with precompute not implemented".into())
+            }
+            InputFormat::Json => {
+                // Handle streaming engine specific logic
+                match self.config.streaming_engine {
+                    StreamingEngine::Flink => {
+                        // debug!("Received message of length: {}", payload.len());
+
+                        // let json_data = if self.config.decompress_json {
+                        //     // Decompress using gzip
+                        //     let mut decoder = GzDecoder::new(payload);
+                        //     let mut decompressed = Vec::new();
+                        //     match decoder.read_to_end(&mut decompressed) {
+                        //         Ok(_) => {
+                        //             debug!(
+                        //                 "Decompressed JSON message of length: {}",
+                        //                 decompressed.len()
+                        //             );
+                        //             decompressed
+                        //         }
+                        //         Err(e) => {
+                        //             error!("Error decompressing gzip data: {}", e);
+                        //             return Err(format!("Gzip decompression error: {e}").into());
+                        //         }
+                        //     }
+                        // } else {
+                        //     payload.to_vec()
+                        // };
+
+                        // let json_str = match String::from_utf8(json_data) {
+                        //     Ok(s) => s,
+                        //     Err(e) => {
+                        //         error!("Error converting bytes to UTF-8: {}", e);
+                        //         return Err(format!("UTF-8 conversion error: {e}").into());
+                        //     }
+                        // };
+
+                        // let json_parse_start_time = Instant::now();
+
+                        // let json_dict: serde_json::Value = match serde_json::from_str(&json_str) {
+                        //     Ok(dict) => {
+                        //         let json_parse_duration = json_parse_start_time.elapsed();
+                        //         debug!(
+                        //             "JSON parsing took: {:.2}ms",
+                        //             json_parse_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         dict
+                        //     }
+                        //     Err(e) => {
+                        //         error!("Error parsing JSON: {}", e);
+                        //         debug!("JSON content: {}", json_str);
+                        //         return Err(format!("JSON parsing error: {e}").into());
+                        //     }
+                        // };
+
+                        // debug!(
+                        //     "Deserializing JSON message: {}, {}, {}",
+                        //     json_dict
+                        //         .get("aggregation_id")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0),
+                        //     json_dict
+                        //         .get("start_timestamp")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0),
+                        //     json_dict
+                        //         .get("end_timestamp")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0)
+                        // );
+
+                        // let deserialize_start_time = Instant::now();
+
+                        // match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) {
+                        //     Ok((output, precompute)) => {
+                        //         let deserialize_duration = deserialize_start_time.elapsed();
+                        //         debug!(
+                        //             "Deserialization took: {:.2}ms",
+                        //             deserialize_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         debug!(
+                        //             "Deserialized item: {}, {}, {}",
+                        //             output.config.aggregation_id,
+                        //             output.start_timestamp,
+                        //             output.end_timestamp
+                        //         );
+                        //         debug!("Successfully deserialized Flink JSON message with precompute data");
+                        //         let total_message_duration = message_start_time.elapsed();
+                        //         debug!(
+                        //             "Total message processing took: {:.2}ms",
+                        //             total_message_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         Ok(Some((output, precompute)))
+                        //     }
+                        //     Err(e) => {
+                        //         error!(
+                        //             "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}",
+                        //             e
+                        //         );
+                        //         debug!("JSON content: {}", json_str);
+                        //         Err(e)
+                        //     }
+                        // }
+                        error!("Flink input format with precompute not implemented");
+                        Err("Flink input format with precompute not implemented".into())
+                    }
+                    StreamingEngine::Arroyo => {
+                        // Arroyo messages - gzip decompression is applied at precompute level, not message level
+                        let json_str = match String::from_utf8(payload.to_vec()) {
+                            Ok(s) => s,
+                            Err(e) => {
+                                error!("Error converting bytes to UTF-8: {}", e);
+                                return Err(format!("UTF-8 conversion error: {e}").into());
+                            }
+                        };
+
+                        let json_dict: serde_json::Value = match serde_json::from_str(&json_str) {
+                            Ok(dict) => dict,
+                            Err(e) => {
+                                error!("Error parsing Arroyo JSON: {}", e);
+                                debug!("JSON content: {}", json_str);
+                                return Err(format!("JSON parsing error: {e}").into());
+                            }
+                        };
+
+                        let deserialize_start_time = Instant::now();
+                        match PrecomputedOutput::deserialize_from_json_arroyo(
+                            &json_dict,
+                            &self.streaming_config,
+                        ) {
+                            Ok((output, precompute)) => {
+                                let deserialize_duration = deserialize_start_time.elapsed();
+                                debug!(
+                                    "Arroyo deserialization took: {:.2}ms",
+                                    deserialize_duration.as_secs_f64() * 1000.0
+                                );
+                                debug!("Successfully deserialized Arroyo JSON message with precompute data");
+                                let total_message_duration = message_start_time.elapsed();
+                                debug!(
+                                    "Total Arroyo message processing took: {:.2}ms",
+                                    total_message_duration.as_secs_f64() * 1000.0
+                                );
+                                Ok(Some((output, precompute)))
+                            }
+                            Err(e) => {
+                                error!(
+                                    "Error deserializing Arroyo PrecomputedOutput from JSON with precompute: {e}"
+                                );
+                                debug!("JSON content: {}", json_str);
+                                Err(e)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Flushes the optional precompute dumper (errors are logged, not
+    /// propagated); the Kafka consumer itself is released when dropped.
+    pub async fn stop(&mut self) -> Result<(), Box> {
+        info!("Stopping Kafka consumer");
+
+        // Flush precompute dumper if it exists
+        if let Some(ref mut dumper) = self.precompute_dumper {
+            if let Err(e) = dumper.flush() {
+                error!("Failed to flush precompute dumper on stop: {}", e);
+            }
+        }
+
+        // The consumer will be dropped automatically
+        Ok(())
+    }
+}
diff --git a/QueryEngineRust/src/drivers/ingest/mod.rs b/QueryEngineRust/src/drivers/ingest/mod.rs
new file mode 100644
index 0000000..f300788
--- /dev/null
+++ b/QueryEngineRust/src/drivers/ingest/mod.rs
@@ -0,0 +1,9 @@
+pub mod kafka;
+pub mod prometheus_remote_write;
+pub mod victoriametrics_remote_write;
+
+pub use kafka::{KafkaConsumer, KafkaConsumerConfig};
+// pub use prometheus_remote_write::{PrometheusRemoteWriteConfig, PrometheusRemoteWriteServer};
+// pub use victoriametrics_remote_write::{
+//     VictoriaMetricsRemoteWriteConfig, VictoriaMetricsRemoteWriteServer,
+// };
diff --git a/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs b/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs
new file mode 100644
index 0000000..428c9de
--- /dev/null
+++ b/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs
@@ -0,0 +1,492 @@
+// use axum::{body::Bytes, extract::State, http::StatusCode, routing::post, Router};
+// use prost::Message;
+// use std::sync::Arc;
+// use tokio::net::TcpListener;
+// use tracing::{debug, error, info, warn};
+
+// // use crate::stores::promsketch_store::metrics as ps_metrics;
+// // use crate::stores::promsketch_store::PromSketchStore;
+
+// // ---------------------------------------------------------------------------
+// // Protobuf message types
(Prometheus remote write wire format) +// // --------------------------------------------------------------------------- +// // These mirror the upstream proto definitions in prometheus/prompb but are +// // defined inline via prost derive macros so we don't need a .proto file or +// // build script. + +// #[derive(Clone, PartialEq, Message)] +// pub struct WriteRequest { +// #[prost(message, repeated, tag = "1")] +// pub timeseries: Vec, +// } + +// #[derive(Clone, PartialEq, Message)] +// pub struct TimeSeries { +// #[prost(message, repeated, tag = "1")] +// pub labels: Vec