diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..397172d --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203, E501, W503 +exclude = + .git, + __pycache__, + .venv, + .eggs, + *.egg, + dist, + build diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..1191162 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,57 @@ +name: Docker Builds + +on: + push: + branches: [ main ] + paths: + - 'CommonDependencies/installation/**' + - 'Controller/**' + - 'PrometheusClient/**' + - '.github/workflows/docker.yml' + pull_request: + branches: [ main ] + paths: + - 'CommonDependencies/installation/**' + - 'Controller/**' + - 'PrometheusClient/**' + - '.github/workflows/docker.yml' + workflow_dispatch: + +jobs: + build-images: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build base image + working-directory: CommonDependencies/installation + run: | + docker build \ + -t sketchdb-base:latest \ + -f Dockerfile \ + .. + + - name: Test base image + run: | + docker run --rm sketchdb-base:latest python --version + docker run --rm sketchdb-base:latest pip list + + - name: Build Controller Docker image + working-directory: Controller + run: | + docker build \ + -t sketchdb-controller:latest \ + -f Dockerfile \ + . + + - name: Build PrometheusClient Docker image + working-directory: PrometheusClient + run: | + docker build \ + -t sketchdb-prometheus-client:latest \ + -f Dockerfile \ + . 
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..fc0e377 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,248 @@ +name: Python Projects CI + +on: + push: + branches: [ main ] + paths: + - 'ArroyoSketch/**' + - 'PrometheusClient/**' + - 'Controller/**' + - 'Utilities/**' + - 'PrometheusExporters/**' + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + - '.github/workflows/python.yml' + pull_request: + branches: [ main ] + paths: + - 'ArroyoSketch/**' + - 'PrometheusClient/**' + - 'Controller/**' + - 'Utilities/**' + - 'PrometheusExporters/**' + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + - '.github/workflows/python.yml' + workflow_dispatch: + +permissions: + contents: read + pull-requests: read + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + arroyo_sketch: ${{ steps.filter.outputs.arroyo_sketch }} + prometheus_client: ${{ steps.filter.outputs.prometheus_client }} + controller: ${{ steps.filter.outputs.controller }} + utilities: ${{ steps.filter.outputs.utilities }} + prometheus_exporters: ${{ steps.filter.outputs.prometheus_exporters }} + execution_utilities: ${{ steps.filter.outputs.execution_utilities }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + arroyo_sketch: + - 'ArroyoSketch/**' + - 'CommonDependencies/dependencies/py/**' + prometheus_client: + - 'PrometheusClient/**' + - 'CommonDependencies/dependencies/py/**' + controller: + - 'Controller/**' + - 'CommonDependencies/dependencies/py/**' + utilities: + - 'Utilities/**' + - 'CommonDependencies/dependencies/py/**' + prometheus_exporters: + - 'PrometheusExporters/**' + - 'CommonDependencies/dependencies/py/**' + execution_utilities: + - 'ExecutionUtilities/**' + - 'CommonDependencies/dependencies/py/**' + + test-arroyo-sketch: + needs: detect-changes + if: needs.detect-changes.outputs.arroyo_sketch == 'true' + 
runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f ArroyoSketch/requirements.txt ]; then pip install -r ArroyoSketch/requirements.txt; fi + - name: Check formatting with Black + working-directory: ArroyoSketch + run: black --check --diff . + - name: Lint with flake8 + working-directory: ArroyoSketch + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-prometheus-client: + needs: detect-changes + if: needs.detect-changes.outputs.prometheus_client == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 mypy types-requests types-PyYAML typing-extensions numpy prometheus-client urllib3 + if [ -f PrometheusClient/requirements.txt ]; then pip install -r PrometheusClient/requirements.txt; fi + - name: Check formatting with Black + working-directory: PrometheusClient + run: black --check --diff . + - name: Lint with flake8 + working-directory: PrometheusClient + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . 
--config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + - name: Type check with mypy + working-directory: PrometheusClient + run: mypy . + + test-controller: + needs: detect-changes + if: needs.detect-changes.outputs.controller == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f Controller/requirements.txt ]; then pip install -r Controller/requirements.txt; fi + - name: Check formatting with Black + working-directory: Controller + run: black --check --diff . + - name: Lint with flake8 + working-directory: Controller + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . 
--config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-utilities: + needs: detect-changes + if: needs.detect-changes.outputs.utilities == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f Utilities/requirements.txt ]; then pip install -r Utilities/requirements.txt; fi + - name: Check formatting with Black + working-directory: Utilities + run: black --check --diff . + - name: Lint with flake8 + working-directory: Utilities + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + + test-prometheus-exporters: + needs: detect-changes + if: needs.detect-changes.outputs.prometheus_exporters == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 mypy isort + if [ -f PrometheusExporters/requirements.txt ]; then pip install -r PrometheusExporters/requirements.txt; fi + - name: Check formatting with Black + working-directory: PrometheusExporters + run: black --check --diff . 
+ - name: Check import sorting with isort + working-directory: PrometheusExporters + run: isort --check-only --diff --settings-file .isort.cfg . + - name: Lint with flake8 + working-directory: PrometheusExporters + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . --config=../.flake8 --count --exit-zero --max-complexity=10 --statistics + - name: Type check with mypy + working-directory: PrometheusExporters + run: mypy . --config-file=.mypy.ini + + test-execution-utilities: + needs: detect-changes + if: needs.detect-changes.outputs.execution_utilities == 'true' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black==24.8.0 flake8==6.1.0 + if [ -f ExecutionUtilities/requirements.txt ]; then pip install -r ExecutionUtilities/requirements.txt; fi + - name: Check formatting with Black + working-directory: ExecutionUtilities + run: black --check --diff . + - name: Lint with flake8 + working-directory: ExecutionUtilities + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 . --config=../.flake8 --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 . 
--config=../.flake8 --count --exit-zero --max-complexity=10 --statistics diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..143865b --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,151 @@ +name: Rust Projects CI + +on: + push: + branches: [ main ] + paths: + - 'QueryEngineRust/**' + - 'CommonDependencies/dependencies/rs/**' + - 'CommonDependencies/tests/**' + - 'sketch-core/**' + - '.github/workflows/rust.yml' + pull_request: + branches: [ main ] + paths: + - 'QueryEngineRust/**' + - 'CommonDependencies/dependencies/rs/**' + - 'CommonDependencies/tests/**' + - 'sketch-core/**' + - '.github/workflows/rust.yml' + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + format-and-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + # - name: Clone sketchlib-rust + # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + components: rustfmt, clippy + + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + QueryEngineRust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + - name: Check formatting + run: cargo fmt -- --check + working-directory: QueryEngineRust + + - name: Check formatting (sketch_db_common) + run: cargo fmt -- --check + working-directory: CommonDependencies/dependencies/rs/sketch_db_common + + - name: Run clippy + run: cargo clippy --all-targets --all-features -- -D warnings + working-directory: QueryEngineRust + env: + RUSTC_WRAPPER: sccache + + - name: Check formatting (sketch-core) + run: cargo fmt -- --check + working-directory: 
sketch-core + + - name: Run clippy (sketch-core) + run: cargo clippy --all-targets --all-features -- -D warnings + working-directory: sketch-core + env: + RUSTC_WRAPPER: sccache + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + # - name: Clone sketchlib-rust + # run: git clone https://github.com/ProjectASAP/sketchlib-rust.git + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + QueryEngineRust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + - name: Run QueryEngineRust tests + run: cargo test + working-directory: QueryEngineRust + env: + RUSTC_WRAPPER: sccache + + - name: Run sql_utilities tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/sql_utilities + env: + RUSTC_WRAPPER: sccache + + - name: Run promql_utilities tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/promql_utilities + env: + RUSTC_WRAPPER: sccache + + - name: Run sketch-core tests + run: cargo test + working-directory: sketch-core + - name: Run sketch_db_common tests + run: cargo test + working-directory: CommonDependencies/dependencies/rs/sketch_db_common + env: + RUSTC_WRAPPER: sccache + + docker: + runs-on: ubuntu-latest + needs: [format-and-lint, test] + steps: + - uses: actions/checkout@v4 + + # - name: Configure git for private dependencies + # run: git config --global url."https://x-access-token:${{ secrets.PRIVATE_REPO_TOKEN }}@github.com/".insteadOf "https://github.com/" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + run: 
docker build -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest . + # run: | + # echo "${{ secrets.PRIVATE_REPO_TOKEN }}" > /tmp/git_token + # docker build --secret id=git_token,src=/tmp/git_token -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest . + # rm -f /tmp/git_token diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fa17294 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +target/ +experiment_outputs/ + +# Private repo, vendored locally until open sourced (see GitHub issue) +sketchlib-rust/ + +# Runtime and generated files +metadata/ +preprocessed_configs/ +status +uuid +store/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fed70d5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,179 @@ +repos: + # General hooks for all file types + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-executables-have-shebangs + + # Docker linting + - repo: https://github.com/hadolint/hadolint + rev: v2.12.0 + hooks: + - id: hadolint-docker + files: ^.*Dockerfile.*$ + args: ['--ignore', 'DL3008'] + + # Shell script linting + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.5 + hooks: + - id: shellcheck + files: \.(sh|bash)$ + + # Python formatting and linting + - repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + language_version: python3 + files: \.(py|pyi)$ + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 # last version of isort that supports python 3.8 + hooks: + - id: isort + name: isort (python) + files: ^PrometheusExporters/.*\.py$ + args: ["--settings-file", "PrometheusExporters/.isort.cfg"] + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + files: \.(py|pyi)$ + + - repo: 
https://github.com/pre-commit/mirrors-mypy + rev: "v1.14.1" + hooks: + - id: mypy + files: ^PrometheusExporters/.*\.py$ + args: [--config-file=PrometheusExporters/.mypy.ini] + + - id: mypy + files: ^PrometheusClient/ + args: [--config-file=PrometheusClient/pyproject.toml] + additional_dependencies: + - types-requests + - types-PyYAML + - typing-extensions + - numpy + - prometheus-client + - urllib3 + + # Rust formatting and linting + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + description: Format Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path QueryEngineRust/Cargo.toml -- --check' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-check + name: cargo check + description: Check the package for errors. + entry: bash -c 'cargo check --manifest-path QueryEngineRust/Cargo.toml --all' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-clippy + name: cargo clippy + description: Lint Rust sources + entry: bash -c 'cargo clippy --manifest-path QueryEngineRust/Cargo.toml --all-targets --all-features -- -D warnings' + language: system + files: ^QueryEngineRust/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-datafusion-summary-library + name: cargo fmt (datafusion_summary_library) + description: Format datafusion_summary_library Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/datafusion_summary_library/.*\.rs$ + pass_filenames: false + + - id: cargo-test-datafusion-summary-library + name: cargo test (datafusion_summary_library) + description: Run datafusion_summary_library tests. 
+ entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/datafusion_summary_library/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-sql-utilities + name: cargo fmt (sql_utilities) + description: Format sql_utilities Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/sql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sql-utilities + name: cargo test (sql_utilities) + description: Run sql_utilities tests. + entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/sql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-fmt-promql-utilities + name: cargo fmt (promql_utilities) + description: Format promql_utilities Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/promql_utilities/.*\.rs$ + pass_filenames: false + + - id: cargo-test-promql-utilities + name: cargo test (promql_utilities) + description: Run promql_utilities tests. + entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/promql_utilities/.*\.rs$ + pass_filenames: false + - id: cargo-fmt-sketch-core + name: cargo fmt (sketch-core) + description: Format sketch-core Rust files with rustfmt. 
+ entry: bash -c 'cargo fmt --manifest-path sketch-core/Cargo.toml -- --check' + language: system + files: ^sketch-core/.*\.rs$ + pass_filenames: false + + - id: cargo-clippy-sketch-core + name: cargo clippy (sketch-core) + description: Lint sketch-core Rust sources. + entry: bash -c 'cargo clippy --manifest-path sketch-core/Cargo.toml --all-targets --all-features -- -D warnings' + language: system + files: ^sketch-core/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sketch-core + name: cargo test (sketch-core) + description: Run sketch-core tests. + entry: bash -c 'cargo test --manifest-path sketch-core/Cargo.toml' + language: system + files: ^sketch-core/.*\.rs$ + - id: cargo-fmt-sketch-db-common + name: cargo fmt (sketch_db_common) + description: Format sketch_db_common Rust files with rustfmt. + entry: bash -c 'cargo fmt --manifest-path CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml -- --check' + language: system + files: ^CommonDependencies/dependencies/rs/sketch_db_common/.*\.rs$ + pass_filenames: false + + - id: cargo-test-sketch-db-common + name: cargo test (sketch_db_common) + description: Run sketch_db_common tests. 
+ entry: bash -c 'cargo test --manifest-path CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml' + language: system + files: ^CommonDependencies/dependencies/rs/sketch_db_common/.*\.rs$ + pass_filenames: false diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 0000000..022d394 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,4 @@ +# Disable warnings about not following external sources +# SC1090: Can't follow non-constant source (e.g., source ~/.bashrc) +# SC1091: Not following sourced files that are external/generated (e.g., nvm.sh, cargo env) +disable=SC1090,SC1091 diff --git a/ArroyoSketch/.gitignore b/ArroyoSketch/.gitignore new file mode 100644 index 0000000..f7ee054 --- /dev/null +++ b/ArroyoSketch/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +**/*.pyc +**/*.swp diff --git a/ArroyoSketch/Dockerfile b/ArroyoSketch/Dockerfile new file mode 100644 index 0000000..259e004 --- /dev/null +++ b/ArroyoSketch/Dockerfile @@ -0,0 +1,25 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="ArroyoSketch pipeline configuration service" + +# Set working directory +WORKDIR /app + +# Install Python dependencies +RUN pip3 install --no-cache-dir jinja2 requests loguru pyyaml + +# Copy application code +COPY classes/ ./classes/ +COPY utils/ ./utils/ +COPY templates/ ./templates/ +COPY examples/ ./examples/ +COPY run_arroyosketch.py . +COPY delete_pipeline.py . +COPY validate_udfs.py . 
+ +# Create output directory +RUN mkdir -p /app/output + +# Set the entry point +ENTRYPOINT ["python", "run_arroyosketch.py"] diff --git a/ArroyoSketch/LICENSE b/ArroyoSketch/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/ArroyoSketch/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ArroyoSketch/README.md b/ArroyoSketch/README.md new file mode 100644 index 0000000..576b181 --- /dev/null +++ b/ArroyoSketch/README.md @@ -0,0 +1,113 @@ +# ArroyoSketch + +ArroyoSketch is the pipeline configurator that creates Arroyo streaming pipelines from configuration files. + +## Purpose + +Given `streaming_config.yaml` (generated by Controller), ArroyoSketch: +1. Renders SQL query templates using Jinja2 +2. Creates Arroyo pipelines via REST API +3. Configures sketch-building UDFs with parameters +4. 
Sets up connections to Kafka for sketch output + +This automation eliminates manual pipeline creation and ensures consistency with Controller decisions. + +## How It Works + +### Input: streaming_config.yaml + +The Controller generates this file describing which sketches to build: + +**TODO** + +### Process: Render and Deploy + +**TODO** + +## Key Files + +### Entry Point + +**TODO** + +### Templates + +**TODO** + +### Validation + +- **`validate_udfs.py`** - UDF validation script + - Checks if UDFs are available in Arroyo + - Validates UDF signatures match usage + - Run before creating pipelines + +## Running Locally + +### Basic Usage + +```bash +python run_arroyosketch.py +``` + +**Expects:** +- `streaming_config.yaml` in current directory +- Arroyo running on `http://localhost:5115` (default) +- Kafka running on `localhost:9092` (default) + +### With Custom Config + +```bash +python run_arroyosketch.py \ + --config /path/to/streaming_config.yaml \ + --arroyo-url http://arroyo:5115 \ + --kafka-bootstrap kafka:9092 +``` + +### Dry-Run Mode + +Preview generated SQL without creating pipelines: + +```bash +python run_arroyosketch.py --dry-run +``` + +## Testing + +### Validate UDFs + +Before creating pipelines, verify that UDFs can compile against Arroyo: + +```bash +python validate_udfs.py +``` + +### Integration Test + +1. Start Arroyo: +```bash +cd arroyo +docker compose up +``` + +2. Run ArroyoSketch: +```bash +python run_arroyosketch.py --config test_config.yaml +``` + +3. Verify pipeline created: +```bash +curl http://localhost:5115/api/pipelines +# Should show your pipeline +``` + +4. 
Check pipeline is running: +```bash +# In Arroyo UI: http://localhost:5115 +# Navigate to Pipelines → See your pipeline status +``` + +## Extending + +### Adding a New Template + +**TODO** diff --git a/ArroyoSketch/arroyo-compose.yml b/ArroyoSketch/arroyo-compose.yml new file mode 100644 index 0000000..5e63756 --- /dev/null +++ b/ArroyoSketch/arroyo-compose.yml @@ -0,0 +1,11 @@ +services: + arroyo: + image: ghcr.io/projectasap/asap-arroyo:${ARROYO_VERSION:-v0.1.0} + container_name: sketchdb-arroyo + network_mode: host + volumes: + - ./config.yaml:/config.yaml + command: ["--config", "/config.yaml", "cluster"] + environment: + - ARROYO__API__RUN_HTTP_PORT=5115 + restart: "no" diff --git a/ArroyoSketch/arroyosketch-cli-compose.yml.j2 b/ArroyoSketch/arroyosketch-cli-compose.yml.j2 new file mode 100644 index 0000000..5421433 --- /dev/null +++ b/ArroyoSketch/arroyosketch-cli-compose.yml.j2 @@ -0,0 +1,30 @@ +services: + arroyosketch: + build: + context: {{ arroyosketch_dir }} + container_name: {{ container_name }} + hostname: arroyosketch + networks: + - asap-network + command: + - "--config_file_path=/controller-output/streaming_config.yaml" + - "--source_type=prometheus_remote_write" + - "--prometheus_base_port={{ prometheus_base_port }}" + - "--prometheus_path={{ prometheus_path }}" + - "--prometheus_bind_ip={{ prometheus_bind_ip }}" + - "--parallelism={{ parallelism }}" + - "--output_kafka_topic={{ output_kafka_topic }}" + - "--output_format={{ output_format }}" + - "--pipeline_name={{ pipeline_name }}" + - "--output_dir=/arroyosketch-output" + - "--arroyo_url={{ arroyo_url }}" + - "--bootstrap_servers={{ bootstrap_servers }}" + volumes: + - {{ controller_output_dir }}:/controller-output:ro + - {{ arroyosketch_output_dir }}:/arroyosketch-output + depends_on: + controller: + condition: service_completed_successfully + arroyo: + condition: service_healthy + restart: "no" # Init container - runs once and exits diff --git a/ArroyoSketch/config.yaml 
b/ArroyoSketch/config.yaml new file mode 100644 index 0000000..6388be9 --- /dev/null +++ b/ArroyoSketch/config.yaml @@ -0,0 +1,2 @@ +compiler: + use-local-udf-crate: true diff --git a/ArroyoSketch/delete_pipeline.py b/ArroyoSketch/delete_pipeline.py new file mode 100644 index 0000000..e493037 --- /dev/null +++ b/ArroyoSketch/delete_pipeline.py @@ -0,0 +1,50 @@ +import argparse + +from utils import arroyo_utils + + +def main(args): + # http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{args.pipeline_id}", + # method="patch", + # data=json.dumps({"stop": "immediate"}), + # ) + # http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{args.pipeline_id}", + # method="delete", + # ) + + if not args.pipeline_id and not args.all_pipelines: + raise ValueError("You must specify either --pipeline_id or --all_pipelines.") + + pipeline_ids = [] + if args.pipeline_id: + pipeline_ids = [args.pipeline_id] + elif args.all_pipelines: + pipeline_ids = arroyo_utils.get_all_pipelines(arroyo_url=args.arroyo_url) + + arroyo_utils.stop_and_delete_pipelines( + arroyo_url=args.arroyo_url, pipeline_ids=pipeline_ids + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete a pipeline.") + + parser.add_argument( + "--pipeline_id", + type=str, + required=False, + help="The ID of the pipeline to delete.", + ) + parser.add_argument( + "--all_pipelines", action="store_true", help="Delete all pipelines." 
+ ) + parser.add_argument( + "--arroyo_url", + default="http://localhost:5115/api/v1", + help="URL of the Arroyo API server", + ) + + args = parser.parse_args() + main(args) diff --git a/ArroyoSketch/examples/configs/streaming_config.yaml b/ArroyoSketch/examples/configs/streaming_config.yaml new file mode 100644 index 0000000..e42bef5 --- /dev/null +++ b/ArroyoSketch/examples/configs/streaming_config.yaml @@ -0,0 +1,74 @@ +aggregations: +- aggregationId: 1 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + # - instance + # - job + - label_0 + - label_1 + - label_2 + grouping: + - instance + - job + rollup: [] + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +# - aggregationId: 2 +# aggregationSubType: '' +# aggregationType: MultipleIncrease +# labels: +# aggregated: +# - instance +# - job +# - label_0 +# - label_1 +# - label_2 +# grouping: [] +# rollup: [] +# metric: fake_metric_total +# parameters: {} +# spatialFilter: '' +# tumblingWindowSize: 10 +# - aggregationId: 3 +# aggregationSubType: sum +# aggregationType: MultipleSum +# labels: +# aggregated: +# - instance +# - job +# grouping: [] +# rollup: +# - label_0 +# - label_1 +# - label_2 +# metric: fake_metric_total +# parameters: {} +# spatialFilter: '' +# tumblingWindowSize: 10 +- aggregationId: 4 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + - label_0 + grouping: [] + rollup: + - instance + - job + - label_1 + - label_2 + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +metrics: + fake_metric_total: + - instance + - job + - label_0 + - label_1 + - label_2 diff --git a/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml b/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml new file mode 100644 index 0000000..1edd171 --- /dev/null +++ b/ArroyoSketch/examples/configs/test_promql_streaming_config.yaml @@ -0,0 +1,28 @@ +metrics: + 
fake_metric_total: + - instance + - job + - label_0 + - label_1 + - label_2 +aggregations: +- aggregationId: 1 + aggregationSubType: '' + aggregationType: DatasketchesKLL + labels: + aggregated: [] + grouping: + - instance + - job + - label_0 + - label_1 + - label_2 + rollup: [] + metric: fake_metric_total + parameters: + K: 20 + slideInterval: 60 + spatialFilter: '' + tumblingWindowSize: 60 + windowSize: 60 + windowType: tumbling diff --git a/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml b/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml new file mode 100644 index 0000000..8dc260f --- /dev/null +++ b/ArroyoSketch/examples/configs/test_sql_streaming_config.yaml @@ -0,0 +1,28 @@ +tables: +- name: metrics_table + time_column: time + value_columns: + - cpu_usage + - memory_usage + metadata_columns: + - hostname + - datacenter +aggregations: + - aggregationId: 1 + table_name: metrics_table + value_column: cpu_usage + aggregationSubType: '' + aggregationType: DatasketchesKLL + labels: + grouping: + - datacenter + aggregated: [] + rollup: + - hostname + parameters: + K: 20 + slideInterval: 1 + spatialFilter: '' + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling diff --git a/ArroyoSketch/examples/inputs/fake_metric_total_10.json b/ArroyoSketch/examples/inputs/fake_metric_total_10.json new file mode 100644 index 0000000..49b080d --- /dev/null +++ b/ArroyoSketch/examples/inputs/fake_metric_total_10.json @@ -0,0 +1,9 @@ +{"timestamp": 1744164268.348, "value": 8112599.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164273.348, "value": 16029022.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": 
"value_2_value_0"} +{"timestamp": 1744164278.348, "value": 23866149.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164283.348, "value": 31757738.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164288.348, "value": 39597673.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164293.348, "value": 47164135.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164298.348, "value": 54908613.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164303.348, "value": 63114376.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164308.348, "value": 70839602.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} diff --git a/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json 
b/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json new file mode 100644 index 0000000..085cc1b --- /dev/null +++ b/ArroyoSketch/examples/inputs/fake_metric_total_10_2.json @@ -0,0 +1,9 @@ +{"timestamp": 1744164268, "value": 8112599.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164273, "value": 16029022.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164278, "value": 23866149.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164283, "value": 31757738.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164288, "value": 39597673.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164293, "value": 47164135.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164298, "value": 54908613.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", 
"label_2": "value_2_value_0"} +{"timestamp": 1744164303, "value": 63114376.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} +{"timestamp": 1744164308, "value": 70839602.0, "metric_name": "fake_metric_total", "__name__": "fake_metric_total", "instance": "10.10.1.2:50000", "job": "fake_exporter", "label_0": "value_0_value_0", "label_1": "value_1_value_0", "label_2": "value_2_value_0"} diff --git a/ArroyoSketch/examples/inputs/value.json b/ArroyoSketch/examples/inputs/value.json new file mode 100644 index 0000000..3ea6445 --- /dev/null +++ b/ArroyoSketch/examples/inputs/value.json @@ -0,0 +1,3 @@ +{"value": "abc"} +{"value": "def"} +{"value": "ghi"} diff --git a/ArroyoSketch/examples/json/connection_profile.json b/ArroyoSketch/examples/json/connection_profile.json new file mode 100644 index 0000000..9ebe430 --- /dev/null +++ b/ArroyoSketch/examples/json/connection_profile.json @@ -0,0 +1,10 @@ +{ + "name": "default-kafka-config", + "connector": "kafka", + "config": { + "authentication": {}, + "bootstrapServers": "localhost:9092", + "name": "default-kafka-config", + "schemaRegistryEnum": {} + } +} diff --git a/ArroyoSketch/examples/json/connection_table.json b/ArroyoSketch/examples/json/connection_table.json new file mode 100644 index 0000000..08b505e --- /dev/null +++ b/ArroyoSketch/examples/json/connection_table.json @@ -0,0 +1,118 @@ +{ + "name": "arroyo_input_source", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "arroyo_input", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + 
"structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + { + "fieldName": "application_name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "hostname", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "location", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + } + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{\n \"type\": \"object\",\n \"required\": [\"labels\", \"value\", \"name\", \"timestamp\"],\n \"properties\": {\n \"labels\": {\n \"type\": \"object\",\n \"required\": [\"hostname\", \"location\", \"application_name\"],\n \"properties\": {\n \"hostname\": {\n \"type\": \"string\",\n \"description\": \"Host identifier\"\n },\n \"location\": {\n \"type\": \"string\",\n \"description\": \"Geographic or data center location\"\n },\n \"application_name\": {\n \"type\": \"string\",\n \"description\": \"Name of the application being monitored\"\n }\n },\n \"additionalProperties\": false\n },\n \"value\": {\n \"type\": \"number\",\n \"description\": \"Metric value\"\n },\n \"name\": {\n \"type\": \"string\",\n \"description\": \"Metric name\"\n },\n 
\"timestamp\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"description\": \"Time when the metric was recorded, in RFC 3339 format\"\n }\n },\n \"additionalProperties\": false\n}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "cp_VzUf2EQ43R" +} diff --git a/ArroyoSketch/examples/json/connection_table_sink.json b/ArroyoSketch/examples/json/connection_table_sink.json new file mode 100644 index 0000000..1970fdb --- /dev/null +++ b/ArroyoSketch/examples/json/connection_table_sink.json @@ -0,0 +1,32 @@ +{ + "name": "arroyo_output_sink", + "connector": "kafka", + "tableType": "sink", + "config": { + "topic": "arroyo_output", + "type": { + "commit_mode": "at_least_once" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [], + "inferred": true, + "primaryKeys": [] + }, + "connectionProfileId": "cp_aBUB3tVozb" +} diff --git a/ArroyoSketch/examples/json/pipeline.json b/ArroyoSketch/examples/json/pipeline.json new file mode 100644 index 0000000..cb2685a --- /dev/null +++ b/ArroyoSketch/examples/json/pipeline.json @@ -0,0 +1,19 @@ +{ + "name": "p4", + "query": "-- INSERT INTO arroyo_output_sink\n-- SELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window\n-- FROM arroyo_input_source\n-- GROUP BY hostname, window\n\nINSERT INTO arroyo_output_sink\nSELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window, my_hashmap(value, string_to_hash(labels.location)) as hashmap\nFROM arroyo_input_source\nGROUP BY hostname, window", + "udfs": [ + { + "definition": "use arroyo_udf_plugin::udf;\n\n// #[udf]\n// fn my_median(mut args: Vec) -> Option {\n// if args.is_empty() {\n// return None;\n// }\n\n// args.sort();\n\n// let mid = 
args.len() / 2;\n// if args.len() % 2 == 0 {\n// Some((args[mid] + args[mid - 1]) as f64 / 2.0)\n// } else {\n// Some(args[mid] as f64)\n// }\n// }\n\n#[udf]\nfn my_median(mut args: Vec) -> Option {\n // Filter out NaN values\n args.retain(|x| !x.is_nan());\n \n if args.is_empty() {\n return None;\n }\n \n args.sort_by(|a, b| a.partial_cmp(b).unwrap()); // Safe now, no NaNs\n\n let mid = args.len() / 2;\n if args.len() % 2 == 0 {\n Some((args[mid] + args[mid - 1]) / 2.0)\n } else {\n Some(args[mid])\n }\n}", + "language": "rust" + }, + { + "definition": "\n/*\n[dependencies]\nbincode = \"1.3\"\n*/\n\nuse arroyo_udf_plugin::udf;\nuse std::collections::HashMap;\n\n#[udf]\nfn my_hashmap(mut values: Vec, keys: Vec) -> Option> {\n // Create a new hashmap to store the count of each name\n let mut name_counts: HashMap = HashMap::new();\n \n // Iterate through the keys and update the count for each name\n for key in keys {\n *name_counts.entry(key).or_insert(0) += 1;\n }\n \n // Serialize the hashmap to bytes using bincode\n bincode::serialize(&name_counts).ok()\n}", + "language": "rust" + }, + { + "definition": "\n/*\n[dependencies]\nahash = \"0.8.6\"\n*/\n\nuse arroyo_udf_plugin::udf;\nuse ahash::AHasher;\nuse std::hash::{Hash, Hasher};\n\n#[udf]\nfn string_to_hash(input: &str) -> u64 {\n let mut hasher = AHasher::default();\n input.hash(&mut hasher);\n hasher.finish()\n}", + "language": "rust" + } + ], + "parallelism": 1 +} diff --git a/ArroyoSketch/examples/sql/create_fake_metric.sql b/ArroyoSketch/examples/sql/create_fake_metric.sql new file mode 100644 index 0000000..e0430b6 --- /dev/null +++ b/ArroyoSketch/examples/sql/create_fake_metric.sql @@ -0,0 +1,47 @@ +CREATE TABLE your_table ( + timestamp DOUBLE, + value DOUBLE, + metric_name TEXT, + __name__ TEXT, + instance TEXT, + job TEXT, + label_0 TEXT, + label_1 TEXT, + label_2 TEXT +) WITH ( + connector = 'filesystem', + type = 'source', + path = 
'/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/inputs/', + format = 'json', + 'source.regex-pattern' = 'fake_metric_total_10\.json' +); +CREATE TABLE output_table ( + sums DOUBLE, + instance TEXT, + job TEXT, + label_0 TEXT, + label_1 TEXT, + label_2 TEXT +) WITH ( + connector = 'filesystem', + type = 'sink', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/outputs/', + format = 'json' +); +INSERT INTO output_table +SELECT + SUM(value) as sums, + instance, + job, + label_0, + label_1, + label_2 +FROM your_table +WHERE __name__ = 'fake_metric_total' +GROUP BY + TUMBLE(INTERVAL '5 seconds'), + instance, + job, + label_0, + label_1, + label_2; diff --git a/ArroyoSketch/examples/sql/create_value.sql b/ArroyoSketch/examples/sql/create_value.sql new file mode 100644 index 0000000..a9b2505 --- /dev/null +++ b/ArroyoSketch/examples/sql/create_value.sql @@ -0,0 +1,19 @@ +CREATE TABLE your_table ( + value TEXT +) WITH ( + connector = 'filesystem', + type = 'source', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/inputs/', + format = 'json', + 'source.regex-pattern' = 'value\.json' +); +CREATE TABLE output_table ( + value TEXT +) WITH ( + connector = 'filesystem', + type = 'sink', + path = '/Users/milindsrivastava/Desktop/cmu/research/sketch_db_for_prometheus/code/arroyo_files/outputs/', + format = 'json' +); +INSERT INTO output_table +SELECT value FROM your_table; diff --git a/ArroyoSketch/examples/sql/tumbling_window.sql b/ArroyoSketch/examples/sql/tumbling_window.sql new file mode 100644 index 0000000..1603672 --- /dev/null +++ b/ArroyoSketch/examples/sql/tumbling_window.sql @@ -0,0 +1,4 @@ +INSERT INTO arroyo_output_sink +SELECT COUNT(*) as count, labels.hostname as hostname, TUMBLE(INTERVAL '5 seconds') as window +FROM arroyo_input_source +GROUP BY hostname, window diff --git a/ArroyoSketch/installation/setup_dependencies.sh 
b/ArroyoSketch/installation/setup_dependencies.sh new file mode 100755 index 0000000..9842e04 --- /dev/null +++ b/ArroyoSketch/installation/setup_dependencies.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +sudo apt-get install -y python3-pip + +pip3 install --user jinja2 diff --git a/ArroyoSketch/run_arroyosketch.py b/ArroyoSketch/run_arroyosketch.py new file mode 100644 index 0000000..2ee7d61 --- /dev/null +++ b/ArroyoSketch/run_arroyosketch.py @@ -0,0 +1,1081 @@ +import os +import json +import yaml +import argparse +from loguru import logger +from jinja2 import Template +from typing import Tuple, List + +from utils import arroyo_utils, http_utils, jinja_utils +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig, TableSchema +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) + + +def check_args(args): + if args.output_file_path: + raise NotImplementedError("Output file path is not implemented yet") + + # Validate source type specific parameters + if args.source_type == "kafka": + if args.input_kafka_topic is None: + raise ValueError("Input Kafka topic is required when using Kafka source") + if args.kafka_input_format != "json": + raise NotImplementedError( + "Kafka input format {} is not implemented yet".format( + args.kafka_input_format + ) + ) + elif args.source_type == "prometheus_remote_write": + if args.prometheus_base_port is None: + raise ValueError( + "Prometheus base port is required when using prometheus_remote_write source" + ) + if args.prometheus_path is None: + raise ValueError( + "Prometheus path is required when using prometheus_remote_write source" + ) + if args.prometheus_bind_ip is None: + raise ValueError( + "Prometheus bind IP is required when using prometheus_remote_write source" + ) + elif args.source_type == "file": + if args.input_file_path is None: + raise ValueError("Input file path is required 
when using file source") + if args.file_format is None: + raise ValueError("--file_format is required when using file source") + if args.ts_format is None: + raise ValueError("--ts_format is required when using file source") + if args.query_language != "sql": + raise ValueError( + "File source only supports --query_language sql, got: {}".format( + args.query_language + ) + ) + + if args.output_kafka_topic is None: + raise ValueError("Output Kafka topic is required") + + if args.output_format != "json": + raise NotImplementedError( + "Output format {} is not implemented yet".format(args.output_format) + ) + + +def create_connection_profile(args, template_dir) -> str: + """Create a connection profile JSON based on template""" + template = jinja_utils.load_template(template_dir, "connection_profile.j2") + + rendered = template.render( + profile_name=args.profile_name, bootstrap_servers=args.bootstrap_servers + ) + + # Save to file + output_path = os.path.join(args.output_dir, "connection_profile.json") + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created connection profile at: {output_path}") + + if args.dry_run: + # Generate a dummy profile ID for dry run + profile_id = "dry_run_profile_id" + print(f"[DRY RUN] Would create connection profile with ID: {profile_id}") + return profile_id + + # If API URL provided, create connection profile via API + response = http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_profiles", + data=rendered, + resource_type="connection profile", + ) + profile_id = json.loads(response).get("id") + + return profile_id + + +def delete_connection_profile(args): + if args.dry_run: + print( + f"[DRY RUN] Would delete connection profiles with name: {args.profile_name}" + ) + return + + # list all connection profiles + response = http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_profiles", + method="get", + ) + response = json.loads(response) + + # get the ID of the 
connection profile with the name args.profile_name + profiles = [ + profile for profile in response["data"] if profile["name"] == args.profile_name + ] + if len(profiles) == 0: + print(f"No connection profile found with name {args.profile_name}") + return + + # delete the connection profile with the ID + for profile in profiles: + http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_profiles/{profile['id']}", + method="delete", + ) + + +def create_source_connection_table( + args, + topic_name, + table_name, + profile_id, + metric_labels: List[str], + template_dir, + query_language: str, + metrics_dict=None, + table_schema: TableSchema = None, +): + """Create a connection table JSON (source) based on template + + Args: + metrics_dict: For optimized source only. Dictionary mapping metric names to their label lists. + e.g., {"cpu_usage": ["instance", "job"], "memory_usage": ["instance", "node"]} + query_language: "promql" or "sql" - determines schema structure + table_schema: For SQL mode, the TableSchema for this table + """ + + # Select template based on source type and query language + if args.source_type == "kafka": + if query_language == "sql": + template_name = "connection_table_kafka_sql.j2" + else: + template_name = "connection_table_kafka.j2" + elif args.source_type == "prometheus_remote_write": + if args.prometheus_remote_write_source == "optimized": + template_name = "connection_table_prometheus_remote_write_optimized.j2" + else: + template_name = "connection_table_prometheus_remote_write.j2" + elif args.source_type == "file": + template_name = "connection_table_file.j2" + else: + raise ValueError(f"Unsupported source type: {args.source_type}") + + template = jinja_utils.load_template(template_dir, template_name) + + # Create JSON schema definition for label fields + label_properties = {} + label_fields_json = [] + + for field in metric_labels: + # Add field to JSON schema properties + label_properties[field] = {"type": "string", "description": 
f"{field} label"} + + # Add field to fields array for schema + label_fields_json.append( + { + "fieldName": field, + "fieldType": {"type": {"primitive": "String"}, "sqlName": "TEXT"}, + "nullable": False, + "metadataKey": None, + } + ) + + # Generate the complete JSON schema definition + json_schema = { + "type": "object", + "required": ["labels", "value", "name", "timestamp"], + "properties": { + "labels": { + "type": "object", + "required": metric_labels, + "properties": label_properties, + "additionalProperties": False, + }, + "value": {"type": "number", "description": "Metric value"}, + "name": {"type": "string", "description": "Metric name"}, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Time when the metric was recorded, in RFC 3339 format", + }, + }, + "additionalProperties": False, + } + + if args.source_type == "kafka": + json_schema["properties"]["timestamp"] = { + "type": "string", + "format": "date-time", + "description": "Time when the metric was recorded, in RFC 3339 format", + } + elif args.source_type == "prometheus_remote_write": + json_schema["properties"]["timestamp"] = { + "type": "integer", + "description": "Unix timestamp in milliseconds when the metric was recorded", + } + + template_vars = { + "table_name": table_name, + "label_fields": label_fields_json, + "json_schema": json.dumps(json_schema, indent=2) + .replace("\n", "\\n") + .replace('"', '\\"'), + } + + if args.source_type == "kafka": + template_vars["topic_name"] = topic_name + template_vars["profile_id"] = profile_id + + # For SQL mode, override template_vars with flat schema + if query_language == "sql" and table_schema is not None: + sql_json_schema = build_sql_json_schema(table_schema) + template_vars = { + "table_name": table_name, + "topic_name": topic_name, + "profile_id": profile_id, + "time_column": table_schema.time_column, + "value_columns": table_schema.value_columns, + "metadata_columns": table_schema.metadata_columns, + "json_schema": 
json.dumps(sql_json_schema, indent=2) + .replace("\n", "\\n") + .replace('"', '\\"'), + } + elif args.source_type == "prometheus_remote_write": + template_vars["base_port"] = args.prometheus_base_port + template_vars["parallelism"] = args.parallelism + template_vars["path"] = args.prometheus_path + template_vars["bind_ip"] = args.prometheus_bind_ip + + # For optimized source, build metrics array from metrics_dict + if args.prometheus_remote_write_source == "optimized": + if metrics_dict is None: + raise ValueError("metrics_dict is required for optimized source") + + # Build metrics array: [{"name": "cpu_usage", "labels": ["instance", "job"]}, ...] + metrics_array = [ + {"name": metric_name, "labels": labels} + for metric_name, labels in metrics_dict.items() + ] + template_vars["metrics_json"] = json.dumps(metrics_array) + del template_vars["label_fields"] + # # Create a minimal JSON schema (won't be used by connector but required by API) + # minimal_schema = { + # "type": "object", + # "properties": { + # "metric_name": {"type": "string"}, + # "timestamp": {"type": "integer"}, + # "value": {"type": "number"}, + # }, + # } + # template_vars["json_schema"] = ( + # json.dumps(minimal_schema, indent=2) + # .replace("\n", "\\n") + # .replace('"', '\\"') + # ) + elif args.source_type == "file": + # NOTE: Currently assumes value_columns are F64/DOUBLE and metadata_columns are String/TEXT. + # If more precise type mappings are needed, extend SQLTableConfig with per-column type info. 
+ ts_format_to_primitive = { + "unix_millis": "UnixMillis", + "unix_seconds": "UnixMillis", + "rfc3339": "UnixNanos", + } + template_vars = { + "table_name": table_name, + "file_path": args.input_file_path, + "file_format": args.file_format, + "timestamp_field": table_schema.time_column, + "ts_format": args.ts_format, + "time_column": table_schema.time_column, + "timestamp_primitive": ts_format_to_primitive[args.ts_format], + "value_columns": table_schema.value_columns, + "metadata_columns": table_schema.metadata_columns, + } + + rendered = template.render(**template_vars) + + # Save to file + filename = "connection_table_source.json" + output_path = os.path.join(args.output_dir, filename) + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created source table at: {output_path}") + + if args.dry_run: + print(f"[DRY RUN] Would create source connection table: {table_name}") + return + + # If API URL provided, create connection table via API + http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_tables", + data=rendered, + resource_type="source table", + ) + + +def create_sink_connection_table( + args, + topic_name, + table_name, + profile_id, + template_dir, +): + """Create a connection table JSON (sink) based on template""" + + template = jinja_utils.load_template(template_dir, "connection_table_sink.j2") + + rendered = template.render( + table_name=table_name, topic_name=topic_name, profile_id=profile_id + ) + + # Save to file + filename = "connection_table_sink.json" + output_path = os.path.join(args.output_dir, filename) + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Created sink table at: {output_path}") + + if args.dry_run: + print(f"[DRY RUN] Would create sink connection table: {table_name}") + return + + # If API URL provided, create connection table via API + http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="connection_tables", + data=rendered, + 
resource_type="sink table", + ) + + +def delete_connection_table(args, table_name): + if args.dry_run: + print(f"[DRY RUN] Would delete connection table: {table_name}") + return + + # list all connection tables + response = http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_tables", + method="get", + ) + response = json.loads(response) + + # get the ID of the connection table with table_name + tables = [table for table in response["data"] if table["name"] == table_name] + if len(tables) == 0: + print(f"No connection table found with name {table_name}") + return + + # delete the connection table with the ID + for table in tables: + http_utils.make_api_request( + url=f"{args.arroyo_url}/connection_tables/{table['id']}", + method="delete", + ) + + +def create_pipeline( + args: argparse.Namespace, + sql_queries: List[str], + agg_functions_with_params: List[Tuple[str, dict]], + streaming_aggregation_configs: List, + json_template_dir: str, + udf_dir: str, +): + """Create a pipeline JSON based on template""" + + # Escape newlines in SQL query for JSON compatibility + sql_queries = [sql_query.replace("\n", "\\n") for sql_query in sql_queries] + sql_query = "\\n\\n".join(sql_queries) + + # UDFs handling + udfs = [] + # NOTE: if we're using Arroyo built from source (v0.15.0-dev), we can directly support &str arguments in UDAFs, and thus don't need string_to_hash + # udf_names = list(set(agg_functions)) + ["string_to_hash"] + unique_agg_functions = list( + set([agg_func for agg_func, _ in agg_functions_with_params]) + ) + udf_names = unique_agg_functions + ["gzip_compress"] + # udf_names = list(set(agg_functions)) + + # Create a mapping of agg_function to parameters for UDF rendering + agg_function_params = {} + for agg_func, params in agg_functions_with_params: + if agg_func not in agg_function_params: + agg_function_params[agg_func] = params + + # Special handling for deltasetaggregator - need separate UDF instances per aggregation_id + 
deltasetaggregator_instances = [] + for config in streaming_aggregation_configs: + if config.aggregationType.lower() == "deltasetaggregator": + deltasetaggregator_instances.append(config.aggregationId) + + for udf_name in udf_names: + # Special case for deltasetaggregator - generate separate UDF for each aggregation_id + if udf_name == "deltasetaggregator_": + for aggregation_id in deltasetaggregator_instances: + template_path = os.path.join(udf_dir, f"{udf_name}.rs.j2") + + if os.path.exists(template_path): + # Render the Jinja template with aggregation_id + udf_template = jinja_utils.load_template( + udf_dir, f"{udf_name}.rs.j2" + ) + udf_body = udf_template.render(aggregation_id=aggregation_id) + udfs.append({"definition": udf_body, "language": "rust"}) + else: + raise FileNotFoundError( + f"Template {template_path} not found for deltasetaggregator" + ) + else: + # Regular UDF processing for non-deltasetaggregator UDFs + template_path = os.path.join(udf_dir, f"{udf_name}.rs.j2") + regular_path = os.path.join(udf_dir, f"{udf_name}.rs") + + # Get parameters for this UDF + params = agg_function_params.get(udf_name, {}) + + if len(params) > 0 and not os.path.exists(template_path): + raise ValueError( + f"UDF {udf_name} requires parameters {params} but no template found at {template_path}" + ) + + if os.path.exists(template_path): + # Read template source and get required parameters + with open(template_path, "r") as file: + template_source = file.read() + + # Render the Jinja template with parameters + udf_template = jinja_utils.load_template(udf_dir, f"{udf_name}.rs.j2") + + # Get all required template variables + required_params = jinja_utils.get_template_variables( + template_source, udf_template.environment + ) + + # Handle config key mapping (K -> k for KLL) + if "K" in params and "k" in required_params: + params["k"] = params["K"] + + # Check that all required parameters are provided + missing_params = required_params - set(params.keys()) + if missing_params: 
+ raise ValueError( + f"UDF {udf_name} requires parameters {missing_params} but they were not in the configuration" + ) + + udf_body = udf_template.render(**params) + elif os.path.exists(regular_path): + # Use regular file if no template exists + with open(regular_path, "r") as f: + udf_body = f.read() + else: + raise FileNotFoundError( + f"Neither {template_path} nor {regular_path} exists" + ) + + udfs.append({"definition": udf_body, "language": "rust"}) + + # Load pipeline template + pipeline_template = jinja_utils.load_template(json_template_dir, "pipeline.j2") + + rendered = pipeline_template.render( + pipeline_name=args.pipeline_name, + sql_query=sql_query, + udfs=udfs, + parallelism=args.parallelism, + ) + + # Save to file + output_path = os.path.join(args.output_dir, "pipeline.json") + with open(output_path, "w") as f: + f.write(rendered) + + print(f"Creating pipeline at: {output_path}") + + if args.dry_run: + pipeline_id = "dry_run_pipeline_id" + print(f"[DRY RUN] Would create pipeline with ID: {pipeline_id}") + return + + # If API URL provided, create pipeline via API + response = http_utils.create_arroyo_resource( + arroyo_url=args.arroyo_url, + endpoint="pipelines", + data=rendered, + resource_type="pipeline", + ) + + response = json.loads(response) + pipeline_id = response["id"] + print(f"Pipeline created with ID: {pipeline_id}") + + # Write pipeline ID to file for retrieval when running with avoid_long_ssh + pipeline_id_file = os.path.join(args.output_dir, "pipeline_id.txt") + with open(pipeline_id_file, "w") as f: + f.write(pipeline_id) + f.flush() + os.fsync(f.fileno()) # Ensure it's written to disk + print(f"Pipeline ID written to: {pipeline_id_file}") + + +def delete_pipelines(args): + if args.dry_run: + print("[DRY RUN] Would delete all existing pipelines") + return + + # # list all pipelines + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines", + # method="get", + # ) + # response = json.loads(response) + # if 
response["data"] is None: + # print("No pipelines found") + # return + + # pipeline_ids = [pipeline["id"] for pipeline in response["data"]] + pipeline_ids = arroyo_utils.get_all_pipelines(arroyo_url=args.arroyo_url) + + arroyo_utils.stop_and_delete_pipelines( + arroyo_url=args.arroyo_url, pipeline_ids=pipeline_ids + ) + + # # stop and delete all pipelines + # for pipeline_id in pipeline_ids: + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{pipeline_id}", + # method="patch", + # data=json.dumps({"stop": "immediate"}), + # ) + + # time.sleep(5) + # for pipeline_id in pipeline_ids: + # success = False + # for _ in range(num_retries): + # try: + # response = http_utils.make_api_request( + # url=f"{args.arroyo_url}/pipelines/{pipeline_id}", + # method="delete", + # ) + # success = True + # except Exception as e: + # print(f"Failed to delete pipeline {pipeline_id}: {e}") + # time.sleep(5) + + # if not success: + # raise Exception( + # f"Failed to delete pipeline {pipeline_id} after {num_retries} retries" + # ) + + +def get_sql_query( + streaming_aggregation_config: StreamingAggregationConfig, + schema_config, # MetricConfig or SQLTableConfig + query_language: str, + sql_template: Template, + source_table: str, + sink_table: str, + source_type: str, + use_nested_labels: bool, + filter_metric_name: str = None, +) -> Tuple[str, str, dict]: + + # NEW: Support both tumbling and sliding windows (Issue #236) + window_type = streaming_aggregation_config.windowType + window_interval = "{} seconds".format( + streaming_aggregation_config.tumblingWindowSize + ) + window_size = "{} seconds".format(streaming_aggregation_config.windowSize) + slide_interval = "{} seconds".format(streaming_aggregation_config.slideInterval) + + logger.info( + f"Preparing SQL query for aggregation {streaming_aggregation_config.aggregationId}: " + f"windowType={window_type}, windowSize={window_size}, slideInterval={slide_interval}" + ) + + agg_function = "{}_{}".format( + 
streaming_aggregation_config.aggregationType, + streaming_aggregation_config.aggregationSubType, + ) + + # Get column names based on query language + if query_language == "sql": + time_column = schema_config.get_time_column( + streaming_aggregation_config.table_name + ) + value_column = streaming_aggregation_config.value_column + label_prefix = "" # SQL mode: no nesting + else: + time_column = "timestamp" + value_column = "value" + label_prefix = "labels." if use_nested_labels else "" + + fully_qualified_group_by_columns = [ + "{}{}".format(label_prefix, label) + for label in streaming_aggregation_config.labels["grouping"].keys + ] + fully_qualified_agg_columns = [ + "{}{}".format(label_prefix, label) + for label in streaming_aggregation_config.labels["aggregated"].keys + ] + + # Get all labels for this aggregation + if query_language == "sql": + source_identifier = streaming_aggregation_config.table_name + all_labels = schema_config.get_metadata_columns(source_identifier) + else: + source_identifier = streaming_aggregation_config.metric + all_labels = schema_config.config[source_identifier].keys + + all_labels_agg_columns = [ + "{}{}".format(label_prefix, label) for label in all_labels + ] + + # Determine if timestamps should be included as argument + include_timestamps_as_argument = ( + streaming_aggregation_config.aggregationType == "multipleincrease" + ) + + # This is just a patch for topk query. 
+ if streaming_aggregation_config.aggregationSubType == "topk": + key_list = all_labels_agg_columns + else: + key_list = fully_qualified_agg_columns + agg_columns = ", ".join(key_list) + + sql_query = sql_template.render( + aggregation_id=streaming_aggregation_config.aggregationId, + sink_table=sink_table, + agg_function=agg_function, + agg_columns=agg_columns, + source_table=source_table, + group_by_columns=", ".join(fully_qualified_group_by_columns), + window_interval=window_interval, + window_type=window_type, # NEW: for sliding/tumbling selection + window_size=window_size, # NEW: for HOP window size + slide_interval=slide_interval, # NEW: for HOP slide interval + include_timestamps_as_argument=include_timestamps_as_argument, + source_type=source_type, + filter_metric_name=filter_metric_name, # NEW: for multi-metric filtering + time_column=time_column, # NEW: for SQL mode + value_column=value_column, # NEW: for SQL mode + ) + + return sql_query, agg_function, streaming_aggregation_config.parameters + + +def build_sql_json_schema(table_schema: TableSchema) -> dict: + """Build JSON schema for SQL-style Kafka data.""" + properties = { + table_schema.time_column: { + "type": "string", + "format": "date-time", + "description": "Timestamp column", + } + } + required = [table_schema.time_column] + + for value_col in table_schema.value_columns: + properties[value_col] = { + "type": "number", + "description": f"Value column: {value_col}", + } + required.append(value_col) + + for meta_col in table_schema.metadata_columns: + properties[meta_col] = { + "type": "string", + "description": f"Metadata column: {meta_col}", + } + required.append(meta_col) + + return { + "type": "object", + "required": required, + "properties": properties, + "additionalProperties": False, + } + + +def get_source_table_name_sql(args, table_name: str) -> str: + """Get the source table name for SQL mode.""" + if args.source_type == "kafka": + return f"{args.input_kafka_topic}_{table_name.replace(' ', 
'_')}" + elif args.source_type == "file": + filename = os.path.basename(args.input_file_path) + filename_no_ext = os.path.splitext(filename)[0] + return f"{filename_no_ext}_{table_name.replace(' ', '_')}" + else: + raise ValueError(f"Unsupported source type for SQL mode: {args.source_type}") + + +def get_source_table_name(args, metric_name): + """Get the source table name based on the metric name and source type""" + if args.source_type == "kafka": + return "{}_{}".format(args.input_kafka_topic, metric_name.replace(" ", "_")) + elif args.source_type == "prometheus_remote_write": + return "prometheus_{}_{}".format( + args.prometheus_base_port, metric_name.replace(" ", "_") + ) + elif args.source_type == "file": + # Use filename without extension for table name + filename = os.path.basename(args.input_file_path) + filename_no_ext = os.path.splitext(filename)[0] + return "{}_{}".format(filename_no_ext, metric_name.replace(" ", "_")) + else: + raise ValueError(f"Unsupported source type: {args.source_type}") + + +def main(args): + os.makedirs(args.output_dir, exist_ok=True) + + # source_table = args.input_kafka_topic + "_table" + sink_table = args.output_kafka_topic + "_table" + + with open(args.config_file_path, "r") as fin: + config = yaml.safe_load(fin) + + # Query language from command line argument (defaults to promql) + query_language = args.query_language + + # Create appropriate schema config based on query language + if query_language == "promql": + schema_config = MetricConfig(config["metrics"]) + elif query_language == "sql": + schema_config = SQLTableConfig(config) + else: + raise ValueError(f"Unsupported query_language: {query_language}") + + streaming_aggregation_configs = [ + StreamingAggregationConfig.from_dict(aggregation_config) + for aggregation_config in config["aggregations"] + ] + + for streaming_aggregation_config in streaming_aggregation_configs: + streaming_aggregation_config.aggregationType = ( + 
streaming_aggregation_config.aggregationType.lower() + ) + streaming_aggregation_config.aggregationSubType = ( + streaming_aggregation_config.aggregationSubType.lower() + ) + streaming_aggregation_config.validate(schema_config, query_language) + + json_template_dir = os.path.join(args.template_dir, "json") + sql_template_dir = os.path.join(args.template_dir, "sql") + udf_dir = os.path.join(args.template_dir, "udfs") + + # Create connection profile for Kafka, since we definitely need it for sink + delete_connection_profile(args) + profile_id = create_connection_profile(args, json_template_dir) + + # For prometheus_remote_write optimized source, create ONE source for ALL metrics + if ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ): + # Create single source table for all metrics + source_table = f"prometheus_{args.prometheus_base_port}_all_metrics" + delete_connection_table(args, source_table) + + # Build metrics dict: {metric_name: [label1, label2, ...]} + metrics_dict = { + metric_name: list(metric_labels.keys) + for metric_name, metric_labels in schema_config.config.items() + } + + create_source_connection_table( + args, + None, # topic_name not needed + source_table, + profile_id, + [], # metric_labels not used for multi-metric + json_template_dir, + query_language=query_language, + metrics_dict=metrics_dict, + ) + elif query_language == "sql": + # SQL mode: create one source per table + for table_name, table_schema in schema_config.config.items(): + source_table = get_source_table_name_sql(args, table_name) + delete_connection_table(args, source_table) + + create_source_connection_table( + args, + args.input_kafka_topic, + source_table, + profile_id, + [], # metric_labels not used for SQL mode + json_template_dir, + query_language=query_language, + table_schema=table_schema, + ) + else: + # For other sources (Kafka, non-optimized prometheus, file), create one source per metric + for metric_name, 
metric_labels in schema_config.config.items(): + source_table = get_source_table_name(args, metric_name) + delete_connection_table(args, source_table) + + # Set topic_name based on source type (only needed for Kafka) + topic_name = args.input_kafka_topic if args.source_type == "kafka" else None + + create_source_connection_table( + args, + topic_name, + source_table, + profile_id, + metric_labels.keys, + json_template_dir, + query_language=query_language, + ) + + delete_connection_table(args, sink_table) + create_sink_connection_table( + args, args.output_kafka_topic, sink_table, profile_id, json_template_dir + ) + + aggregation_sql_template = jinja_utils.load_template( + sql_template_dir, "single_windowed_aggregation.j2" + ) + labels_sql_template = jinja_utils.load_template( + sql_template_dir, "distinct_windowed_labels.j2" + ) + deltasetaggregator_sql_template = jinja_utils.load_template( + sql_template_dir, "distinct_windowed_labels_deltasetaggregator.j2" + ) + value_only_sql_template = jinja_utils.load_template( + sql_template_dir, "single_arg_value_aggregation.j2" + ) + + sql_queries = [] + agg_functions_with_params = [] + + # Determine if using single unified source table + use_unified_source_table = ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ) + + for streaming_aggregation_config in streaming_aggregation_configs: + if use_unified_source_table: + # Use the unified table for all metrics + source_table = f"prometheus_{args.prometheus_base_port}_all_metrics" + elif query_language == "sql": + source_table = get_source_table_name_sql( + args, streaming_aggregation_config.table_name + ) + else: + source_table = get_source_table_name( + args, streaming_aggregation_config.metric + ) + + is_labels_accumulator: bool = ( + streaming_aggregation_config.aggregationType == "setaggregator" + or streaming_aggregation_config.aggregationType == "deltasetaggregator" + ) + + # Value-only aggregations that only 
take Vec as a single argument + is_value_only_aggregation: bool = ( + streaming_aggregation_config.aggregationType == "datasketcheskll" + ) + + # Choose appropriate SQL template + if streaming_aggregation_config.aggregationType == "deltasetaggregator": + sql_template = deltasetaggregator_sql_template + elif is_labels_accumulator: + sql_template = labels_sql_template + elif is_value_only_aggregation: + sql_template = value_only_sql_template + else: + sql_template = aggregation_sql_template + + # Determine if we should use nested labels based on source configuration + # SQL mode uses flat schema (no nesting), prometheus optimized also uses flat + use_nested_labels = not ( + query_language == "sql" + or ( + args.source_type == "prometheus_remote_write" + and args.prometheus_remote_write_source == "optimized" + ) + ) + + # When using unified source table, pass metric name for WHERE clause filtering + filter_metric_name = ( + streaming_aggregation_config.metric if use_unified_source_table else None + ) + + sql_query, agg_function, parameters = get_sql_query( + streaming_aggregation_config, + schema_config, + query_language, + sql_template, + source_table, + sink_table, + args.source_type, + use_nested_labels, + filter_metric_name, + ) + + sql_queries.append(sql_query) + # if not is_labels_accumulator: + agg_functions_with_params.append((agg_function, parameters)) + + print( + "Generated SQL query for aggregation ID {}: \n{}".format( + streaming_aggregation_config.aggregationId, sql_query + ) + ) + delete_pipelines(args) + create_pipeline( + args, + sql_queries, + agg_functions_with_params, + streaming_aggregation_configs, + json_template_dir, + udf_dir, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + # Dry run option + parser.add_argument( + "--dry_run", + action="store_true", + help="Test the logic without making API calls", + ) + + # StreamingConfig + parser.add_argument( + "--config_file_path", + type=str, + required=True, + help="Path to 
the configuration file", + ) + + # Connection profile parameters + parser.add_argument( + "--profile_name", + default="default-kafka-profile", + help="Name for the connection profile", + ) + parser.add_argument( + "--bootstrap_servers", default="localhost:9092", help="Kafka bootstrap servers" + ) + + # Source type selection + parser.add_argument( + "--source_type", + type=str, + choices=["kafka", "prometheus_remote_write", "file"], + required=True, + help="Type of source to use", + ) + + # Connection table parameters + parser.add_argument( + "--input_kafka_topic", type=str, required=False, help="Input Kafka topic" + ) + parser.add_argument( + "--input_file_path", type=str, required=False, help="Path to the input file" + ) + parser.add_argument( + "--file_format", + type=str, + required=False, + choices=["json", "parquet"], + help="Format of the input file (required for file source)", + ) + parser.add_argument( + "--ts_format", + type=str, + required=False, + choices=["unix_millis", "unix_seconds", "rfc3339"], + help="Timestamp format in the input file (required for file source)", + ) + + # Prometheus remote write source parameters + parser.add_argument( + "--prometheus_base_port", + type=int, + required=False, + help="Base port for Prometheus remote write endpoint", + ) + parser.add_argument( + "--prometheus_path", + type=str, + required=False, + help="Path for Prometheus remote write endpoint", + ) + parser.add_argument( + "--prometheus_bind_ip", + type=str, + required=False, + help="IP address to bind Prometheus remote write endpoint to", + ) + parser.add_argument( + "--parallelism", + type=int, + required=True, + help="Pipeline parallelism (number of parallel tasks)", + ) + parser.add_argument( + "--prometheus_remote_write_source", + type=str, + choices=["v1", "optimized"], + default="v1", + help="Version of Prometheus remote_write source (v1=nested labels, optimized=flattened labels)", + ) + + parser.add_argument( + "--output_kafka_topic", type=str, 
required=False, help="Output Kafka topic"
+    )
+    parser.add_argument(
+        "--output_file_path", type=str, required=False, help="Path to the output file"
+    )
+
+    parser.add_argument(
+        "--kafka_input_format",
+        required=False,
+        choices=["json", "avro-json", "avro-binary"],
+    )
+    parser.add_argument("--output_format", required=True, choices=["json", "byte"])
+
+    parser.add_argument("--pipeline_name", required=True, help="Pipeline name")
+
+    parser.add_argument(
+        "--template_dir",
+        default="./templates",
+        help="Directory containing template files",
+    )
+
+    parser.add_argument(
+        "--output_dir",
+        default="./outputs",
+        help="Directory to save the generated files",
+    )
+
+    parser.add_argument(
+        "--arroyo_url",
+        default="http://localhost:5115/api/v1",
+        help="URL of the Arroyo API server",
+    )
+
+    parser.add_argument(
+        "--query_language",
+        type=str,
+        choices=["promql", "sql"],
+        default="promql",
+        help="Query language for schema interpretation (default: promql)",
+    )
+
+    args = parser.parse_args()
+    check_args(args)
+    main(args)
diff --git a/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs
new file mode 100644
index 0000000..2d214b2
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/countminsketch_sum.rs
@@ -0,0 +1,58 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+twox-hash = "2.1.0"
+*/
+use arroyo_udf_plugin::udf;
+use rmp_serde::Serializer;
+use serde::{Serialize, Deserialize};
+// FIX: the use-statement was missing its terminating semicolon (compile error);
+// the sibling file old/countminsketch_sum.rs shows the correct `use ...;` form.
+use twox_hash::XxHash32;
+
+// Count-Min Sketch parameters
+const WIDTH: usize = 1024; // Number of buckets per hash function
+const DEPTH: usize = 4; // Number of hash functions
+
+#[derive(Serialize, Deserialize, Clone)]
+struct CountMinSketch {
+    // FIX: generic parameters were garbled ("Vec>") in the residue; the
+    // DEPTH x WIDTH counter matrix is initialised with f64 zeros in new(),
+    // so the element type is Vec<Vec<f64>>.
+    table: Vec<Vec<f64>>,
+    width: usize,
+    depth: usize,
+}
+
+impl CountMinSketch {
+    fn new() -> Self {
+        CountMinSketch {
+            table: vec![vec![0.0; WIDTH]; DEPTH],
+            width: WIDTH,
+            depth: DEPTH,
+        }
+    }
+
+    // Update the
sketch with a key-value pair + fn update(&mut self, key: u32, value: f64) { + for i in 0..self.depth { + //let hash_val = xxh32(&key.to_le_bytes(), i as u32); + let hash = XxHash32::oneshot(i as u32, &key.to_le_bytes()); + let bucket = (hash_val as usize) % self.width; + self.table[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_sum(keys: Vec, values: Vec) -> Option> { + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + countminsketch.update(key, values[i]); + } + } + + let mut buf = Vec::new(); + countminsketch.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs new file mode 100644 index 0000000..9f1225c --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multipleincrease_.rs @@ -0,0 +1,53 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[derive(Serialize)] +struct MeasurementData { + starting_measurement: f64, + starting_timestamp: i64, + last_seen_measurement: f64, + last_seen_timestamp: i64, +} + +#[udf] +fn multipleincrease_(keys: Vec, values: Vec, timestamps: Vec) -> Option> { + // Create a new hashmap to store measurement data with timestamps + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys, values, and timestamps + for (i, &key) in keys.iter().enumerate() { + if i < values.len() && i < timestamps.len() { + let value = values[i]; + let timestamp = timestamps[i]; + + let entry = per_key_storage.entry(key).or_insert(MeasurementData { + starting_measurement: value, + starting_timestamp: timestamp, + 
last_seen_measurement: value,
+                last_seen_timestamp: timestamp,
+            });
+
+            // Update last seen measurement and timestamp
+            entry.last_seen_measurement = value;
+            entry.last_seen_timestamp = timestamp;
+
+            // If this timestamp is earlier than our current starting timestamp, update starting values
+            //if timestamp < entry.starting_timestamp {
+            //    entry.starting_measurement = value;
+            //    entry.starting_timestamp = timestamp;
+            //}
+        }
+    }
+
+    let mut buf = Vec::new();
+    per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs
new file mode 100644
index 0000000..bd35507
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_max.rs
@@ -0,0 +1,28 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+*/
+
+use arroyo_udf_plugin::udf;
+use std::collections::HashMap;
+use rmp_serde::Serializer;
+use serde::Serialize;
+
+#[udf]
+// NOTE(review): garbled generics restored; u32 keys match string_to_hash's output.
+fn multipleminmax_max(keys: Vec<u32>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new hashmap
+    let mut per_key_storage: HashMap<u32, f64> = HashMap::new();
+
+    // Iterate through the keys and values
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            // FIX(comment): this variant keeps the per-key MAXIMUM — update when the
+            // new value is GREATER than the stored one. The previous comment said
+            // "less than", copied verbatim from the min variant, contradicting .max().
+            per_key_storage.entry(key).and_modify(|v| *v = (*v).max(values[i])).or_insert(values[i]);
+        }
+    }
+
+    let mut buf = Vec::new();
+    per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs
new file mode 100644
index 0000000..750acaa
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/multipleminmax_min.rs
@@ -0,0 +1,28 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+*/
+
+use
arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[udf] +fn multipleminmax_min(keys: Vec, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage.entry(key).and_modify(|v| *v = (*v).min(values[i])).or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs new file mode 100644 index 0000000..d8441c3 --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_count.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use std::collections::HashMap; +use rmp_serde::Serializer; +use serde::Serialize; + +#[udf] +fn multiplesum_count(keys: Vec, values: Vec) -> Option> { + // Create a new hashmap to store the count for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key).or_insert(0.0) += 1.0; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs new file mode 100644 index 0000000..0a3cc80 --- /dev/null +++ b/ArroyoSketch/templates/hashed_key_udfs/multiplesum_sum.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; 
+use std::collections::HashMap;
+use rmp_serde::Serializer;
+use serde::Serialize;
+
+#[udf]
+// NOTE(review): garbled generics restored; u32 keys match string_to_hash's output.
+fn multiplesum_sum(keys: Vec<u32>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new hashmap to store the sum of values for each key
+    let mut key_sums: HashMap<u32, f64> = HashMap::new();
+
+    // Iterate through the keys and values and update the sum for each key
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            *key_sums.entry(key).or_insert(0.0) += values[i];
+        }
+    }
+
+    let mut buf = Vec::new();
+    key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs b/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs
new file mode 100644
index 0000000..a29bee9
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/old/countminsketch_sum.rs
@@ -0,0 +1,57 @@
+/*
+[dependencies]
+rmp-serde = "1.1"
+serde = { version = "1.0", features = ["derive"] }
+xxhash-rust = { version = "0.8", features = ["xxh32"] }
+*/
+use arroyo_udf_plugin::udf;
+use rmp_serde::Serializer;
+use serde::{Serialize, Deserialize};
+use xxhash_rust::xxh32::xxh32;
+
+// Count-Min Sketch parameters
+const WIDTH: usize = 1024; // Number of buckets per hash function
+const DEPTH: usize = 4; // Number of hash functions
+
+#[derive(Serialize, Deserialize, Clone)]
+struct CountMinSketch {
+    // FIX: garbled generics restored — a DEPTH x WIDTH matrix of f64 counters,
+    // as initialised in new().
+    table: Vec<Vec<f64>>,
+    width: usize,
+    depth: usize,
+}
+
+impl CountMinSketch {
+    fn new() -> Self {
+        CountMinSketch {
+            table: vec![vec![0.0; WIDTH]; DEPTH],
+            width: WIDTH,
+            depth: DEPTH,
+        }
+    }
+
+    // Update the sketch with a key-value pair
+    fn update(&mut self, key: u64, value: f64) {
+        for i in 0..self.depth {
+            let hash_val = xxh32(&key.to_le_bytes(), i as u32);
+            let bucket = (hash_val as usize) % self.width;
+            self.table[i][bucket] += value;
+        }
+    }
+}
+
+#[udf]
+// FIX: garbled generics restored; this legacy variant uses u64 keys (see update()).
+fn countminsketch_sum(keys: Vec<u64>, values: Vec<f64>) -> Option<Vec<u8>> {
+    // Create a new Count-Min Sketch
+    let mut countminsketch = CountMinSketch::new();
+
+    // Iterate
through the keys and values and update the sketch for each entry
+    for (i, &key) in keys.iter().enumerate() {
+        if i < values.len() {
+            countminsketch.update(key, values[i]);
+        }
+    }
+
+    let mut buf = Vec::new();
+    countminsketch.serialize(&mut Serializer::new(&mut buf)).ok()?;
+    Some(buf)
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs b/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs
new file mode 100644
index 0000000..c08e244
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/old/string_to_hash.rs
@@ -0,0 +1,16 @@
+/*
+[dependencies]
+ahash = "0.8.6"
+*/
+
+use arroyo_udf_plugin::udf;
+use ahash::AHasher;
+use std::hash::{Hash, Hasher};
+// FIX: removed stray `use xxhash_rust::xxh32::xxh32;` — it was never used, and
+// `xxhash-rust` is not in this UDF's [dependencies] header, so the import
+// could not resolve when the UDF is compiled.
+
+#[udf]
+fn string_to_hash(input: &str) -> u64 {
+    let mut hasher = AHasher::default();
+    input.hash(&mut hasher);
+    hasher.finish()
+}
diff --git a/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs b/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs
new file mode 100644
index 0000000..7014a62
--- /dev/null
+++ b/ArroyoSketch/templates/hashed_key_udfs/string_to_hash.rs
@@ -0,0 +1,15 @@
+/*
+[dependencies]
+twox-hash = "2.1.0"
+*/
+
+use arroyo_udf_plugin::udf;
+use twox_hash::XxHash32;
+
+#[udf]
+fn string_to_hash(input: &str, seed: u32) -> u32 {
+    //let mut hasher = XxHash32::with_seed(seed);
+    //hasher.write(input.as_bytes());
+    //hasher.finish() as u32
+    XxHash32::oneshot(seed, input.as_bytes())
+}
diff --git a/ArroyoSketch/templates/json/connection_profile.j2 b/ArroyoSketch/templates/json/connection_profile.j2
new file mode 100644
index 0000000..2cf0827
--- /dev/null
+++ b/ArroyoSketch/templates/json/connection_profile.j2
@@ -0,0 +1,14 @@
+{
+  "name": "{{ profile_name }}",
+  "connector": "kafka",
+  "config": {
+    "authentication": {},
+    "bootstrapServers": "{{ bootstrap_servers }}",
+    "name": "{{ profile_name }}",
+    "schemaRegistryEnum": {},
+    "connectionProperties": {
+      "message.max.bytes": "20971520",
+      "batch.size": "20971520"
+ } + } +} diff --git a/ArroyoSketch/templates/json/connection_table_file.j2 b/ArroyoSketch/templates/json/connection_table_file.j2 new file mode 100644 index 0000000..100f971 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_file.j2 @@ -0,0 +1,62 @@ +{ + "name": "{{ table_name }}", + "connector": "single_file_custom", + "tableType": "source", + "config": { + "path": "{{ file_path }}", + "file_format": "{{ file_format }}", + "timestamp_field": "{{ timestamp_field }}", + "ts_format": "{{ ts_format }}" + }, + "schema": { + "format": { + "{{ file_format }}": {} + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "{{ time_column }}", + "fieldType": { + "type": { + "primitive": "{{ timestamp_primitive }}" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + {% for value_col in value_columns %} + { + "fieldName": "{{ value_col }}", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + }, + {% endfor %} + {% for col in metadata_columns %} + { + "fieldName": "{{ col }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ], + "inferred": true, + "primaryKeys": [] + } +} diff --git a/ArroyoSketch/templates/json/connection_table_kafka.j2 b/ArroyoSketch/templates/json/connection_table_kafka.j2 new file mode 100644 index 0000000..519a2c9 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_kafka.j2 @@ -0,0 +1,98 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "{{ topic_name }}", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + 
"unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + {% for field in label_fields %} + { + "fieldName": "{{ field.fieldName }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 b/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 new file mode 100644 index 0000000..8926f6a --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_kafka_sql.j2 @@ -0,0 +1,74 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "source", + "config": { + "topic": "{{ topic_name }}", + "type": { + "offset": "latest", + "read_mode": "read_uncommitted" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + 
"fields": [ + { + "fieldName": "{{ time_column }}", + "fieldType": { + "type": { + "primitive": "UnixNanos" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + {% for value_col in value_columns %} + { + "fieldName": "{{ value_col }}", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + }, + {% endfor %} + {% for col in metadata_columns %} + { + "fieldName": "{{ col }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if not loop.last %},{% endif %} + {% endfor %} + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 new file mode 100644 index 0000000..606f45e --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write.j2 @@ -0,0 +1,96 @@ +{ + "name": "{{ table_name }}", + "connector": "prometheus_remote_write_with_schema", + "tableType": "source", + "config": { + "bind_address": "{{ bind_ip }}", + "base_port": {{ base_port }}, + "parallelism": {{ parallelism }}, + "path": "{{ path }}" + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + }, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [ + { + "fieldName": "labels", + "fieldType": { + "type": { + "struct": { + "name": null, + "fields": [ + {% for field in label_fields %} + { + "fieldName": "{{ field.fieldName }}", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }{% if 
not loop.last %},{% endif %} + {% endfor %} + ] + } + }, + "sqlName": null + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "name", + "fieldType": { + "type": { + "primitive": "String" + }, + "sqlName": "TEXT" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "timestamp", + "fieldType": { + "type": { + "primitive": "UnixMillis" + }, + "sqlName": "TIMESTAMP" + }, + "nullable": false, + "metadataKey": null + }, + { + "fieldName": "value", + "fieldType": { + "type": { + "primitive": "F64" + }, + "sqlName": "DOUBLE" + }, + "nullable": false, + "metadataKey": null + } + ], + "definition": { + "json_schema": "{{ json_schema }}" + }, + "inferred": null, + "primaryKeys": [] + } +} diff --git a/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 new file mode 100644 index 0000000..d46cd8f --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_prometheus_remote_write_optimized.j2 @@ -0,0 +1,12 @@ +{ + "name": "{{ table_name }}", + "connector": "prometheus_remote_write_optimized", + "tableType": "source", + "config": { + "bind_address": "{{ bind_ip }}", + "base_port": {{ base_port }}, + "parallelism": {{ parallelism }}, + "path": "{{ path }}", + "metrics": {{ metrics_json }} + } +} diff --git a/ArroyoSketch/templates/json/connection_table_sink.j2 b/ArroyoSketch/templates/json/connection_table_sink.j2 new file mode 100644 index 0000000..5baaeb1 --- /dev/null +++ b/ArroyoSketch/templates/json/connection_table_sink.j2 @@ -0,0 +1,32 @@ +{ + "name": "{{ table_name }}", + "connector": "kafka", + "tableType": "sink", + "config": { + "topic": "{{ topic_name }}", + "type": { + "commit_mode": "at_least_once" + } + }, + "schema": { + "format": { + "json": { + "confluentSchemaRegistry": false, + "schemaId": null, + "includeSchema": false, + "debezium": false, + "unstructured": false, + "timestampFormat": "rfc3339" + } + 
}, + "badData": { + "fail": {} + }, + "framing": null, + "structName": null, + "fields": [], + "inferred": true, + "primaryKeys": [] + }, + "connectionProfileId": "{{ profile_id }}" +} diff --git a/ArroyoSketch/templates/json/pipeline.j2 b/ArroyoSketch/templates/json/pipeline.j2 new file mode 100644 index 0000000..1d7731b --- /dev/null +++ b/ArroyoSketch/templates/json/pipeline.j2 @@ -0,0 +1,6 @@ +{ + "name": "{{ pipeline_name }}", + "query": "{{ sql_query }}", + "udfs": {{ udfs|tojson(indent=2) }}, + "parallelism": {{ parallelism }} +} diff --git a/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 b/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 new file mode 100644 index 0000000..70da6d1 --- /dev/null +++ b/ArroyoSketch/templates/sql/distinct_windowed_labels.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({% if agg_columns %}concat_ws(';', {{ agg_columns }}){% else %}''{% endif %})) as precompute, + {{ aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 b/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 new file mode 100644 index 0000000..49c41f4 --- /dev/null +++ b/ArroyoSketch/templates/sql/distinct_windowed_labels_deltasetaggregator.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}{{ aggregation_id }}_({% if agg_columns %}concat_ws(';', {{ agg_columns }}){% else %}''{% endif %})) as precompute, + {{ aggregation_id }} as 
aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 b/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 new file mode 100644 index 0000000..2a68caf --- /dev/null +++ b/ArroyoSketch/templates/sql/single_arg_value_aggregation.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({{ value_column }})) as precompute, + {{ aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 b/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 new file mode 100644 index 0000000..0f55fe1 --- /dev/null +++ b/ArroyoSketch/templates/sql/single_windowed_aggregation.j2 @@ -0,0 +1,20 @@ +INSERT INTO + {{ sink_table }} +SELECT + gzip_compress({{ agg_function }}({% if agg_columns %}concat_ws(';', {{ agg_columns }}), {% else %}'', {% endif %}{{ value_column }}{% if include_timestamps_as_argument %}{% if source_type == "kafka" %}, cast(extract(epoch from {{ time_column }}) * 1000 as bigint){% else %}, cast({{ time_column }} as bigint){% endif %}{% endif %})) as precompute, + {{ 
aggregation_id }} as aggregation_id, + {% if group_by_columns %}concat_ws(';', {{ group_by_columns }}){% else %}''{% endif %} as key, + {%- if window_type == "sliding" %} + HOP(INTERVAL '{{ slide_interval }}', INTERVAL '{{ window_size }}') as window + {%- else %} + TUMBLE(INTERVAL '{{ window_interval }}') as window + {%- endif %} +FROM + {{ source_table }} +{%- if filter_metric_name %} +WHERE + metric_name = '{{ filter_metric_name }}' +{%- endif %} +GROUP BY + window, key +; diff --git a/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 b/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 new file mode 100644 index 0000000..16b532c --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketch_count.rs.j2 @@ -0,0 +1,58 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use twox_hash::XxHash32; + +// Count-Min Sketch parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_count(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in 
keys.iter().enumerate() { + countminsketch.update(key, 1.0); + } + + let mut buf = Vec::new(); + countminsketch + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 b/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 new file mode 100644 index 0000000..8bf0530 --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketch_sum.rs.j2 @@ -0,0 +1,63 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use twox_hash::XxHash32; + +// Count-Min Sketch parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } +} + +#[udf] +fn countminsketch_sum(keys: Vec<&str>, values: Vec) -> Option> { + // Check that keys and values have equal length + if keys.len() != values.len() { + return None; + } + + // Create a new Count-Min Sketch + let mut countminsketch = CountMinSketch::new(); + + // Iterate through the keys and values and update the sketch for each entry + for (i, &key) in keys.iter().enumerate() { + countminsketch.update(key, values[i]); + } + + let mut buf = Vec::new(); + countminsketch + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git 
a/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 b/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 new file mode 100644 index 0000000..988d780 --- /dev/null +++ b/ArroyoSketch/templates/udfs/countminsketchwithheap_topk.rs.j2 @@ -0,0 +1,194 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +twox-hash = "2.1.0" +*/ +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::BinaryHeap; +use std::cmp::Ordering; +use twox_hash::XxHash32; + +// Count-Min Sketch with Heap parameters +const DEPTH: usize = {{ depth }}; // Number of hash functions +const WIDTH: usize = {{ width }}; // Number of buckets per hash function +const HEAP_SIZE: usize = {{ heapsize }}; // Maximum number of top-k items to track + +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketch { + sketch: Vec>, + row_num: usize, + col_num: usize, +} + +impl CountMinSketch { + fn new() -> Self { + CountMinSketch { + sketch: vec![vec![0.0; WIDTH]; DEPTH], + row_num: DEPTH, + col_num: WIDTH, + } + } + + // Update the sketch with a key-value pair + fn update(&mut self, key: &str, value: f64) { + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + } + } + + // Update the sketch and return the estimated frequency in one pass + fn update_with_query(&mut self, key: &str, value: f64) -> f64 { + let mut min_estimate = f64::MAX; + for i in 0..self.row_num { + // already UTF-8 + let hash = XxHash32::oneshot(i as u32, key.as_bytes()); + let bucket = (hash as usize) % self.col_num; + self.sketch[i][bucket] += value; + // Track minimum while updating + let estimate = self.sketch[i][bucket]; + if estimate < min_estimate { + min_estimate = estimate; + } + } + min_estimate + } +} + +// HeapItem: equality and ordering based on value only 
+#[derive(Serialize, Deserialize, Clone)] +struct HeapItem { + key: String, + value: f64, +} + +// Implement PartialEq based on value only (consistent with Ord) +impl PartialEq for HeapItem { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +// Implement Eq for HeapItem (required for BinaryHeap) +impl Eq for HeapItem {} + +// Implement Ord for HeapItem to create a min-heap (reverse ordering) +impl Ord for HeapItem { + fn cmp(&self, other: &Self) -> Ordering { + // Reverse ordering for min-heap (smallest values at top) + // Suitable for topk, but if you want to implement bottomk, you should use a max-heap + other.value.partial_cmp(&self.value).unwrap_or(Ordering::Equal) + } +} + +impl PartialOrd for HeapItem { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +struct CountMinSketchWithHeap { + sketch: CountMinSketch, + topk_heap: BinaryHeap, // Maintain as heap during processing + heap_size: usize, +} + +/// TODO: Make this serializable version more expanded as count_min_sketch_with_heap_accumulator.rs in QueryEngineRust +// Serializable version for output +#[derive(Serialize, Deserialize, Clone)] +struct CountMinSketchWithHeapSerialized { + sketch: CountMinSketch, + topk_heap: Vec, // Convert to Vec for serialization + heap_size: usize, +} + +impl CountMinSketchWithHeap { + fn new() -> Self { + CountMinSketchWithHeap { + sketch: CountMinSketch::new(), + topk_heap: BinaryHeap::new(), + heap_size: HEAP_SIZE, + } + } + + // Update the sketch and maintain the top-k heap + fn update_with_topk(&mut self, key: &str, value: f64) { + // Update the Count-Min Sketch and get the estimated frequency in one pass + let estimated_freq = self.sketch.update_with_query(key, value); + + // Check if the key already exists in the heap + // TODO: This takes O(k) time, can we do better? + // Or is different keys guaranteed and we can just omit this check? + // Or can we optimize this with a HashMap? 
+ let existing_item = self.topk_heap.iter().find(|item| item.key == key).cloned(); + + if let Some(existing) = existing_item { + // Remove the old entry and add updated one + self.topk_heap.retain(|item| item.key != key); // retain others = remove this one + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } else { + // New key: add to heap + if self.topk_heap.len() < self.heap_size { + // Heap not full, just add + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } else { + // Heap is full, check if this item should replace the minimum + // Peeking is cheap. No worries. + if let Some(min_item) = self.topk_heap.peek() { + if estimated_freq > min_item.value { + self.topk_heap.pop(); // Remove minimum + self.topk_heap.push(HeapItem { + key: key.to_string(), + value: estimated_freq, + }); + } + } + } + } + } + + // Convert to serializable format + fn to_serializable(self) -> CountMinSketchWithHeapSerialized { + CountMinSketchWithHeapSerialized { + sketch: self.sketch, + topk_heap: self.topk_heap.into_iter().collect(), + heap_size: self.heap_size, + } + } +} + +#[udf] +fn countminsketchwithheap_topk(keys: Vec<&str>, values: Vec) -> Option> { + // Check that keys and values have equal length + if keys.len() != values.len() { + return None; + } + + // Create a new Count-Min Sketch with Heap + let mut cms_with_heap = CountMinSketchWithHeap::new(); + + // Iterate through the keys and values and update the sketch and heap + for (i, &key) in keys.iter().enumerate() { + cms_with_heap.update_with_topk(key, values[i]); + } + + // Convert to serializable format (heap to vec conversion happens only once here) + let serializable = cms_with_heap.to_serializable(); + + let mut buf = Vec::new(); + serializable + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 b/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 new file mode 
100644 index 0000000..ca34027 --- /dev/null +++ b/ArroyoSketch/templates/udfs/datasketcheskll_.rs.j2 @@ -0,0 +1,59 @@ +/* +[dependencies] +dsrs = { git = "https://github.com/SketchDB/datasketches-rs" } +arroyo-udf-plugin = "0.1" +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use dsrs::KllDoubleSketch; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; + +const DEFAULT_K: u16 = {{ k }}; + +#[derive(Serialize, Deserialize)] +struct KllSketchData { + k: u16, + sketch_bytes: Vec, +} + +struct KllSketchWrapper { + k: u16, + sketch: KllDoubleSketch, +} + +impl KllSketchWrapper { + fn new(k: u16) -> Self { + KllSketchWrapper { + k, + sketch: KllDoubleSketch::with_k(k), + } + } + + fn update(&mut self, values: &[f64]) { + for &value in values { + self.sketch.update(value); + } + } + + fn serialize_bytes(&self) -> Vec { + let sketch_data = self.sketch.serialize(); + let serialized = KllSketchData { + k: self.k, + sketch_bytes: sketch_data.as_ref().to_vec(), + }; + let mut buf = Vec::new(); + rmp_serde::encode::write(&mut buf, &serialized).unwrap(); + buf + } +} + +#[udf] +fn datasketcheskll_(values: Vec) -> Option> { + let mut kll_wrapper = KllSketchWrapper::new(DEFAULT_K); + kll_wrapper.update(&values); + + Some(kll_wrapper.serialize_bytes()) +} diff --git a/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 b/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 new file mode 100644 index 0000000..7a70a86 --- /dev/null +++ b/ArroyoSketch/templates/udfs/deltasetaggregator_.rs.j2 @@ -0,0 +1,62 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +lazy_static = "1.4" +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::sync::Mutex; +use lazy_static::lazy_static; + +#[derive(Serialize, Deserialize, Clone)] +struct DeltaResult { + added: HashSet, + removed: HashSet, +} + 
+// Global state to store the previous window's labels for aggregation {{aggregation_id}} +lazy_static! { + static ref PREVIOUS_LABELS_{{aggregation_id}}: Mutex> = Mutex::new(HashSet::new()); +} + +// Stateful UDAF that tracks label changes between windows +// This takes all concatenated label strings for a window and computes delta vs previous window +#[udf] +fn deltasetaggregator_{{aggregation_id}}_(concatenated_labels: Vec<&str>) -> Option> { + // Return None if input is empty + if concatenated_labels.is_empty() { + return None; + } + + // Collect current window's unique label combinations + let mut current_labels = HashSet::new(); + for label_string in concatenated_labels { + current_labels.insert(label_string.to_string()); + } + + // Get previous window's labels and update state + let mut previous_labels_guard = PREVIOUS_LABELS_{{aggregation_id}}.lock().unwrap(); + let previous_labels = previous_labels_guard.clone(); + + // Calculate differences + let added: HashSet = current_labels.difference(&previous_labels).cloned().collect(); + let removed: HashSet = previous_labels.difference(¤t_labels).cloned().collect(); + + // Update state for next window + *previous_labels_guard = current_labels; + + // Create delta result + let delta_result = DeltaResult { + added, + removed, + }; + + // Serialize result + let mut buf = Vec::new(); + delta_result.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/gzip_compress.rs b/ArroyoSketch/templates/udfs/gzip_compress.rs new file mode 100644 index 0000000..1977723 --- /dev/null +++ b/ArroyoSketch/templates/udfs/gzip_compress.rs @@ -0,0 +1,16 @@ +/* +[dependencies] +flate2 = "1.1.1" +*/ + +use arroyo_udf_plugin::udf; +use std::io::Write; +use flate2::{Compression, write::GzEncoder}; + +#[udf] +fn gzip_compress(data: &[u8]) -> Option> { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + + encoder.write_all(&data).ok()?; + encoder.finish().ok() +} diff 
--git a/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 b/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 new file mode 100644 index 0000000..b9be3cb --- /dev/null +++ b/ArroyoSketch/templates/udfs/hydrakll_.rs.j2 @@ -0,0 +1,83 @@ +/* +[dependencies] +dsrs = { git = "https://github.com/SketchDB/datasketches-rs" } +arroyo-udf-plugin = "0.1" +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +xxhash-rust = { version = "0.8", features = ["xxh32"] } +*/ + +use arroyo_udf_plugin::udf; +use dsrs::KllDoubleSketch; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use xxhash_rust::xxh32::xxh32; + +const ROW_NUM: usize = {{ row_num }}; +const COL_NUM: usize = {{ col_num }}; +const DEFAULT_K: u16 = {{ k }}; + +// Match QueryEngineRust format exactly +#[derive(Deserialize, Serialize)] +struct KllSketchData { + k: u16, + sketch_bytes: Vec, +} + +#[derive(Serialize, Deserialize)] +struct HydraKllSketchData { + row_num: usize, + col_num: usize, + sketches: Vec>, +} + +#[udf] +fn hydrakll_(keys: Vec<&str>, values: Vec) -> Option> { + // Initialize 2D matrix of KLL sketches + let mut sketches: Vec> = vec![ + vec![KllDoubleSketch::with_k(DEFAULT_K); COL_NUM]; + ROW_NUM + ]; + + // Process each key-value pair + for (i, &key) in keys.iter().enumerate() { + if i >= values.len() { + break; + } + + let key_bytes = key.as_bytes(); + + // Update each row using different hash functions + for row in 0..ROW_NUM { + let hash_value = xxh32(key_bytes, row as u32); + let col_index = (hash_value as usize) % COL_NUM; + sketches[row][col_index].update(values[i]); + } + } + + // Serialize to match QueryEngineRust format + let sketch_data: Vec> = sketches + .iter() + .map(|row| { + row.iter() + .map(|sketch| { + let sketch_bytes = sketch.serialize(); + KllSketchData { + k: DEFAULT_K, + sketch_bytes: sketch_bytes.as_ref().to_vec(), + } + }) + .collect() + }) + .collect(); + + let hydra_data = HydraKllSketchData { + row_num: ROW_NUM, + col_num: COL_NUM, + sketches: 
sketch_data, + }; + + let mut buf = Vec::new(); + hydra_data.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleincrease_.rs b/ArroyoSketch/templates/udfs/multipleincrease_.rs new file mode 100644 index 0000000..3ba2b42 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleincrease_.rs @@ -0,0 +1,55 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[derive(Serialize)] +struct MeasurementData { + starting_measurement: f64, + starting_timestamp: i64, + last_seen_measurement: f64, + last_seen_timestamp: i64, +} + +#[udf] +fn multipleincrease_(keys: Vec<&str>, values: Vec, timestamps: Vec) -> Option> { + // Create a new hashmap to store measurement data with timestamps + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys, values, and timestamps + for (i, &key) in keys.iter().enumerate() { + if i < values.len() && i < timestamps.len() { + let value = values[i]; + let timestamp = timestamps[i]; + + let entry = per_key_storage.entry(key.to_string()).or_insert(MeasurementData { + starting_measurement: value, + starting_timestamp: timestamp, + last_seen_measurement: value, + last_seen_timestamp: timestamp, + }); + + // Update last seen measurement and timestamp + entry.last_seen_measurement = value; + entry.last_seen_timestamp = timestamp; + + // If this timestamp is earlier than our current starting timestamp, update starting values + //if timestamp < entry.starting_timestamp { + // entry.starting_measurement = value; + // entry.starting_timestamp = timestamp; + //} + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleminmax_max.rs b/ArroyoSketch/templates/udfs/multipleminmax_max.rs new file mode 
100644 index 0000000..a636208 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleminmax_max.rs @@ -0,0 +1,33 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multipleminmax_max(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage + .entry(key.to_string()) + .and_modify(|v| *v = (*v).max(values[i])) + .or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multipleminmax_min.rs b/ArroyoSketch/templates/udfs/multipleminmax_min.rs new file mode 100644 index 0000000..a44268c --- /dev/null +++ b/ArroyoSketch/templates/udfs/multipleminmax_min.rs @@ -0,0 +1,33 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multipleminmax_min(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap + let mut per_key_storage: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + // If the key is not present or the value is less than the current stored value, update it + per_key_storage + .entry(key.to_string()) + .and_modify(|v| *v = (*v).min(values[i])) + .or_insert(values[i]); + } + } + + let mut buf = Vec::new(); + per_key_storage + .serialize(&mut Serializer::new(&mut buf)) + .ok()?; + Some(buf) +} diff --git 
a/ArroyoSketch/templates/udfs/multiplesum_count.rs b/ArroyoSketch/templates/udfs/multiplesum_count.rs new file mode 100644 index 0000000..67a8691 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multiplesum_count.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multiplesum_count(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap to store the count for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key.to_string()).or_insert(0.0) += 1.0; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/multiplesum_sum.rs b/ArroyoSketch/templates/udfs/multiplesum_sum.rs new file mode 100644 index 0000000..3615105 --- /dev/null +++ b/ArroyoSketch/templates/udfs/multiplesum_sum.rs @@ -0,0 +1,27 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::Serialize; +use std::collections::HashMap; + +#[udf] +fn multiplesum_sum(keys: Vec<&str>, values: Vec) -> Option> { + // Create a new hashmap to store the sum of values for each key + let mut key_sums: HashMap = HashMap::new(); + + // Iterate through the keys and values and update the sum for each key + for (i, &key) in keys.iter().enumerate() { + if i < values.len() { + *key_sums.entry(key.to_string()).or_insert(0.0) += values[i]; + } + } + + let mut buf = Vec::new(); + key_sums.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/templates/udfs/setaggregator_.rs b/ArroyoSketch/templates/udfs/setaggregator_.rs new file mode 100644 index 
0000000..9ab897a --- /dev/null +++ b/ArroyoSketch/templates/udfs/setaggregator_.rs @@ -0,0 +1,38 @@ +/* +[dependencies] +rmp-serde = "1.1" +serde = { version = "1.0", features = ["derive"] } +*/ + +use arroyo_udf_plugin::udf; +use rmp_serde::Serializer; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; + +#[derive(Serialize, Deserialize)] +struct StringSet { + values: HashSet, +} + +#[udf] +fn setaggregator_(strings: Vec<&str>) -> Option> { + // Return None if input is empty + if strings.is_empty() { + return None; + } + + // Create a HashSet and collect all unique strings + let mut unique_strings = HashSet::new(); + for s in strings { + unique_strings.insert(s.to_string()); + } + + // Wrap in a serializable struct + let string_set = StringSet { + values: unique_strings, + }; + + let mut buf = Vec::new(); + string_set.serialize(&mut Serializer::new(&mut buf)).ok()?; + Some(buf) +} diff --git a/ArroyoSketch/tests/__init__.py b/ArroyoSketch/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ArroyoSketch/tests/test_integration.py b/ArroyoSketch/tests/test_integration.py new file mode 100644 index 0000000..335ddeb --- /dev/null +++ b/ArroyoSketch/tests/test_integration.py @@ -0,0 +1,435 @@ +""" +Integration tests for SQL schema support in ArroyoSketch. + +Tests cover: +1. Helper functions (build_sql_json_schema, get_source_table_name_sql) +2. get_sql_query with SQL mode +3. 
End-to-end config parsing +""" + +import pytest +import sys +import os +import yaml + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from promql_utilities.streaming_config.SQLTableConfig import ( # noqa: E402 + SQLTableConfig, + TableSchema, +) +from promql_utilities.streaming_config.StreamingAggregationConfig import ( # noqa: E402 + StreamingAggregationConfig, +) +from promql_utilities.streaming_config.MetricConfig import MetricConfig # noqa: E402 +from run_arroyosketch import ( # noqa: E402 + build_sql_json_schema, + get_source_table_name_sql, + get_sql_query, +) +from utils import jinja_utils # noqa: E402 + + +class TestBuildSqlJsonSchema: + """Tests for build_sql_json_schema helper function.""" + + def test_basic_schema(self): + """Test building JSON schema for a simple table.""" + table_schema = TableSchema( + time_column="timestamp", + value_columns=["cpu_usage"], + metadata_columns=["host"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert json_schema["type"] == "object" + assert "timestamp" in json_schema["required"] + assert "cpu_usage" in json_schema["required"] + assert "host" in json_schema["required"] + assert json_schema["properties"]["timestamp"]["type"] == "string" + assert json_schema["properties"]["timestamp"]["format"] == "date-time" + assert json_schema["properties"]["cpu_usage"]["type"] == "number" + assert json_schema["properties"]["host"]["type"] == "string" + + def test_multiple_value_columns(self): + """Test building JSON schema with multiple value columns.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val1", "val2", "val3"], + metadata_columns=["label"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert "val1" in json_schema["required"] + assert "val2" in json_schema["required"] + assert "val3" in json_schema["required"] + assert json_schema["properties"]["val1"]["type"] == "number" + assert 
json_schema["properties"]["val2"]["type"] == "number" + assert json_schema["properties"]["val3"]["type"] == "number" + + def test_multiple_metadata_columns(self): + """Test building JSON schema with multiple metadata columns.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["host", "region", "datacenter"], + ) + + json_schema = build_sql_json_schema(table_schema) + + assert "host" in json_schema["required"] + assert "region" in json_schema["required"] + assert "datacenter" in json_schema["required"] + for col in ["host", "region", "datacenter"]: + assert json_schema["properties"][col]["type"] == "string" + + def test_additional_properties_false(self): + """Test that additionalProperties is set to False.""" + table_schema = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + + json_schema = build_sql_json_schema(table_schema) + assert json_schema["additionalProperties"] is False + + +class TestGetSourceTableNameSql: + """Tests for get_source_table_name_sql helper function.""" + + def test_kafka_source(self): + """Test source table name generation for Kafka.""" + + class MockArgs: + source_type = "kafka" + input_kafka_topic = "test_topic" + + args = MockArgs() + result = get_source_table_name_sql(args, "my_table") + assert result == "test_topic_my_table" + + def test_kafka_source_with_spaces(self): + """Test source table name generation with spaces in table name.""" + + class MockArgs: + source_type = "kafka" + input_kafka_topic = "test_topic" + + args = MockArgs() + result = get_source_table_name_sql(args, "my table name") + assert result == "test_topic_my_table_name" + + def test_file_source(self): + """Test source table name generation for file source.""" + + class MockArgs: + source_type = "file" + input_file_path = "/data/metrics.parquet" + + args = MockArgs() + result = get_source_table_name_sql(args, "my_table") + assert result == "metrics_my_table" + + def 
test_file_source_with_spaces(self): + """Test source table name generation for file source with spaces in table name.""" + + class MockArgs: + source_type = "file" + input_file_path = "/data/metrics.parquet" + + args = MockArgs() + result = get_source_table_name_sql(args, "my table name") + assert result == "metrics_my_table_name" + + def test_unsupported_source_type(self): + """Test that unsupported source types raise ValueError.""" + + class MockArgs: + source_type = "prometheus_remote_write" + input_kafka_topic = "test_topic" + + args = MockArgs() + with pytest.raises(ValueError, match="Unsupported source type for SQL mode"): + get_source_table_name_sql(args, "my_table") + + +class TestGetSqlQuerySQL: + """Tests for get_sql_query with SQL mode.""" + + @pytest.fixture + def sql_schema_config(self): + """Create a sample SQL schema config.""" + return SQLTableConfig( + { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "event_time", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region", "service"], + } + ] + } + ) + + @pytest.fixture + def sql_agg_config(self): + """Create a sample SQL aggregation config.""" + return StreamingAggregationConfig.from_dict( + { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host", "region"], + "aggregated": ["service"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + ) + + @pytest.fixture + def sql_template(self): + """Load the SQL template.""" + template_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "templates", + "sql", + ) + return jinja_utils.load_template(template_dir, "single_windowed_aggregation.j2") + + def test_sql_query_uses_value_column( + self, sql_schema_config, sql_agg_config, sql_template + ): + """Test that SQL query uses the correct value_column.""" + 
sql_agg_config.aggregationType = "multiplesum" + sql_agg_config.aggregationSubType = "sum" + + sql_query, agg_function, params = get_sql_query( + streaming_aggregation_config=sql_agg_config, + schema_config=sql_schema_config, + query_language="sql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=False, + ) + + assert "cpu_usage" in sql_query + assert "value" not in sql_query or "cpu_usage" in sql_query + + def test_sql_query_no_label_prefix( + self, sql_schema_config, sql_agg_config, sql_template + ): + """Test that SQL mode doesn't use labels. prefix.""" + sql_agg_config.aggregationType = "multiplesum" + sql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=sql_agg_config, + schema_config=sql_schema_config, + query_language="sql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=False, + ) + + # Should have flat column names, not labels.host + assert "labels.host" not in sql_query + assert "labels.region" not in sql_query + # Should have host and region directly + assert "host" in sql_query + assert "region" in sql_query + + +class TestGetSqlQueryPromQL: + """Tests for get_sql_query with PromQL mode (backward compatibility).""" + + @pytest.fixture + def promql_metric_config(self): + """Create a sample PromQL metric config.""" + return MetricConfig({"fake_metric": ["instance", "job", "label_0"]}) + + @pytest.fixture + def promql_agg_config(self): + """Create a sample PromQL aggregation config.""" + return StreamingAggregationConfig.from_dict( + { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric", + "labels": { + "grouping": ["instance", "job"], + "aggregated": ["label_0"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + ) + + 
@pytest.fixture + def sql_template(self): + """Load the SQL template.""" + template_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "templates", + "sql", + ) + return jinja_utils.load_template(template_dir, "single_windowed_aggregation.j2") + + def test_promql_query_uses_value( + self, promql_metric_config, promql_agg_config, sql_template + ): + """Test that PromQL query uses 'value' column.""" + promql_agg_config.aggregationType = "multiplesum" + promql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=promql_agg_config, + schema_config=promql_metric_config, + query_language="promql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=True, + ) + + assert "value" in sql_query + + def test_promql_query_uses_label_prefix( + self, promql_metric_config, promql_agg_config, sql_template + ): + """Test that PromQL query uses labels. 
prefix when nested.""" + promql_agg_config.aggregationType = "multiplesum" + promql_agg_config.aggregationSubType = "sum" + + sql_query, _, _ = get_sql_query( + streaming_aggregation_config=promql_agg_config, + schema_config=promql_metric_config, + query_language="promql", + sql_template=sql_template, + source_table="test_source", + sink_table="test_sink", + source_type="kafka", + use_nested_labels=True, + ) + + assert "labels.instance" in sql_query + assert "labels.job" in sql_query + + +class TestEndToEndConfigParsing: + """End-to-end tests for config file parsing.""" + + def test_parse_sql_config_file(self): + """Test parsing a complete SQL config file.""" + config_content = """ +query_language: sql + +tables: + - name: system_metrics + time_column: event_time + value_columns: + - cpu_percent + - memory_mb + metadata_columns: + - hostname + - datacenter + - service + +aggregations: + - aggregationId: 1 + table_name: system_metrics + value_column: cpu_percent + aggregationType: MultipleSum + aggregationSubType: sum + labels: + grouping: + - hostname + - datacenter + aggregated: + - service + rollup: [] + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 +""" + config = yaml.safe_load(config_content) + + assert config["query_language"] == "sql" + + schema_config = SQLTableConfig(config) + assert "system_metrics" in schema_config.config + + agg_configs = [ + StreamingAggregationConfig.from_dict(agg) for agg in config["aggregations"] + ] + assert len(agg_configs) == 1 + assert agg_configs[0].table_name == "system_metrics" + assert agg_configs[0].value_column == "cpu_percent" + + # Validate should pass + agg_configs[0].validate(schema_config, query_language="sql") + + def test_parse_promql_config_file(self): + """Test parsing a complete PromQL config file (backward compatibility).""" + config_content = """ +aggregations: + - aggregationId: 1 + aggregationSubType: sum + aggregationType: MultipleSum + labels: + aggregated: + - label_0 + grouping: + - instance 
+ - job + rollup: [] + metric: fake_metric_total + parameters: {} + spatialFilter: '' + tumblingWindowSize: 10 + +metrics: + fake_metric_total: + - instance + - job + - label_0 +""" + config = yaml.safe_load(config_content) + + # No query_language means default to promql + query_language = config.get("query_language", "promql") + assert query_language == "promql" + + metric_config = MetricConfig(config["metrics"]) + assert "fake_metric_total" in metric_config.config + + agg_configs = [ + StreamingAggregationConfig.from_dict(agg) for agg in config["aggregations"] + ] + assert len(agg_configs) == 1 + assert agg_configs[0].metric == "fake_metric_total" + + # Validate should pass + agg_configs[0].validate(metric_config, query_language="promql") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ArroyoSketch/tests/test_sql_schema.py b/ArroyoSketch/tests/test_sql_schema.py new file mode 100644 index 0000000..5924ddf --- /dev/null +++ b/ArroyoSketch/tests/test_sql_schema.py @@ -0,0 +1,463 @@ +""" +Unit tests for SQL schema support in ArroyoSketch. + +Tests cover: +1. SQLTableConfig parsing and validation +2. StreamingAggregationConfig SQL mode support +3. 
Backward compatibility with PromQL configs +""" + +import pytest +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from promql_utilities.streaming_config.SQLTableConfig import ( # noqa: E402 + SQLTableConfig, + TableSchema, +) +from promql_utilities.streaming_config.StreamingAggregationConfig import ( # noqa: E402 + StreamingAggregationConfig, +) +from promql_utilities.streaming_config.MetricConfig import MetricConfig # noqa: E402 + + +class TestSQLTableConfig: + """Tests for SQLTableConfig class.""" + + def test_parse_single_table(self): + """Test parsing a single table from sql_schema.""" + yaml_dict = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "timestamp", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + + assert "cpu_metrics" in config.config + table = config.get_table("cpu_metrics") + assert table.time_column == "timestamp" + assert table.value_columns == ["cpu_usage", "cpu_system"] + assert table.metadata_columns == ["host", "region"] + + def test_parse_multiple_tables(self): + """Test parsing multiple tables from sql_schema.""" + yaml_dict = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host"], + }, + { + "name": "memory_metrics", + "time_column": "event_time", + "value_columns": ["memory_used", "memory_free"], + "metadata_columns": ["host", "datacenter"], + }, + ] + } + + config = SQLTableConfig(yaml_dict) + + assert len(config.config) == 2 + assert "cpu_metrics" in config.config + assert "memory_metrics" in config.config + + cpu_table = config.get_table("cpu_metrics") + assert cpu_table.time_column == "ts" + + mem_table = config.get_table("memory_metrics") + assert mem_table.time_column == "event_time" + assert mem_table.value_columns == ["memory_used", 
"memory_free"] + + def test_get_time_column(self): + """Test get_time_column helper method.""" + yaml_dict = { + "tables": [ + { + "name": "test_table", + "time_column": "custom_timestamp", + "value_columns": ["val"], + "metadata_columns": ["label"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + assert config.get_time_column("test_table") == "custom_timestamp" + + def test_get_metadata_columns(self): + """Test get_metadata_columns helper method.""" + yaml_dict = { + "tables": [ + { + "name": "test_table", + "time_column": "ts", + "value_columns": ["val"], + "metadata_columns": ["host", "region", "cluster"], + } + ] + } + + config = SQLTableConfig(yaml_dict) + assert config.get_metadata_columns("test_table") == [ + "host", + "region", + "cluster", + ] + + def test_get_nonexistent_table(self): + """Test that get_table returns None for nonexistent table.""" + yaml_dict = {"tables": []} + config = SQLTableConfig(yaml_dict) + assert config.get_table("nonexistent") is None + + def test_empty_tables_list(self): + """Test parsing with empty tables list.""" + yaml_dict = {"tables": []} + config = SQLTableConfig(yaml_dict) + assert len(config.config) == 0 + + def test_missing_tables_key(self): + """Test parsing with missing tables key.""" + yaml_dict = {} + config = SQLTableConfig(yaml_dict) + assert len(config.config) == 0 + + +class TestStreamingAggregationConfigSQL: + """Tests for StreamingAggregationConfig SQL mode support.""" + + def test_parse_sql_aggregation_config(self): + """Test parsing aggregation config with SQL fields.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host", "region"], + "aggregated": ["cluster"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + assert agg_config.table_name 
== "cpu_metrics" + assert agg_config.value_column == "cpu_usage" + assert agg_config.metric is None + + def test_default_value_column(self): + """Test that value_column defaults to 'value' if not specified.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.value_column == "value" + + def test_get_source_identifier_sql(self): + """Test get_source_identifier returns table_name for SQL mode.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "my_table", + "value_column": "val", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.get_source_identifier() == "my_table" + + def test_validate_sql_mode_success(self): + """Test successful validation in SQL mode.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage", "cpu_system"], + "metadata_columns": ["host", "region"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host"], + "aggregated": ["region"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise + agg_config.validate(schema_config, query_language="sql") + + def 
test_validate_sql_mode_missing_table(self): + """Test validation fails when table doesn't exist.""" + config = {"tables": []} + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "nonexistent_table", + "value_column": "val", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="not found in sql_schema"): + agg_config.validate(schema_config, query_language="sql") + + def test_validate_sql_mode_invalid_value_column(self): + """Test validation fails when value_column doesn't exist in table.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "table_name": "cpu_metrics", + "value_column": "nonexistent_column", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="value_column.*not in table"): + agg_config.validate(schema_config, query_language="sql") + + def test_validate_sql_mode_mismatched_labels(self): + """Test validation fails when labels don't match metadata_columns.""" + config = { + "tables": [ + { + "name": "cpu_metrics", + "time_column": "ts", + "value_columns": ["cpu_usage"], + "metadata_columns": ["host", "region"], + } + ] + } + schema_config = SQLTableConfig(config) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + 
"table_name": "cpu_metrics", + "value_column": "cpu_usage", + "labels": { + "grouping": ["host"], + "aggregated": ["wrong_label"], # doesn't match metadata_columns + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + with pytest.raises(ValueError, match="Labels do not match metadata_columns"): + agg_config.validate(schema_config, query_language="sql") + + +class TestStreamingAggregationConfigPromQL: + """Tests for backward compatibility with PromQL configs.""" + + def test_parse_promql_aggregation_config(self): + """Test parsing aggregation config with PromQL fields.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric_total", + "labels": { + "grouping": ["instance", "job"], + "aggregated": ["label_0"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + + assert agg_config.metric == "fake_metric_total" + assert agg_config.table_name is None + + def test_get_source_identifier_promql(self): + """Test get_source_identifier returns metric for PromQL mode.""" + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "my_metric", + "labels": { + "grouping": [], + "aggregated": ["label"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + assert agg_config.get_source_identifier() == "my_metric" + + def test_validate_promql_mode_success(self): + """Test successful validation in PromQL mode.""" + metrics = {"fake_metric": ["host", "region"]} + metric_config = MetricConfig(metrics) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + 
"metric": "fake_metric", + "labels": { + "grouping": ["host"], + "aggregated": ["region"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise + agg_config.validate(metric_config, query_language="promql") + + def test_validate_promql_mode_default(self): + """Test that query_language defaults to promql.""" + metrics = {"fake_metric": ["host"]} + metric_config = MetricConfig(metrics) + + config_dict = { + "aggregationId": 1, + "aggregationType": "MultipleSum", + "aggregationSubType": "sum", + "metric": "fake_metric", + "labels": { + "grouping": [], + "aggregated": ["host"], + "rollup": [], + }, + "parameters": {}, + "spatialFilter": "", + "tumblingWindowSize": 10, + } + + agg_config = StreamingAggregationConfig.from_dict(config_dict) + # Should not raise - defaults to promql + agg_config.validate(metric_config, query_language="promql") + + +class TestTableSchema: + """Tests for TableSchema dataclass.""" + + def test_table_schema_creation(self): + """Test creating a TableSchema.""" + schema = TableSchema( + time_column="ts", + value_columns=["val1", "val2"], + metadata_columns=["label1", "label2"], + ) + + assert schema.time_column == "ts" + assert schema.value_columns == ["val1", "val2"] + assert schema.metadata_columns == ["label1", "label2"] + + def test_table_schema_equality(self): + """Test TableSchema equality.""" + schema1 = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + schema2 = TableSchema( + time_column="ts", + value_columns=["val"], + metadata_columns=["label"], + ) + + assert schema1 == schema2 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ArroyoSketch/utils/arroyo_utils.py b/ArroyoSketch/utils/arroyo_utils.py new file mode 100644 index 0000000..2f33870 --- /dev/null +++ b/ArroyoSketch/utils/arroyo_utils.py @@ -0,0 +1,72 @@ +import json +import time 
+from typing import List + +from utils import http_utils + + +def get_all_pipelines(arroyo_url: str) -> List[str]: + # list all pipelines + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines", + method="get", + ) + response = json.loads(response) + if response["data"] is None: + print("No pipelines found") + return [] + + pipeline_ids = [pipeline["id"] for pipeline in response["data"]] + return pipeline_ids + + +def stop_and_delete_pipelines( + arroyo_url: str, pipeline_ids: List[str], num_retries: int = 30 +): + # stop each pipeline + for pipeline_id in pipeline_ids: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="patch", + data=json.dumps({"stop": "immediate"}), + ) + print("Sent stop request for pipeline:", pipeline_id) + + # for each pipeline, get status and verify that stop==immediate and actionInProgress==False + # for pipelines not satisfying this, retry N times with a delay, before raising an error + for pipeline_id in pipeline_ids: + for attempt in range(num_retries): + try: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="get", + ) + print("Got status for pipeline:", pipeline_id) + + try: + data = json.loads(response) + print("data['stop']:", data["stop"], type(data["stop"])) + print( + "data['actionInProgress']:", + data["actionInProgress"], + type(data["actionInProgress"]), + ) + if data["stop"] == "immediate" and not data["actionInProgress"]: + break + except json.JSONDecodeError as e: + print("Failed to decode JSON response:", e) + pass + time.sleep(10) + except Exception as e: + if attempt < num_retries - 1: + continue + else: + raise e + + # delete each pipeline + for pipeline_id in pipeline_ids: + response = http_utils.make_api_request( + url=f"{arroyo_url}/pipelines/{pipeline_id}", + method="delete", + ) + print("Sent delete request for pipeline:", pipeline_id) diff --git a/ArroyoSketch/utils/http_utils.py 
b/ArroyoSketch/utils/http_utils.py new file mode 100644 index 0000000..08e08ce --- /dev/null +++ b/ArroyoSketch/utils/http_utils.py @@ -0,0 +1,74 @@ +import requests + + +def make_api_request(url, method, data=None): + """ + Make an API request to the Arroyo API. + + Args: + url (str): The URL to make the request to + method (str): The HTTP method (get or post) + data (str): The data to send with the request (for POST) + + Returns: + dict: The response JSON data + + Raises: + Exception: If the request fails + """ + headers = {"Content-Type": "application/json"} + + try: + if method.lower() == "post": + response = requests.post(url, headers=headers, data=data) + elif method.lower() == "get": + response = requests.get(url, headers=headers) + elif method.lower() == "delete": + response = requests.delete(url, headers=headers) + elif method.lower() == "patch": + response = requests.patch(url, headers=headers, data=data) + else: + raise ValueError(f"Unsupported HTTP method: {method}") + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as e: + print( + f"HTTP Error {response.status_code}: {response.content.decode('utf-8')}" + ) + raise e + + response_data = response.content.decode("utf-8") + + return response_data + except Exception as e: + error_msg = f"Failed {method} request to URL: {url}" + print("Error details:", e) + print(error_msg) + raise Exception(error_msg) + + +def create_arroyo_resource(arroyo_url, endpoint, data, resource_type): + """ + Create a resource using the Arroyo API. 
+ + Args: + arroyo_url (str): Base URL of the Arroyo API + endpoint (str): API endpoint (e.g., 'connection_profiles') + data (str): JSON data for the resource + + Returns: + dict: The response JSON data + """ + url = f"{arroyo_url.rstrip('/')}/{endpoint}" + try: + # print(f"Creating {resource_type} resource at {url}...\n") + # print(f"Data: {data}\n") + # input("Press Enter to continue...") + response_data = make_api_request(url=url, method="post", data=data) + except Exception as e: + error_msg = f"Failed to create {resource_type} resource: {e}" + print(error_msg) + raise Exception(error_msg) + + return response_data diff --git a/ArroyoSketch/utils/jinja_utils.py b/ArroyoSketch/utils/jinja_utils.py new file mode 100644 index 0000000..08f6aa8 --- /dev/null +++ b/ArroyoSketch/utils/jinja_utils.py @@ -0,0 +1,27 @@ +from jinja2 import Environment, FileSystemLoader, nodes + + +def load_template(template_dir, template_name): + """Load a template from the specified directory""" + env = Environment(loader=FileSystemLoader(template_dir)) + template = env.get_template(template_name) + return template + + +def get_template_variables(template_source, environment=None): + """ + Extract all template variables from a Jinja2 template source. + + Args: + template_source (str): The raw template source code + environment (Environment, optional): Jinja2 environment. If None, creates a default one. 
+ + Returns: + set: Set of variable names found in the template + """ + if environment is None: + environment = Environment() + + ast = environment.parse(template_source) + template_vars = ast.find_all(nodes.Name) + return {var.name for var in template_vars if var.ctx == "load"} diff --git a/ArroyoSketch/validate_udfs.py b/ArroyoSketch/validate_udfs.py new file mode 100644 index 0000000..0e7b1bc --- /dev/null +++ b/ArroyoSketch/validate_udfs.py @@ -0,0 +1,115 @@ +import os +import json +import argparse +from typing import List + +import utils.http_utils as http_utils +import utils.jinja_utils as jinja_utils + + +def main(args): + if args.all_udfs and args.udfs: + raise ValueError( + "Cannot specify both --all_udfs and --udfs. Use one or the other." + ) + if not args.all_udfs and not args.udfs: + raise ValueError("You must specify either --all_udfs or --udfs.") + + udfs: List[str] = [] + if args.udfs: + udfs = args.udfs.strip().split(",") + udfs = [udf.strip() for udf in udfs if udf.strip()] + else: + udf_templates = os.listdir(os.path.join(args.template_dir, "udfs")) + udfs = [ + udf.split(".rs")[0] + for udf in udf_templates + if udf.endswith(".rs") or udf.endswith(".rs.j2") + ] + + if not udfs: + raise ValueError("No UDFs found to validate.") + udfs = sorted(udfs) + + print(f"Validating UDFs: {', '.join(udfs)}") + + for udf in udfs: + udf_body = None + udf_dir = os.path.join(args.template_dir, "udfs") + + # Check if we have a Jinja template version first + template_path = os.path.join(udf_dir, f"{udf}.rs.j2") + regular_path = os.path.join(udf_dir, f"{udf}.rs") + + if os.path.exists(template_path): + # Read template source and parse for variables + with open(template_path, "r") as file: + template_source = file.read() + + # Load the template for rendering + udf_template = jinja_utils.load_template(udf_dir, f"{udf}.rs.j2") + + # Get all template variables and set them to 100 + template_vars = jinja_utils.get_template_variables( + template_source, 
udf_template.environment + ) + params = {var_name: 100 for var_name in template_vars} + + udf_body = udf_template.render(**params) + elif os.path.exists(regular_path): + # Use regular file if no template exists + with open(regular_path, "r") as file: + udf_body = file.read() + else: + raise ValueError( + f"UDF {udf} not found. Neither {template_path} nor {regular_path} exists." + ) + + if not udf_body: + raise ValueError(f"UDF {udf} is empty or could not be rendered.") + + data = {"definition": udf_body, "language": "rust"} + + response = http_utils.create_arroyo_resource( + args.arroyo_url, + endpoint="udfs/validate", + data=json.dumps(data), + resource_type="UDF", + ) + response = json.loads(response) + + print(f"Validating UDF: {udf}") + print(response) + print("-" * 80) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Validate UDFs in a given directory against a template directory." + ) + parser.add_argument( + "--template_dir", + default="./templates", + help="Directory containing template files", + ) + + parser.add_argument( + "--arroyo_url", + default="http://localhost:5115/api/v1", + help="URL of the Arroyo API server", + ) + + parser.add_argument( + "--all_udfs", + action="store_true", + help="Validate all UDFs in the template directory", + ) + parser.add_argument( + "--udfs", + type=str, + required=False, + help="Comma-separated list of UDFs to validate", + ) + + args = parser.parse_args() + main(args) diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..ed0c3c7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,5115 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + 
"half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" +dependencies = [ + 
"ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" + +[[package]] +name = "arrow-select" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" 
+version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets 0.52.6", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim 0.8.0", + "textwrap", + "unicode-width 0.1.14", + "vec_map", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", 
+] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "codespan-reporting" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" +dependencies = [ + "serde", + "termcolor", + "unicode-width 0.2.2", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "comfy-table" +version = "7.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +dependencies = [ + "unicode-segmentation", + "unicode-width 0.2.2", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ 
+ "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "cxx" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747d8437319e3a2f43d93b341c137927ca70c0f5dabeea7a005a73665e247c7e" +dependencies = [ + "cc", + "cxx-build", + "cxxbridge-cmd", + "cxxbridge-flags", + "cxxbridge-macro", + "foldhash 0.2.0", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f4697d190a142477b16aef7da8a99bfdc41e7e8b1687583c0d23a79c7afc1e" +dependencies = [ + "cc", + "codespan-reporting", + "indexmap", + "proc-macro2", + "quote", + "scratch", + "syn 2.0.117", +] + +[[package]] +name = "cxxbridge-cmd" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0956799fa8678d4c50eed028f2de1c0552ae183c76e976cf7ca8c4e36a7c328" +dependencies = [ + "clap 4.5.60", + "codespan-reporting", + "indexmap", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.194" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "23384a836ab4f0ad98ace7e3955ad2de39de42378ab487dc28d3990392cb283a" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6acc6b5822b9526adfb4fc377b67128fdd60aac757cc4a741a6278603f763cf" +dependencies = [ + "indexmap", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.4.4", + "chrono", + "dashmap 6.1.0", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "num_cpus", + 
"object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser 0.51.0", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser 0.51.0", + "tokio", +] + +[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap 6.1.0", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", 
+ "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser 0.51.0", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools 0.13.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools 0.13.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + 
+[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools 0.13.0", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", 
+ "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools 0.13.0", +] + +[[package]] +name = "datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser 0.51.0", + "strum", +] + +[[package]] +name = "datafusion_summary_library" +version = "0.1.0" +dependencies = [ + "arrow", + "async-trait", + "datafusion", + "futures", + "hyperloglogplus", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dsrs" +version = "0.6.1" +source = "git+https://github.com/ProjectASAP/datasketches-rs#d748ec75c80fff21f7b24897244dd1c895df2e9a" +dependencies = [ + "base64 0.13.1", + "bstr", + "cxx", + "cxx-build", + "memchr", + "rmp-serde", + "serde", + "structopt", + "thin-dst", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width 0.2.2", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + 
+[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + 
+[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "hyper" +version = "0.14.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.32", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + 
"iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jiff" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + 
"jiff-tzdb", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + 
+[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags 2.11.0", + "libc", + "redox_syscall 0.7.3", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4735e9cbde5aac84a5ce588f6b23a90b9b0b528f6c5a8db8a4aff300463a0839" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" +dependencies = [ + "cc", +] + +[[package]] +name = "linux-raw-sys" +version = 
"0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash 2.1.2", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata 0.4.14", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "native-tls" +version = 
"0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi 0.5.2", + "libc", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.11.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.18", + "smallvec", + "windows-link", +] + +[[package]] +name = "parquet" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash 1.6.3", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parse_datetime" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acea383beda9652270f3c9678d83aa58cbfc16880343cae0c0c8c7d6c0974132" +dependencies = [ + "jiff", + "num-traits", + "winnow", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + 
+[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +dependencies = [ + "portable-atomic", +] + 
+[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 1.0.69", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio-test", + "tracing", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + +[[package]] +name = "psm" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "query_engine_rust" +version = "0.1.0" +dependencies = [ + "anyhow", + "arrow", + "async-trait", + "axum", + "base64 0.21.7", + "bincode", + 
"chrono", + "clap 4.5.60", + "dashmap 5.5.3", + "datafusion", + "datafusion_summary_library", + "dsrs", + "flate2", + "form_urlencoded", + "futures", + "hex", + "lazy_static", + "prometheus", + "promql-parser", + "promql_utilities", + "prost", + "rdkafka", + "regex", + "reqwest", + "rmp-serde", + "rusqlite", + "serde", + "serde_json", + "serde_yaml", + "sketch-core", + "sketch_db_common", + "snap", + "sql_utilities", + "sqlparser 0.59.0", + "structopt", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", + "urlencoding", + "uuid", + "xxhash-rust", + "zstd", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rdkafka" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" 
+dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.10.0+2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e234cf318915c1059d4921ef7f75616b5219b10b46e9f3a511a15eb4b56a3f77" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.14", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = 
"regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.11.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + 
"libsqlite3-sys", + "smallvec", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scratch" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2" + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", 
+] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "sketch-core" +version = "0.1.0" +dependencies = [ + "dsrs", + "rmp-serde", + "serde", + "xxhash-rust", +] + +[[package]] +name = "sketch_db_common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 4.5.60", + "promql_utilities", + "serde", + "serde_json", + "serde_yaml", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "sql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "parse_datetime", + "sqlparser 0.59.0", + "tokio-test", +] + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "stacker" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +dependencies = [ + "cc", + 
"cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap 2.34.0", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = 
"syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +dependencies = [ + "fastrand", + "getrandom 
0.4.1", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width 0.1.14", +] + +[[package]] +name = "thin-dst" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = 
[ + "cfg-if", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545" +dependencies = [ + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 1.0.2", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + 
+[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata 0.4.14", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +dependencies = [ + "getrandom 0.4.1", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name 
= "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 
+dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 
0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f6da817 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,4 @@ +[workspace] +members = ["sketch-core", "QueryEngineRust"] +exclude = ["sketchlib-rust"] +resolver = "2" diff --git a/CommonDependencies/.gitignore b/CommonDependencies/.gitignore new file mode 100644 index 0000000..9e7080c 
--- /dev/null +++ b/CommonDependencies/.gitignore @@ -0,0 +1,11 @@ +**/__pycache__ +**/*.pyc +**/*.swp +.DS_Store +.vscode/ + +dependencies/py/promql_utilities/promql_utilities.egg-info/ +dependencies/rs/**/target/ + +tests/**/*.json +tests/**/target/ diff --git a/CommonDependencies/LICENSE b/CommonDependencies/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/CommonDependencies/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py new file mode 100644 index 0000000..8336949 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPattern.py @@ -0,0 +1,261 @@ +from dataclasses import dataclass +from typing import Dict, Any +from promql_parser import ( + VectorSelector, + MatrixSelector, + Call, + BinaryExpr, + AggregateExpr, + SubqueryExpr, + NumberLiteral, + TokenType, +) + + +@dataclass +class MatchResult: + matches: bool + tokens: Dict[str, Any] + + def __bool__(self): + return self.matches + + +class PromQLPattern: + """Pattern matching implementation (same as before)""" + + def __init__(self, ast_pattern: dict): + self.pattern = ast_pattern + + def matches(self, node, debug=False) -> MatchResult: + tokens = {} + result = self._matches_recursive(node, self.pattern, tokens, debug) + return MatchResult(matches=result, tokens=tokens) + + def _node_to_dict(self, node: Any) -> dict: + """Convert a promql_parser node to a dictionary representation""" + if isinstance(node, VectorSelector): + return { + "type": "VectorSelector", + "name": node.name, + "matchers": node.matchers, + "at": node.at, # Include the "@" modifier + "ast": node, + } + elif isinstance(node, MatrixSelector): + return { + "type": "MatrixSelector", + "vector_selector": node.vector_selector, + "range": node.range, + "ast": node, + } + elif isinstance(node, Call): + return { + "type": "Call", + "func": {"type": "Function", "name": node.func.name}, + "args": node.args, + "ast": node, + } + elif isinstance(node, BinaryExpr): + return { + "type": 
"BinaryExpr", + "op": node.op, + "left": node.lhs, + "right": node.rhs, + "ast": node, + } + elif isinstance(node, AggregateExpr): + return { + "type": "AggregateExpr", + "op": str(node.op), + "expr": node.expr, + "param": node.param, + "modifier": node.modifier, + "ast": node, + } + elif isinstance(node, SubqueryExpr): + return { + "type": "SubqueryExpr", + "expr": node.expr, + "range": node.range, + "step": node.step, + "offset": node.offset, + "ast": node, + } + elif isinstance(node, NumberLiteral): + return {"type": "NumberLiteral", "value": node.val, "ast": node} + elif isinstance(node, dict): + return node + else: + raise ValueError(f"Unsupported node type: {type(node)}") + + def _matches_recursive( + self, node, pattern: dict, tokens: dict, debug: bool + ) -> bool: + if pattern is None: + return True + + # if not isinstance(node, dict) and not isinstance(node, VectorSelector): + # return False + + node_dict = self._node_to_dict(node) + + if debug: + print("After return point 2") + print(node_dict) + print(pattern) + print(tokens) + + if "type" in pattern and pattern["type"] != node_dict["type"]: + return False + + if debug: + print("After return point 3") + print(node_dict) + print(pattern) + print(tokens) + + # Collect tokens if requested + collect_as = pattern.get("_collect_as") + if collect_as: + if node_dict["type"] == "VectorSelector": + tokens[collect_as] = { + "name": node_dict["name"], + "labels": node_dict["matchers"], + "at": node_dict["at"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "Call": + tokens[collect_as] = { + "name": node_dict["func"]["name"], + "args": node_dict["args"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "MatrixSelector": + tokens[collect_as] = { + "range": node_dict["range"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "SubqueryExpr": + tokens[collect_as] = { + "range": node_dict["range"], + "offset": node_dict["offset"], + "step": node_dict["step"], + "ast": node_dict["ast"], + } 
+ elif node_dict["type"] == "AggregateExpr": + tokens[collect_as] = { + "op": node_dict["op"], + "modifier": node_dict["modifier"], + "param": node_dict["param"], + "ast": node_dict["ast"], + } + elif node_dict["type"] == "NumberLiteral": + tokens[collect_as] = node_dict["value"] + elif node_dict["type"] == "BinaryExpr": + tokens[collect_as] = { + "op": node_dict["op"], + "left": node_dict["left"], + "right": node_dict["right"], + "ast": node_dict["ast"], + } + + # Special handling for function arguments collection + collect_args_as = pattern.get("_collect_args_as") + if collect_args_as: + tokens[collect_args_as] = node_dict["args"] + + for key, pattern_value in pattern.items(): + if key.startswith("_"): # Skip our special collection directives + continue + + if key not in node_dict: + if debug: + print(f"Key {key} not found in node_dict") + return False + + node_value = node_dict[key] + + if key in ["name", "op"] and isinstance(pattern_value, list): + if node_value not in pattern_value: + if debug: + print(f"Failed to match {node_value} with {pattern_value}") + return False + continue + + if pattern_value is None: + continue + + if isinstance(pattern_value, dict): + if not self._matches_recursive( + node_value, pattern_value, tokens, debug + ): + if debug: + print(f"(a) Failed to match {node_value} with {pattern_value}") + return False + elif isinstance(pattern_value, list): + if not isinstance(node_value, list) or len(pattern_value) != len( + node_value + ): + if debug: + print( + f"(b) Failed to match list {node_value} with {pattern_value}" + ) + return False + for p_item, n_item in zip(pattern_value, node_value): + if isinstance(p_item, dict): + if not self._matches_recursive(n_item, p_item, tokens, debug): + if debug: + print(f"(c) Failed to match {n_item} with {p_item}") + return False + elif p_item != n_item: + if debug: + print(f"(d) Failed to match {n_item} with {p_item}") + return False + elif isinstance(node_value, TokenType): + if pattern_value != 
str(node_value): + if debug: + print( + f"(e) Failed to match token {node_value} with {pattern_value}" + ) + return False + elif pattern_value != node_value: + if debug: + print(f"(f) Failed to match {node_value} with {pattern_value}") + return False + + return True + + # def matches(self, node) -> bool: + # if self.pattern is None: + # return True + # + # if not isinstance(node, dict) and not isinstance(node, VectorSelector): + # return False + # + # if isinstance(node, VectorSelector): + # node = { + # 'type': 'VectorSelector', + # 'name': node.name, + # 'label_matchers': node.label_matchers + # } + # + # if 'type' in self.pattern and self.pattern['type'] != node.get('type'): + # return False + # + # for key, pattern_value in self.pattern.items(): + # if key not in node: + # return False + # + # node_value = node[key] + # + # if pattern_value is None: + # continue + # + # if isinstance(pattern_value, dict): + # if not self.matches(node_value): + # return False + # elif pattern_value != node_value: + # return False + # + # return True diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py new file mode 100644 index 0000000..cddbbfc --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/PromQLPatternBuilder.py @@ -0,0 +1,105 @@ +from dataclasses import dataclass +from typing import List, Optional, Union, Dict + + +@dataclass +class PromQLPatternBuilder: + @staticmethod + def any(): + return None + + @staticmethod + def binary_op(op: str, left, right, collect_as: Optional[str] = None): + return { + "type": "BinaryExpr", + "op": op, + "left": left, + "right": right, + "_collect_as": collect_as, # If set, store the binary operation details + } + + @staticmethod + def metric( + name: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + at: 
Optional[str] = None, + collect_as: Optional[str] = None, + ): + return { + "type": "VectorSelector", + "name": name, + "matchers": labels, + "at": at, # Add the "@" modifier + "_collect_as": collect_as, # If set, store the matched metric details + } + + @staticmethod + def function( + name: Union[str, List[str]], + *args, + collect_args_as: Optional[str] = None, + collect_as: Optional[str] = None, + ): + if isinstance(name, str): + name = [name] + return { + "type": "Call", + "func": {"type": "Function", "name": name}, + "args": list(args), + "_collect_args_as": collect_args_as, # If set, store the function arguments + "_collect_as": collect_as, # If set, store the function details + } + + @staticmethod + def subquery( + expr, duration: Optional[str] = None, collect_as: Optional[str] = None + ): + return { + "type": "SubqueryExpr", + "expr": expr, + "range": duration, + "step": None, + "offset": None, + "_collect_as": collect_as, # If set, store the range details + } + + @staticmethod + def matrix_selector( + vector_selector, range: Optional[str] = None, collect_as: Optional[str] = None + ): + """Match a matrix selector (range vector selector)""" + return { + "type": "MatrixSelector", + "vector_selector": vector_selector, + "range": range, # e.g., '5m', '1h' + "_collect_as": collect_as, + } + + @staticmethod + def aggregation( + op: Union[str, List[str]], + expr, + param=None, + by: Optional[List[str]] = None, + without: Optional[List[str]] = None, + collect_as: Optional[str] = None, + ): + if isinstance(op, str): + op = [op] + + return { + "type": "AggregateExpr", + "op": op, + "expr": expr, + "param": param, + "modifier": by or without or None, + "_collect_as": collect_as, # If set, store the aggregation details + } + + @staticmethod + def number(value: Optional[float] = None, collect_as: Optional[str] = None): + return { + "type": "NumberLiteral", + "value": value, + "_collect_as": collect_as, # If set, store the number value + } diff --git 
a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/ast_matching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py new file mode 100644 index 0000000..e869a6d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/KeyByLabelNames.py @@ -0,0 +1,34 @@ +from typing import List + + +class KeyByLabelNames: + def __init__(self, keys: List[str]): + self.keys = sorted(keys) + + def __repr__(self) -> str: + return f"KeyByLabelNames({self.keys})" + + def __hash__(self) -> int: + return hash(tuple(self.keys)) + + def __eq__(self, other) -> bool: + if not isinstance(other, KeyByLabelNames): + return False + return self.keys == other.keys + + def __add__(self, other: "KeyByLabelNames") -> "KeyByLabelNames": + if not isinstance(other, KeyByLabelNames): + raise ValueError("Addition is only supported for KeyByLabelNames") + return KeyByLabelNames(list(set(self.keys) | set(other.keys))) + + def __sub__(self, other: "KeyByLabelNames") -> "KeyByLabelNames": + if not isinstance(other, KeyByLabelNames): + raise ValueError("Subtraction is only supported for KeyByLabelNames") + return KeyByLabelNames(list(set(self.keys) - set(other.keys))) + + def serialize_to_json(self) -> List[str]: + return self.keys + + @staticmethod + def deserialize_from_json(data: List[str]) -> "KeyByLabelNames": + return KeyByLabelNames(data) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py new file mode 100644 index 0000000..bd2ac97 --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/data_model/__init__.py @@ -0,0 +1 @@ +# data_model module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py new file mode 100644 index 0000000..3158bea --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/__init__.py @@ -0,0 +1 @@ +# query_logics module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py new file mode 100644 index 0000000..4e750ac --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/enums.py @@ -0,0 +1,36 @@ +from enum import Enum, auto + + +class QueryPatternType(Enum): + ONLY_TEMPORAL = auto() + ONLY_SPATIAL = auto() + ONE_TEMPORAL_ONE_SPATIAL = auto() + + +class QueryTreatmentType(Enum): + EXACT = auto() + APPROXIMATE = auto() + + +class Statistic(Enum): + COUNT = auto() + SUM = auto() + CARDINALITY = auto() + INCREASE = auto() + RATE = auto() + MIN = auto() + MAX = auto() + QUANTILE = auto() + TOPK = auto() + + +class QueryResultType(Enum): + INSTANT_VECTOR = auto() + + +class CleanupPolicy(Enum): + """Policy for cleaning up old aggregates from the store.""" + + CIRCULAR_BUFFER = "circular_buffer" + READ_BASED = "read_based" + NO_CLEANUP = "no_cleanup" diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py new file mode 100644 index 0000000..469d8ea --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/logics.py @@ -0,0 +1,123 @@ +from typing import Tuple + 
+from promql_utilities.query_logics.enums import QueryTreatmentType, Statistic + +# def map_statistic_to_precompute_operators( +# statistic: str, treatment_type: QueryTreatmentType +# ) -> List[Tuple[str, str]]: +# # if statistic in ["quantile", "stddev", "stdvar"]: +# if statistic == "quantile": +# if treatment_type == QueryTreatmentType.EXACT: +# raise ValueError(f"Statistic {statistic} cannot be computed exactly") +# else: +# return [("KLL", "")] +# # else: +# # return [("UnivMon", "")] +# elif statistic in ["min", "max"]: +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("KLL", "")] +# else: +# return [("MinMax", statistic)] +# elif statistic in ["sum", "count"]: +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("CountMinSketch", statistic)] +# else: +# return [("Sum", statistic)] +# elif statistic == "avg": +# if treatment_type == QueryTreatmentType.APPROXIMATE: +# return [("CountMinSketch", "sum"), ("CountMinSketch", "count")] +# else: +# return [("Sum", "sum"), ("Sum", "count")] +# elif statistic in ["rate", "increase"]: +# return [("Increase", "")] +# else: +# raise NotImplementedError(f"Statistic {statistic} not supported") + + +def map_statistic_to_precompute_operator( + statistic: Statistic, treatment_type: QueryTreatmentType +) -> Tuple[str, str]: + # if statistic in ["quantile", "stddev", "stdvar"]: + if statistic == Statistic.QUANTILE: + if treatment_type == QueryTreatmentType.EXACT: + raise ValueError(f"Statistic {statistic} cannot be computed exactly") + else: + return ("DatasketchesKLL", "") + # return ("HydraKLL", "") + # else: + # return [("UnivMon", "")] + elif statistic == Statistic.TOPK: + if treatment_type == QueryTreatmentType.EXACT: + raise ValueError(f"Statistic {statistic} cannot be computed exactly") + else: + return ("CountMinSketchWithHeap", statistic.name.lower()) + elif statistic in [Statistic.MIN, Statistic.MAX]: + if treatment_type == QueryTreatmentType.APPROXIMATE: + return ("DatasketchesKLL", 
"") + # return ("HydraKLL", "") + else: + # NOTE: Change to Multiple<>Accumulator + # return ("MinMax", statistic.name.lower()) + return ("MultipleMinMax", statistic.name.lower()) + elif statistic in [Statistic.SUM, Statistic.COUNT]: + if treatment_type == QueryTreatmentType.APPROXIMATE: + return ("CountMinSketch", statistic.name.lower()) + else: + # NOTE: Change to Multiple<>Accumulator + # return ("Sum", statistic.name.lower()) + return ("MultipleSum", statistic.name.lower()) + # elif statistic == "avg": + # if treatment_type == QueryTreatmentType.APPROXIMATE: + # return [("CountMinSketch", "sum"), ("CountMinSketch", "count")] + # else: + # return [("Sum", "sum"), ("Sum", "count")] + elif statistic in [Statistic.RATE, Statistic.INCREASE]: + # NOTE: Change to Multiple<>Accumulator + # return ("Increase", "") + return ("MultipleIncrease", "") + else: + raise NotImplementedError(f"Statistic {statistic} not supported") + + +def does_precompute_operator_support_subpopulations( + statistic: Statistic, precompute_operator: str +) -> bool: + if precompute_operator in ["Increase", "MinMax", "Sum", "DatasketchesKLL"]: + return False + elif precompute_operator in [ + "MultipleIncrease", + "MultipleMinMax", + "MultipleSum", + "HydraKLL", + ]: + # TODO: do we need to check for statistic here? If not, remove the check from CountMinSketch + return True + elif precompute_operator == "CountMinSketch": + return statistic in [Statistic.SUM, Statistic.COUNT] + elif ( + precompute_operator == "CountMinSketchWithHeap" and statistic == Statistic.TOPK + ): + # topk and bottomk do not support subpopulations! + # other usages of CountMinSketchWithHeap will fall through. 
+ return False + # elif precompute_operator == "UnivMon": + # return statistic in ["sum", "count", "avg"] + else: + raise NotImplementedError( + f"Precompute operator {precompute_operator} not supported" + ) + + +def get_is_collapsable(temporal_aggregation: str, spatial_aggregation: str) -> bool: + if spatial_aggregation == "sum": + return temporal_aggregation in [ + "sum_over_time", + "count_over_time", + # "increase", + # "rate", + ] + elif spatial_aggregation == "min": + return temporal_aggregation == "min_over_time" + elif spatial_aggregation == "max": + return temporal_aggregation == "max_over_time" + return False diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py new file mode 100644 index 0000000..81b710d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_logics/parsing.py @@ -0,0 +1,68 @@ +from typing import Tuple, List + +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames +from promql_utilities.query_logics.enums import QueryPatternType, Statistic + + +def get_metric_and_spatial_filter(query_pattern_match) -> Tuple[str, str]: + metric = query_pattern_match.tokens["metric"]["name"] + spatial_filter = "" + + if query_pattern_match.tokens["metric"]["labels"].matchers: + spatial_filter = ( + query_pattern_match.tokens["metric"]["ast"] + .prettify() + .split("{")[1] + .split("}")[0] + ) + metric = metric.split("{")[0] + + return metric, spatial_filter + + +def get_statistics_to_compute( + query_pattern_type, query_pattern_match +) -> List[Statistic]: + statistic_to_compute = None + + if ( + query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + statistic_to_compute = query_pattern_match.tokens["function"]["name"].split( + "_" + )[0] + # template_config.tumblingWindowSize = 
self.t_repeat + elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: + statistic_to_compute = query_pattern_match.tokens["aggregation"]["op"] + # template_config.tumblingWindowSize = self.prometheus_scrape_interval + else: + raise ValueError("Invalid query pattern type") + + if statistic_to_compute == "avg": + return [Statistic.SUM, Statistic.COUNT] + else: + # get enum value from string + return [Statistic[statistic_to_compute.upper()]] + + +def get_spatial_aggregation_output_labels( + query_pattern_match, all_labels: KeyByLabelNames +) -> KeyByLabelNames: + aggregation_modifier = query_pattern_match.tokens["aggregation"]["modifier"] + aggregation_modifier_labels = None + + # Fixing issue https://github.com/ProjectASAP/asap-internal/issues/24 + if aggregation_modifier is None: + return KeyByLabelNames([]) + + if aggregation_modifier.type == aggregation_modifier.type.By: + aggregation_modifier_labels = KeyByLabelNames(aggregation_modifier.labels) + elif aggregation_modifier.type == aggregation_modifier.type.Without: + aggregation_modifier_labels = all_labels - KeyByLabelNames( + aggregation_modifier.labels + ) + else: + raise ValueError("Invalid aggregation modifier") + + return aggregation_modifier_labels diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py new file mode 100644 index 0000000..97c74ca --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/__init__.py @@ -0,0 +1 @@ +# query_results module for promql_utilities diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py new file mode 100644 index 0000000..7f43d74 --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/classes.py @@ -0,0 +1,143 @@ +import numpy as np +from typing import List, Dict, Optional, Set + + +class TimeSeries: + def __init__(self, key: frozenset, values: List[Optional[float]]): + self.key = key + self.values = np.array(values) + + +class QueryResult: + def __init__( + self, + server_name: str, + query: str, + query_idx: int, + repetition_idx: int, + result: Optional[List[Dict]], + latency: Optional[float], + cumulative_latency: Optional[float], + query_group_idx: int = 0, + raw_text_result: Optional[str] = None, + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.repetition_idx = repetition_idx + self.query_group_idx = query_group_idx + self.latency = latency + self.cumulative_latency = cumulative_latency + self.raw_text_result = raw_text_result + + self.result: Optional[Dict[frozenset, float]] = None + if result: + self.result = { + frozenset(result_per_key["metric"].items()): float( + result_per_key["value"][1] + ) + for result_per_key in result + } + + +class QueryResultAcrossTime: + def __init__(self, server_name, query, query_idx, num_repetitions): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.num_repetitions = num_repetitions + self.query_results: List[QueryResult] = [] + + def add_result(self, query_result: QueryResult): + self.query_results.append(query_result) + + def get_all_timeseries(self) -> Dict[frozenset, TimeSeries]: + keys: Set[frozenset] = set() + for query_result in self.query_results: + if query_result.result: + keys.update(query_result.result.keys()) + + assert len(self.query_results) == self.num_repetitions + ret: Dict[frozenset, TimeSeries] = {} + intermediate_ret: Dict[frozenset, List[Optional[float]]] = { + k: [None for _ in range(self.num_repetitions)] for k in keys + } + + for k in keys: + for repetition_idx, result in enumerate(self.query_results): + if 
result.result: + intermediate_ret[k][repetition_idx] = result.result[k] + + ret[k] = TimeSeries(k, intermediate_ret[k]) + + return ret + + +class LatencyResult: + """Represents latency data for a single query execution.""" + + def __init__( + self, + server_name: str, + query: str, + query_idx: int, + repetition_idx: int, + latency: Optional[float], + cumulative_latency: Optional[float], + query_group_idx: int = 0, + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.repetition_idx = repetition_idx + self.query_group_idx = query_group_idx + self.latency = latency + self.cumulative_latency = cumulative_latency + + +class LatencyResultAcrossTime: + """Represents latency data for a query across multiple repetitions.""" + + def __init__( + self, server_name: str, query: str, query_idx: int, num_repetitions: int + ): + self.server_name = server_name + self.query = query + self.query_idx = query_idx + self.num_repetitions = num_repetitions + self.latency_results: List[LatencyResult] = [] + + def add_latency_result(self, latency_result: LatencyResult): + """Add a latency result for a specific repetition.""" + self.latency_results.append(latency_result) + + def get_latencies(self) -> List[Optional[float]]: + """Get list of latencies across all repetitions.""" + return [lr.latency for lr in self.latency_results] + + def get_cumulative_latencies(self) -> List[Optional[float]]: + """Get list of cumulative latencies across all repetitions.""" + return [lr.cumulative_latency for lr in self.latency_results] + + @classmethod + def from_query_result_across_time( + cls, qrat: "QueryResultAcrossTime" + ) -> "LatencyResultAcrossTime": + """Create LatencyResultAcrossTime from existing QueryResultAcrossTime.""" + latency_result_across_time = cls( + qrat.server_name, qrat.query, qrat.query_idx, qrat.num_repetitions + ) + + for query_result in qrat.query_results: + latency_result = LatencyResult( + server_name=query_result.server_name, + 
query=query_result.query, + query_idx=query_result.query_idx, + repetition_idx=query_result.repetition_idx, + latency=query_result.latency, + cumulative_latency=query_result.cumulative_latency, + query_group_idx=query_result.query_group_idx, + ) + latency_result_across_time.add_latency_result(latency_result) + + return latency_result_across_time diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py new file mode 100644 index 0000000..ad96e65 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/__init__.py @@ -0,0 +1,13 @@ +""" +Streaming serialization interfaces for query results. + +This module provides multiple serialization formats for query results: +- JSONL + gzip: Compressed streaming format, human-readable +- Parquet: Columnar format for analytics, high compression +- Backward compatibility with pickle format +""" + +from .base import ResultsSerializer +from .factory import SerializerFactory, get_available_formats + +__all__ = ["ResultsSerializer", "SerializerFactory", "get_available_formats"] diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py new file mode 100644 index 0000000..55ef371 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/base.py @@ -0,0 +1,84 @@ +""" +Abstract base class for results serializers. 
+""" + +from abc import ABC, abstractmethod +from typing import Any, Dict +from ..classes import QueryResultAcrossTime, LatencyResultAcrossTime, QueryResult + + +class ResultsSerializer(ABC): + """Abstract interface for query results serialization.""" + + def __init__(self, output_dir: str): + """Initialize serializer with output directory. + + Args: + output_dir: Directory where results will be written + """ + self.output_dir = output_dir + + @abstractmethod + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to storage. + + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + pass + + @abstractmethod + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from storage. + + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + pass + + @abstractmethod + def exists(self) -> bool: + """Check if serialized results exist. + + Returns: + True if results exist and can be read + """ + pass + + @abstractmethod + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. + """ + pass + + @abstractmethod + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + pass + + @abstractmethod + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + pass + + def cleanup(self) -> None: + """Clean up any resources. Override if needed.""" + pass + + @abstractmethod + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. 
+ + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + pass diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py new file mode 100644 index 0000000..f9b22d6 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/factory.py @@ -0,0 +1,121 @@ +""" +Factory for creating serializers with automatic format detection. +""" + +import os +import logging +from typing import List, Optional +from .base import ResultsSerializer + + +logger = logging.getLogger(__name__) + + +def get_available_formats() -> List[str]: + """Get list of available serialization formats. + + Returns: + List of format names that can be used + """ + return ["jsonl", "parquet"] + + +class SerializerFactory: + """Factory for creating results serializers.""" + + @staticmethod + def create(format_name: str, output_dir: str, **kwargs) -> ResultsSerializer: + """Create a serializer for the specified format. + + Args: + format_name: Format name ('jsonl', 'parquet', or 'auto') + output_dir: Directory for output files + **kwargs: Additional arguments passed to serializer + + Returns: + ResultsSerializer instance + + Raises: + ValueError: If format is not supported + ImportError: If required dependencies are missing + """ + if format_name == "auto": + format_name = SerializerFactory._detect_format(output_dir) + + if format_name == "jsonl": + from .jsonl_serializer import JSONLResultsSerializer + + return JSONLResultsSerializer(output_dir, **kwargs) + + elif format_name == "parquet": + from .parquet_serializer import ParquetResultsSerializer + + return ParquetResultsSerializer(output_dir, **kwargs) + + else: + available = get_available_formats() + raise ValueError( + f"Unsupported format '{format_name}'. 
Available formats: {available}" + ) + + @staticmethod + def _detect_format(output_dir: str) -> str: + """Auto-detect format based on existing files. + + Args: + output_dir: Directory to check for existing files + + Returns: + Detected format name, defaults to 'jsonl' if none found + """ + if not os.path.exists(output_dir): + return "jsonl" # Default for new directories + + # Check for Parquet files first (they indicate intent for analytics) + parquet_files = ["query_results.parquet", "query_latencies.parquet"] + + if any(os.path.exists(os.path.join(output_dir, f)) for f in parquet_files): + return "parquet" + + # Check for JSONL files + jsonl_files = [ + "query_results.jsonl.gz", + "query_results.jsonl", + "query_latencies.jsonl.gz", + "query_latencies.jsonl", + ] + + if any(os.path.exists(os.path.join(output_dir, f)) for f in jsonl_files): + return "jsonl" + + # Default to JSONL for new directories + logger.debug( + f"No existing format detected in {output_dir}, defaulting to JSONL" + ) + return "jsonl" + + @staticmethod + def create_from_existing(output_dir: str) -> Optional[ResultsSerializer]: + """Create serializer by detecting format from existing files. 
+ + Args: + output_dir: Directory containing existing results + + Returns: + ResultsSerializer instance, or None if no results found + """ + if not os.path.exists(output_dir): + return None + + detected_format = SerializerFactory._detect_format(output_dir) + + try: + serializer = SerializerFactory.create(detected_format, output_dir) + if serializer.exists(): + return serializer + except (ValueError, ImportError) as e: + logger.warning( + f"Could not create serializer for detected format {detected_format}: {e}" + ) + + return None diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py new file mode 100644 index 0000000..1a07595 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/jsonl_serializer.py @@ -0,0 +1,499 @@ +""" +JSONL+gzip streaming serializer for query results. +""" + +import json +import gzip +import os +import threading +from typing import Any, Dict, Iterator +from .base import ResultsSerializer +from ..classes import ( + QueryResult, + QueryResultAcrossTime, + LatencyResult, + LatencyResultAcrossTime, +) + + +class JSONLResultsSerializer(ResultsSerializer): + """JSONL+gzip streaming serializer for query results.""" + + def __init__(self, output_dir: str, use_compression: bool = True): + """Initialize JSONL serializer. 
+ + Args: + output_dir: Directory for output files + use_compression: Whether to use gzip compression + """ + super().__init__(output_dir) + self.use_compression = use_compression + self.results_file = os.path.join(output_dir, "query_results.jsonl") + self.latency_file = os.path.join(output_dir, "query_latencies.jsonl") + self.metadata_file = os.path.join(output_dir, "experiment_metadata.json") + + if use_compression: + self.results_file += ".gz" + self.latency_file += ".gz" + + os.makedirs(output_dir, exist_ok=True) + + # Streaming write state + self._streaming_results_file = None + self._streaming_latency_file = None + self._streaming_metadata = None + + # Thread safety for streaming writes + self._write_lock = threading.Lock() + + def _open_for_write(self, filepath: str): + """Open file for writing with optional compression.""" + if self.use_compression: + return gzip.open(filepath, "wt", encoding="utf-8") + return open(filepath, "w", encoding="utf-8") + + def _open_for_read(self, filepath: str): + """Open file for reading with optional compression.""" + if self.use_compression: + return gzip.open(filepath, "rt", encoding="utf-8") + return open(filepath, "r", encoding="utf-8") + + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to JSONL files. 
+ + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + # Write metadata + self._write_metadata(results_across_servers) + + # Write results and latencies + with self._open_for_write(self.results_file) as results_f, self._open_for_write( + self.latency_file + ) as latency_f: + for server_name, server_results in results_across_servers.items(): + for query_idx, query_result_across_time in server_results.items(): + for query_result in query_result_across_time.query_results: + # Write result record + if query_result.result: + for frozenset_key, value in query_result.result.items(): + result_record = { + "server_name": server_name, + "query": query_result.query, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": self._serialize_frozenset_key( + frozenset_key + ), + "result_value": value, + } + results_f.write(json.dumps(result_record) + "\n") + + # Write latency record + latency_record = { + "server_name": server_name, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + latency_f.write(json.dumps(latency_record) + "\n") + + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from JSONL files. 
+ + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + results = {} + for server in servers: + results[server] = {} + for query_idx, query in enumerate(all_queries): + results[server][query_idx] = QueryResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read latencies into lookup table + latencies = {} + if os.path.exists(self.latency_file): + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + latency_record = json.loads(line) + key = ( + latency_record["server_name"], + latency_record["query_idx"], + latency_record["repetition_idx"], + ) + latencies[key] = ( + latency_record["latency"], + latency_record["cumulative_latency"], + ) + + # Read results and reconstruct QueryResult objects + query_results = {} # (server, query_idx, repetition_idx) -> partial QueryResult + + if os.path.exists(self.results_file): + with self._open_for_read(self.results_file) as f: + for line in f: + line = line.strip() + if line: + result_record = json.loads(line) + + key = ( + result_record["server_name"], + result_record["query_idx"], + result_record["repetition_idx"], + ) + + # 
Check if this is a raw_text_result (SQL/ClickHouse) record + is_raw_text = "raw_text_result" in result_record + + # Initialize QueryResult if not exists + if key not in query_results: + latency, cumulative_latency = latencies.get( + key, (None, None) + ) + query_results[key] = QueryResult( + server_name=result_record["server_name"], + query=result_record["query"], + query_idx=result_record["query_idx"], + repetition_idx=result_record["repetition_idx"], + result=None, # Will be populated below for Prometheus + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=result_record.get("query_group_idx", 0), + raw_text_result=None, # Will be populated for SQL + ) + if not is_raw_text: + query_results[key].result = {} + + if is_raw_text: + # SQL/ClickHouse raw text result + query_results[key].raw_text_result = result_record[ + "raw_text_result" + ] + else: + # Prometheus-style result + frozenset_key = self._deserialize_frozenset_key( + result_record["result_labels"] + ) + query_results[key].result[frozenset_key] = result_record[ + "result_value" + ] + + # Add QueryResult objects to the nested structure + for ( + server_name, + query_idx, + repetition_idx, + ), query_result in query_results.items(): + results[server_name][query_idx].add_result(query_result) + + # Handle cases where we have latencies but no results + for (server_name, query_idx, repetition_idx), ( + latency, + cumulative_latency, + ) in latencies.items(): + if (server_name, query_idx, repetition_idx) not in query_results: + # Create empty QueryResult with just latency data + empty_result = QueryResult( + server_name=server_name, + query=all_queries[query_idx], + query_idx=query_idx, + repetition_idx=repetition_idx, + result=None, + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=0, # Default for backward compatibility + ) + results[server_name][query_idx].add_result(empty_result) + + return results + + def exists(self) -> bool: + """Check if serialized 
results exist. + + Returns: + True if results exist and can be read + """ + return os.path.exists(self.metadata_file) and ( + os.path.exists(self.results_file) or os.path.exists(self.latency_file) + ) + + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. + """ + if ( + self._streaming_results_file is not None + or self._streaming_latency_file is not None + ): + raise RuntimeError("Streaming write session already active") + + self._streaming_metadata = metadata + self._streaming_results_file = self._open_for_write(self.results_file) + self._streaming_latency_file = self._open_for_write(self.latency_file) + + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + if self._streaming_results_file is None or self._streaming_latency_file is None: + raise RuntimeError("Streaming write session not started") + + with self._write_lock: + # Write result records - handle both Prometheus (result) and SQL (raw_text_result) + if query_result.result: + # Prometheus-style normalized results + for frozenset_key, value in query_result.result.items(): + result_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": self._serialize_frozenset_key(frozenset_key), + "result_value": value, + } + self._streaming_results_file.write(json.dumps(result_record) + "\n") + elif query_result.raw_text_result is not None: + # SQL/ClickHouse raw text result + result_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": 
query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "raw_text_result": query_result.raw_text_result, + } + self._streaming_results_file.write(json.dumps(result_record) + "\n") + + # Write latency record + latency_record = { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + self._streaming_latency_file.write(json.dumps(latency_record) + "\n") + + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + if self._streaming_results_file is not None: + self._streaming_results_file.close() + self._streaming_results_file = None + + if self._streaming_latency_file is not None: + self._streaming_latency_file.close() + self._streaming_latency_file = None + + # Write metadata at the end + if self._streaming_metadata is not None: + with open(self.metadata_file, "w") as f: + json.dump(self._streaming_metadata, f, indent=2) + self._streaming_metadata = None + + def stream_results(self) -> Iterator[Dict]: + """Stream read query results one record at a time. + + Yields: + Dict containing result record data + """ + if not os.path.exists(self.results_file): + return + + with self._open_for_read(self.results_file) as f: + for line in f: + line = line.strip() + if line: + yield json.loads(line) + + def stream_latencies(self) -> Iterator[Dict]: + """Stream read latency data one record at a time. 
+ + Yields: + Dict containing latency record data + """ + if not os.path.exists(self.latency_file): + return + + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + yield json.loads(line) + + def _write_metadata( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ): + """Write experiment metadata.""" + if not results_across_servers: + return + + servers = list(results_across_servers.keys()) + queries = [] + repetitions = 0 + + if servers: + first_server = servers[0] + if results_across_servers[first_server]: + query_indices = sorted(results_across_servers[first_server].keys()) + queries = [ + results_across_servers[first_server][i].query for i in query_indices + ] + if query_indices: + repetitions = results_across_servers[first_server][ + query_indices[0] + ].num_repetitions + + metadata = { + "queries": queries, + "servers": servers, + "repetitions": repetitions, + "total_queries": len(queries), + } + + with open(self.metadata_file, "w") as f: + json.dump(metadata, f, indent=2) + + def _read_metadata(self) -> Dict: + """Read experiment metadata.""" + with open(self.metadata_file, "r") as f: + return json.load(f) + + def _serialize_frozenset_key(self, frozenset_key: frozenset) -> str: + """Convert frozenset key to JSON string. + + Args: + frozenset_key: frozenset of (key, value) tuples + + Returns: + JSON string representation + """ + # Convert to dict and serialize as JSON with sorted keys for consistency + labels_dict = dict(frozenset_key) + return json.dumps(labels_dict, sort_keys=True) + + def _deserialize_frozenset_key(self, json_str: str) -> frozenset: + """Convert JSON string back to frozenset key. 
+ + Args: + json_str: JSON string representation + + Returns: + frozenset of (key, value) tuples + """ + labels_dict = json.loads(json_str) + return frozenset(labels_dict.items()) + + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. + + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + latencies = {} + for server in servers: + latencies[server] = {} + for query_idx, query in enumerate(all_queries): + latencies[server][query_idx] = LatencyResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read only latency data + if os.path.exists(self.latency_file): + with self._open_for_read(self.latency_file) as f: + for line in f: + line = line.strip() + if line: + latency_record = json.loads(line) + + latency_result = LatencyResult( + server_name=latency_record["server_name"], + query=all_queries[latency_record["query_idx"]], + query_idx=latency_record["query_idx"], + repetition_idx=latency_record["repetition_idx"], + latency=latency_record["latency"], + cumulative_latency=latency_record["cumulative_latency"], + 
query_group_idx=latency_record.get("query_group_idx", 0), + ) + + latencies[latency_record["server_name"]][ + latency_record["query_idx"] + ].add_latency_result(latency_result) + + return latencies diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py new file mode 100644 index 0000000..faadc3d --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/query_results/serializers/parquet_serializer.py @@ -0,0 +1,505 @@ +""" +Parquet serializer for query results using JSON columns for labels. +""" + +import json +import os +import threading +from typing import Any, Dict, List, Optional +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +from .base import ResultsSerializer +from ..classes import ( + QueryResult, + QueryResultAcrossTime, + LatencyResult, + LatencyResultAcrossTime, +) + + +class ParquetResultsSerializer(ResultsSerializer): + """Parquet serializer for query results with JSON column for labels.""" + + def __init__( + self, output_dir: str, compression: str = "snappy", batch_size: int = 1000 + ): + """Initialize Parquet serializer. + + Args: + output_dir: Directory for output files + compression: Compression algorithm ('snappy', 'gzip', 'lz4', etc.) 
+ batch_size: Number of records to batch before writing to parquet + """ + super().__init__(output_dir) + self.compression = compression + self.batch_size = batch_size + self.results_file = os.path.join(output_dir, "query_results.parquet") + self.latency_file = os.path.join(output_dir, "query_latencies.parquet") + self.metadata_file = os.path.join(output_dir, "experiment_metadata.json") + + os.makedirs(output_dir, exist_ok=True) + + # Streaming write state + self._streaming_results_writer: Optional[pq.ParquetWriter] = None + self._streaming_latency_writer: Optional[pq.ParquetWriter] = None + self._results_batch: List[Dict] = [] + self._latency_batch: List[Dict] = [] + self._streaming_metadata = None + + # Define schemas for streaming + self._results_schema = pa.schema( + [ + ("query_group_idx", pa.int64()), + ("server_name", pa.string()), + ("query", pa.string()), + ("query_idx", pa.int64()), + ("repetition_idx", pa.int64()), + ("result_labels", pa.string()), + ("result_value", pa.float64()), + ] + ) + + self._latency_schema = pa.schema( + [ + ("query_group_idx", pa.int64()), + ("server_name", pa.string()), + ("query_idx", pa.int64()), + ("repetition_idx", pa.int64()), + ("latency", pa.float64()), + ("cumulative_latency", pa.float64()), + ] + ) + + # Thread safety for streaming writes + self._write_lock = threading.Lock() + + def write_results( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ) -> None: + """Write query results to Parquet files. 
+ + Args: + results_across_servers: Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + # Write metadata + self._write_metadata(results_across_servers) + + results_rows = [] + latency_rows = [] + + for server_name, server_results in results_across_servers.items(): + for query_idx, query_result_across_time in server_results.items(): + query = query_result_across_time.query + + for query_result in query_result_across_time.query_results: + # Process query results + if query_result.result: + for frozenset_key, value in query_result.result.items(): + # Convert frozenset to JSON string + labels_dict = dict(frozenset_key) + labels_json = json.dumps(labels_dict, sort_keys=True) + + results_rows.append( + { + "server_name": server_name, + "query": query, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": labels_json, + "result_value": value, + } + ) + + # Process latency data separately + latency_rows.append( + { + "server_name": server_name, + "query_idx": query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + ) + + # Write results DataFrame + if results_rows: + results_df = pd.DataFrame(results_rows) + results_df.to_parquet( + self.results_file, compression=self.compression, index=False + ) + + # Write latencies DataFrame + if latency_rows: + latency_df = pd.DataFrame(latency_rows) + latency_df.to_parquet( + self.latency_file, compression=self.compression, index=False + ) + + def read_results(self) -> Dict[str, Dict[int, QueryResultAcrossTime]]: + """Read query results from Parquet files. 
+ + Returns: + Nested dict of server -> query_idx -> QueryResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + results = {} + for server in servers: + results[server] = {} + for query_idx, query in enumerate(all_queries): + results[server][query_idx] = QueryResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read latencies + latencies = {} + if os.path.exists(self.latency_file): + latency_df = pd.read_parquet(self.latency_file) + for _, row in latency_df.iterrows(): + key = (row["server_name"], row["query_idx"], row["repetition_idx"]) + latencies[key] = (row["latency"], row["cumulative_latency"]) + + # Read results and reconstruct QueryResult objects + query_results = {} # (server, query_idx, repetition_idx) -> QueryResult + + if os.path.exists(self.results_file): + results_df = pd.read_parquet(self.results_file) + + for _, row in results_df.iterrows(): + key = (row["server_name"], row["query_idx"], row["repetition_idx"]) + + # Initialize QueryResult if not exists + if key not in query_results: + latency, cumulative_latency = latencies.get(key, (None, None)) + query_results[key] = QueryResult( + server_name=row["server_name"], + query=row["query"], + 
query_idx=row["query_idx"], + repetition_idx=row["repetition_idx"], + result=None, # Will be populated below + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=row.get("query_group_idx", 0), + ) + query_results[key].result = {} + + # Parse labels back to frozenset + labels_dict = json.loads(row["result_labels"]) + frozenset_key = frozenset(labels_dict.items()) + query_results[key].result[frozenset_key] = row["result_value"] + + # Add QueryResult objects to the nested structure + for ( + server_name, + query_idx, + repetition_idx, + ), query_result in query_results.items(): + results[server_name][query_idx].add_result(query_result) + + # Handle cases where we have latencies but no results + for (server_name, query_idx, repetition_idx), ( + latency, + cumulative_latency, + ) in latencies.items(): + if (server_name, query_idx, repetition_idx) not in query_results: + # Create empty QueryResult with just latency data + empty_result = QueryResult( + server_name=server_name, + query=all_queries[query_idx], + query_idx=query_idx, + repetition_idx=repetition_idx, + result=None, + latency=latency, + cumulative_latency=cumulative_latency, + query_group_idx=0, # Default for backward compatibility + ) + results[server_name][query_idx].add_result(empty_result) + + return results + + def exists(self) -> bool: + """Check if serialized results exist. + + Returns: + True if results exist and can be read + """ + return os.path.exists(self.metadata_file) and ( + os.path.exists(self.results_file) or os.path.exists(self.latency_file) + ) + + def streaming_write_start(self, metadata: Dict[str, Any]) -> None: + """Initialize streaming write session with experiment metadata. + + Args: + metadata: Experiment metadata containing queries, servers, repetitions, etc. 
+ """ + if ( + self._streaming_results_writer is not None + or self._streaming_latency_writer is not None + ): + raise RuntimeError("Streaming write session already active") + + self._streaming_metadata = metadata + self._results_batch = [] + self._latency_batch = [] + + # Initialize ParquetWriter instances with schemas + self._streaming_results_writer = pq.ParquetWriter( + self.results_file, schema=self._results_schema, compression=self.compression + ) + self._streaming_latency_writer = pq.ParquetWriter( + self.latency_file, schema=self._latency_schema, compression=self.compression + ) + + def streaming_write_result(self, query_result: QueryResult) -> None: + """Write a single query result incrementally. + + Args: + query_result: Individual query result to write + """ + if ( + self._streaming_results_writer is None + or self._streaming_latency_writer is None + ): + raise RuntimeError("Streaming write session not started") + + with self._write_lock: + # Add result records to batch + if query_result.result: + for frozenset_key, value in query_result.result.items(): + labels_dict = dict(frozenset_key) + labels_json = json.dumps(labels_dict, sort_keys=True) + + self._results_batch.append( + { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query": query_result.query, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "result_labels": labels_json, + "result_value": value, + } + ) + + # Add latency record to batch + self._latency_batch.append( + { + "query_group_idx": query_result.query_group_idx, + "server_name": query_result.server_name, + "query_idx": query_result.query_idx, + "repetition_idx": query_result.repetition_idx, + "latency": query_result.latency, + "cumulative_latency": query_result.cumulative_latency, + } + ) + + # Flush batches if they reach batch_size + if len(self._results_batch) >= self.batch_size: + self._flush_results_batch() + if len(self._latency_batch) >= 
self.batch_size: + self._flush_latency_batch() + + def streaming_write_end(self) -> None: + """Finalize streaming write session and close any open resources.""" + # Flush any remaining batches + if self._results_batch: + self._flush_results_batch() + if self._latency_batch: + self._flush_latency_batch() + + # Close writers + if self._streaming_results_writer is not None: + self._streaming_results_writer.close() + self._streaming_results_writer = None + + if self._streaming_latency_writer is not None: + self._streaming_latency_writer.close() + self._streaming_latency_writer = None + + # Write metadata at the end + if self._streaming_metadata is not None: + with open(self.metadata_file, "w") as f: + json.dump(self._streaming_metadata, f, indent=2) + self._streaming_metadata = None + + def _flush_results_batch(self) -> None: + """Write current results batch to parquet.""" + if self._results_batch and self._streaming_results_writer is not None: + results_df = pd.DataFrame(self._results_batch) + table = pa.Table.from_pandas(results_df, schema=self._results_schema) + self._streaming_results_writer.write_table(table) + self._results_batch = [] + + def _flush_latency_batch(self) -> None: + """Write current latency batch to parquet.""" + if self._latency_batch and self._streaming_latency_writer is not None: + latency_df = pd.DataFrame(self._latency_batch) + table = pa.Table.from_pandas(latency_df, schema=self._latency_schema) + self._streaming_latency_writer.write_table(table) + self._latency_batch = [] + + def query_results(self, filters=None, columns=None) -> pd.DataFrame: + """Query results with optional filtering and column selection. 
+ + Args: + filters: PyArrow filters for row selection + columns: List of column names to read + + Returns: + Pandas DataFrame with query results + """ + if not os.path.exists(self.results_file): + return pd.DataFrame() + + return pd.read_parquet(self.results_file, filters=filters, columns=columns) + + def query_latencies(self, filters=None, columns=None) -> pd.DataFrame: + """Query latencies with optional filtering and column selection. + + Args: + filters: PyArrow filters for row selection + columns: List of column names to read + + Returns: + Pandas DataFrame with latency data + """ + if not os.path.exists(self.latency_file): + return pd.DataFrame() + + return pd.read_parquet(self.latency_file, filters=filters, columns=columns) + + def _write_metadata( + self, results_across_servers: Dict[str, Dict[int, QueryResultAcrossTime]] + ): + """Write experiment metadata.""" + if not results_across_servers: + return + + servers = list(results_across_servers.keys()) + queries = [] + repetitions = 0 + + if servers: + first_server = servers[0] + if results_across_servers[first_server]: + query_indices = sorted(results_across_servers[first_server].keys()) + queries = [ + results_across_servers[first_server][i].query for i in query_indices + ] + if query_indices: + repetitions = results_across_servers[first_server][ + query_indices[0] + ].num_repetitions + + metadata = { + "queries": queries, + "servers": servers, + "repetitions": repetitions, + "total_queries": len(queries), + } + + with open(self.metadata_file, "w") as f: + json.dump(metadata, f, indent=2) + + def _read_metadata(self) -> Dict: + """Read experiment metadata.""" + with open(self.metadata_file, "r") as f: + return json.load(f) + + def read_latencies_only(self) -> Dict[str, Dict[int, LatencyResultAcrossTime]]: + """Read only latency data without loading full results. 
+ + Returns: + Nested dict of server -> query_idx -> LatencyResultAcrossTime + """ + if not self.exists(): + raise FileNotFoundError(f"No results found in {self.output_dir}") + + # Read metadata + metadata = self._read_metadata() + + # Handle both old and new metadata formats + if "query_groups" in metadata: + # New format with query groups + all_queries = [] + query_idx_to_repetitions = {} + global_query_idx = 0 + + for qg in metadata["query_groups"]: + for query in qg["queries"]: + all_queries.append(query) + query_idx_to_repetitions[global_query_idx] = qg["repetitions"] + global_query_idx += 1 + + servers = metadata["servers"] + else: + # Old format (backward compatible) + all_queries = metadata["queries"] + servers = metadata["servers"] + query_idx_to_repetitions = { + i: metadata["repetitions"] for i in range(len(all_queries)) + } + + # Initialize nested structure + latencies = {} + for server in servers: + latencies[server] = {} + for query_idx, query in enumerate(all_queries): + latencies[server][query_idx] = LatencyResultAcrossTime( + server, + query, + query_idx, + query_idx_to_repetitions[query_idx], + ) + + # Read only latency data + if os.path.exists(self.latency_file): + latency_df = pd.read_parquet(self.latency_file) + for _, row in latency_df.iterrows(): + latency_result = LatencyResult( + server_name=row["server_name"], + query=all_queries[row["query_idx"]], + query_idx=row["query_idx"], + repetition_idx=row["repetition_idx"], + latency=row["latency"], + cumulative_latency=row["cumulative_latency"], + query_group_idx=row.get("query_group_idx", 0), + ) + + latencies[row["server_name"]][row["query_idx"]].add_latency_result( + latency_result + ) + + return latencies diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py new file mode 100644 index 0000000..49b176e --- /dev/null +++ 
b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/MetricConfig.py @@ -0,0 +1,17 @@ +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + + +class MetricConfig: + def __init__(self, yaml_str): + self.config = {} + for metric, labels in yaml_str.items(): + self.config[metric] = KeyByLabelNames(labels) + + @classmethod + def from_list(cls, yaml_list): + """Create MetricConfig from a list-of-dicts format used by Controller. + + Format: [{"metric": "name", "labels": ["l1", "l2"]}, ...] + """ + as_dict = {item["metric"]: item["labels"] for item in yaml_list} + return cls(as_dict) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py new file mode 100644 index 0000000..ac903f3 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/SQLTableConfig.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass +from typing import Dict, List + + +@dataclass +class TableSchema: + """Schema for a single SQL table.""" + + time_column: str + value_columns: List[str] + metadata_columns: List[str] + + +class SQLTableConfig: + """ + SQL schema configuration, equivalent to MetricConfig for SQL mode. 
+ + Mirrors the Rust SQLSchema/Table structure in: + CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs + """ + + def __init__(self, yaml_dict: dict): + self.config: Dict[str, TableSchema] = {} + for table in yaml_dict.get("tables", []): + self.config[table["name"]] = TableSchema( + time_column=table["time_column"], + value_columns=table["value_columns"], + metadata_columns=table["metadata_columns"], + ) + + def get_table(self, table_name: str) -> TableSchema: + return self.config.get(table_name) + + def get_time_column(self, table_name: str) -> str: + return self.config[table_name].time_column + + def get_metadata_columns(self, table_name: str) -> List[str]: + return self.config[table_name].metadata_columns diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py new file mode 100644 index 0000000..540d411 --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/StreamingAggregationConfig.py @@ -0,0 +1,167 @@ +import yaml + +# from ruamel.yaml import YAML + +from typing import Dict, Tuple, Union +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + +yaml.add_representer( + KeyByLabelNames, + lambda dumper, data: dumper.represent_list(data.serialize_to_json()), +) + +# yaml_writer = YAML() +# yaml_writer.representer.add_representer( +# KeyByLabelNames, +# lambda dumper, data: dumper.represent_sequence( +# "tag:yaml.org,2002:seq", data.serialize_to_json(), flow_style=False +# ), +# ) + + +class StreamingAggregationConfig: + aggregationId: int + aggregationType: str + aggregationSubType: str + + # NEW fields for sliding window support 
(Issue #236) + windowSize: int # Window size in seconds (e.g., 900s for 15m) + slideInterval: int # Slide/hop interval in seconds (e.g., 30s) + windowType: str # "tumbling" or "sliding" + + # DEPRECATED but kept for backward compatibility + tumblingWindowSize: int # For reading old configs + + spatialFilter: str + metric: str # PromQL mode: metric name + parameters: dict + + labels: Dict[str, KeyByLabelNames] + + # SQL-specific fields (optional, used when query_language=sql) + table_name: str # SQL mode: table name + value_column: str # SQL mode: which value column to aggregate + + def __init__(self): + self.labels = { + "rollup": KeyByLabelNames([]), + "grouping": KeyByLabelNames([]), + "aggregated": KeyByLabelNames([]), + } + # Default to tumbling windows for backward compatibility + self.windowType = "tumbling" + # SQL fields default to None + self.table_name = None + self.value_column = None + self.metric = None + + @staticmethod + def from_dict(aggregation_config: dict) -> "StreamingAggregationConfig": + aggregation = StreamingAggregationConfig() + aggregation.aggregationId = aggregation_config["aggregationId"] + aggregation.aggregationType = aggregation_config["aggregationType"] + aggregation.aggregationSubType = aggregation_config["aggregationSubType"] + + # NEW: Handle new window fields with backward compatibility + aggregation.windowType = aggregation_config.get("windowType", "tumbling") + aggregation.windowSize = aggregation_config.get( + "windowSize", aggregation_config.get("tumblingWindowSize") + ) + aggregation.slideInterval = aggregation_config.get( + "slideInterval", aggregation_config.get("tumblingWindowSize") + ) + + # Keep deprecated field for backward compatibility + aggregation.tumblingWindowSize = aggregation_config.get( + "tumblingWindowSize", aggregation.windowSize + ) + + aggregation.spatialFilter = aggregation_config["spatialFilter"] + aggregation.parameters = aggregation_config["parameters"] + + # Handle both PromQL (metric) and SQL 
(table_name/value_column) formats + aggregation.metric = aggregation_config.get("metric") + aggregation.table_name = aggregation_config.get("table_name") + aggregation.value_column = aggregation_config.get("value_column", "value") + + for k, v in aggregation_config["labels"].items(): + if k not in aggregation.labels: + raise ValueError(f"Invalid label name: {k}") + if v is not None: + aggregation.labels[k] = KeyByLabelNames(v) + + return aggregation + + def validate( + self, + schema_config: Union[MetricConfig, SQLTableConfig], + query_language: str, + ): + """Validate against MetricConfig (promql) or SQLTableConfig (sql).""" + configured_labels = KeyByLabelNames([]) + for k, v in self.labels.items(): + assert v is not None + configured_labels += v + + if query_language == "promql": + # Existing validation logic for PromQL + if schema_config.config[self.metric] != configured_labels: + raise ValueError( + "Labels do not match: {} vs {}".format( + schema_config.config[self.metric], + configured_labels, + ) + ) + elif query_language == "sql": + # SQL validation: check labels match metadata_columns + table_schema = schema_config.get_table(self.table_name) + if table_schema is None: + raise ValueError(f"Table '{self.table_name}' not found in sql_schema") + + expected_columns = set(table_schema.metadata_columns) + actual_columns = set(configured_labels.keys) + if expected_columns != actual_columns: + raise ValueError( + f"Labels do not match metadata_columns for table {self.table_name}: " + f"expected {expected_columns}, got {actual_columns}" + ) + # Validate value_column exists + if self.value_column not in table_schema.value_columns: + raise ValueError( + f"value_column '{self.value_column}' not in table {self.table_name} " + f"value_columns: {table_schema.value_columns}" + ) + + def to_dict( + self, + schema_config: Union[MetricConfig, SQLTableConfig], + query_language: str, + ) -> dict: + self.validate(schema_config, query_language) + return self.__dict__ + + def 
get_source_identifier(self) -> str: + """Get the metric name (promql) or table name (sql).""" + return self.metric if self.metric else self.table_name + + def get_identifying_key(self) -> Tuple: + keys = [ + self.aggregationType, + self.aggregationSubType, + self.windowType, # NEW: Include window type + self.windowSize, # NEW: Include window size + self.slideInterval, # NEW: Include slide interval + self.tumblingWindowSize, # Keep for backward compatibility + self.spatialFilter, + self.metric, + self.table_name, # SQL mode: table name + self.value_column, # SQL mode: value column + tuple(self.parameters.items()), + ] + for k in sorted(self.labels.keys()): + keys.append(k) + keys.append(tuple(self.labels[k].serialize_to_json())) + + return tuple(keys) diff --git a/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py new file mode 100644 index 0000000..e822aff --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/promql_utilities/streaming_config/__init__.py @@ -0,0 +1,12 @@ +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.streaming_config.SQLTableConfig import SQLTableConfig, TableSchema +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) + +__all__ = [ + "MetricConfig", + "SQLTableConfig", + "TableSchema", + "StreamingAggregationConfig", +] diff --git a/CommonDependencies/dependencies/py/promql_utilities/setup.py b/CommonDependencies/dependencies/py/promql_utilities/setup.py new file mode 100644 index 0000000..0b1378c --- /dev/null +++ b/CommonDependencies/dependencies/py/promql_utilities/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name="promql_utilities", + version="0.1", + packages=find_packages(), + install_requires=["promql-parser>=0.4.1", "pandas", "pyarrow"], 
+) diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock new file mode 100644 index 0000000..0bad1fe --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.lock @@ -0,0 +1,2814 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" + +[[package]] +name = "arrow-select" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" 
+dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "comfy-table" +version = "7.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + 
"parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.4.4", + "chrono", + "dashmap", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", +] + 
+[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools", + "log", + "md-5", + "rand", + "regex", + "sha2", + 
"unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + +[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools", +] + +[[package]] +name = 
"datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", +] + +[[package]] +name = "datafusion_summary_library" +version = "0.1.0" +dependencies = [ + "arrow", + "async-trait", + "datafusion", + "futures", + "hyperloglogplus", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "hashbrown" +version = 
"0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies 
= [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies 
= [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + 
+[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + 
+[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash 2.1.2", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" 
+version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 
+dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object_store" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "parquet" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + 
"arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash 1.6.3", + "zstd", + "zstd-sys", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = 
"redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "same-file" +version = "1.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = 
[ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + 
+[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = 
"wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name 
= "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = 
"zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml new file mode 100644 index 0000000..503f62f --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "datafusion_summary_library" +version = "0.1.0" +edition = "2021" +authors = ["ProjectASAP Team"] + +[dependencies] +datafusion = "43" +arrow = "53" +hyperloglogplus = "0.4" +async-trait = "0.1" +futures = "0.3" diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs new file mode 100644 index 0000000..4183785 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/lib.rs 
@@ -0,0 +1,15 @@ +// DataFusion Summary Library +// +// This library provides logical and physical operators for sketch-based +// query optimization in DataFusion. It supports approximate query processing +// using data structures like HyperLogLog for COUNT(DISTINCT) operations. + +pub mod physical; +pub mod sketch_operators; + +pub use physical::{HllSketch, SketchExtensionPlanner, SummaryInferExec, SummaryInsertExec}; +pub use sketch_operators::{ + GroupingStrategy, InferOperation, PrecomputedSummaryRead, SketchMetadata, SketchSpec, + SketchType, SummaryInfer, SummaryInsert, SummaryMerge, SummaryMergeMultiple, SummaryRead, + SummarySubtract, SummaryType, TypedExpr, +}; diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs new file mode 100644 index 0000000..6fa0dbd --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/hll.rs @@ -0,0 +1,169 @@ +// HyperLogLog wrapper for cardinality estimation. +// +// This provides a simple wrapper around the hyperloglogplus crate for use +// in sketch-based COUNT(DISTINCT) queries. + +use std::collections::hash_map::{DefaultHasher, RandomState}; +use std::hash::{Hash, Hasher}; + +use hyperloglogplus::{HyperLogLog, HyperLogLogPlus}; + +/// Wrapper around HyperLogLog++ for cardinality estimation. +/// +/// Uses precision 14 by default which gives ~0.8% standard error. +#[derive(Clone)] +pub struct HllSketch { + hll: HyperLogLogPlus, +} + +impl HllSketch { + /// Create a new HLL sketch with default precision (14). + pub fn new() -> Self { + Self::with_precision(14) + } + + /// Create a new HLL sketch with specified precision. + /// + /// Precision must be between 4 and 18. Higher precision means + /// more accuracy but more memory usage. 
+ pub fn with_precision(precision: u8) -> Self { + let hll = HyperLogLogPlus::new(precision, RandomState::new()) + .expect("Valid precision range is 4-18"); + Self { hll } + } + + /// Insert a value into the sketch. + /// + /// The value is hashed to u64 before insertion. + pub fn insert(&mut self, value: &T) { + // Hash the value to u64 first, then insert + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + let hash = hasher.finish(); + self.hll.insert(&hash); + } + + /// Insert a byte slice into the sketch. + pub fn insert_bytes(&mut self, value: &[u8]) { + self.insert(&value); + } + + /// Get the estimated cardinality. + pub fn count(&mut self) -> u64 { + self.hll.count().round() as u64 + } + + /// Merge another HLL sketch into this one. + #[allow(dead_code)] + pub fn merge(&mut self, other: &mut Self) { + self.hll + .merge(&other.hll) + .expect("HLL merge should succeed for same precision"); + } + + /// Serialize the sketch to bytes. + /// + /// Format: [precision: u8][count as f64 bytes(8)] + /// This is a hacky serialization that just stores the current count. + pub fn to_bytes(&mut self) -> Vec { + let precision = 14u8; // We always use 14 for now + let count = self.hll.count(); + + // Simple format: [precision(1)][count as f64 bytes(8)] + let mut bytes = Vec::with_capacity(9); + bytes.push(precision); + bytes.extend_from_slice(&count.to_le_bytes()); + bytes + } + + /// Deserialize a sketch from bytes. + /// Note: This only recovers the count, not the full HLL state. + #[allow(dead_code)] + pub fn from_bytes(bytes: &[u8]) -> Option { + if bytes.len() < 9 { + return None; + } + + let _precision = bytes[0]; + let count_bytes: [u8; 8] = bytes[1..9].try_into().ok()?; + let _count = f64::from_le_bytes(count_bytes); + + // Since we can't truly deserialize the HLL state from just the count, + // we create an empty HLL. This is a limitation of the simple format. 
+ Some(Self::new()) + } +} + +impl Default for HllSketch { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hll_basic() { + let mut hll = HllSketch::new(); + + // Insert 1000 unique values + for i in 0..1000 { + hll.insert(&i); + } + + let count = hll.count(); + // HLL has ~0.8% error at precision 14, so allow 5% tolerance + assert!(count > 900, "Count {} should be > 900", count); + assert!(count < 1100, "Count {} should be < 1100", count); + } + + #[test] + fn test_hll_duplicates() { + let mut hll = HllSketch::new(); + + // Insert same value many times + for _ in 0..1000 { + hll.insert(&42); + } + + let count = hll.count(); + assert_eq!(count, 1, "Duplicates should not increase count"); + } + + #[test] + fn test_hll_merge() { + let mut hll1 = HllSketch::new(); + let mut hll2 = HllSketch::new(); + + // Insert different values into each + for i in 0..500 { + hll1.insert(&i); + } + for i in 500..1000 { + hll2.insert(&i); + } + + hll1.merge(&mut hll2); + let count = hll1.count(); + + // Should have ~1000 unique values + assert!(count > 900, "Merged count {} should be > 900", count); + assert!(count < 1100, "Merged count {} should be < 1100", count); + } + + #[test] + fn test_hll_strings() { + let mut hll = HllSketch::new(); + + for i in 0..1000 { + let s = format!("user_{}", i); + hll.insert(&s); + } + + let count = hll.count(); + assert!(count > 900, "String count {} should be > 900", count); + assert!(count < 1100, "String count {} should be < 1100", count); + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs new file mode 100644 index 0000000..ae432b9 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/mod.rs @@ -0,0 +1,17 @@ +// Physical execution module for sketch-based query plans. 
+// +// This module provides physical execution plan nodes for sketch operators: +// - SummaryInsertExec: Computes sketches from raw data +// - SummaryInferExec: Extracts results from sketches +// +// Currently only HLL (HyperLogLog) sketches are supported for COUNT(DISTINCT). + +mod hll; +mod planner; +mod summary_infer_exec; +mod summary_insert_exec; + +pub use hll::HllSketch; +pub use planner::SketchExtensionPlanner; +pub use summary_infer_exec::SummaryInferExec; +pub use summary_insert_exec::SummaryInsertExec; diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs new file mode 100644 index 0000000..5823923 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/planner.rs @@ -0,0 +1,205 @@ +// ExtensionPlanner for sketch-based logical plan nodes. +// +// This planner converts SummaryInsert and SummaryInfer logical nodes +// into their physical execution plan counterparts. + +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::common::{DataFusionError, Result as DFResult}; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; + +use crate::sketch_operators::{InferOperation, SketchType, SummaryInfer, SummaryInsert}; + +use super::{SummaryInferExec, SummaryInsertExec}; + +/// ExtensionPlanner that handles SummaryInsert and SummaryInfer logical nodes. 
+#[derive(Debug, Default)] +pub struct SketchExtensionPlanner; + +impl SketchExtensionPlanner { + pub fn new() -> Self { + Self + } +} + +#[async_trait] +impl ExtensionPlanner for SketchExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> DFResult>> { + // Try to downcast to SummaryInsert + if let Some(summary_insert) = node.as_any().downcast_ref::() { + return self.plan_summary_insert(summary_insert, physical_inputs); + } + + // Try to downcast to SummaryInfer + if let Some(summary_infer) = node.as_any().downcast_ref::() { + return self.plan_summary_infer(summary_infer, physical_inputs); + } + + // Unknown node type, let other planners handle it + Ok(None) + } +} + +impl SketchExtensionPlanner { + fn plan_summary_insert( + &self, + node: &SummaryInsert, + physical_inputs: &[Arc], + ) -> DFResult>> { + if physical_inputs.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInsert expects exactly one input".to_string(), + )); + } + + let input = physical_inputs[0].clone(); + let input_schema = input.schema(); + + // Only support HLL for now + if node.sketches.len() != 1 { + return Err(DataFusionError::NotImplemented( + "SummaryInsert with multiple sketches not yet supported".to_string(), + )); + } + + let sketch_spec = &node.sketches[0]; + if sketch_spec.sketch_type != SketchType::HLL { + return Err(DataFusionError::NotImplemented(format!( + "Sketch type {:?} not yet supported, only HLL is implemented", + sketch_spec.sketch_type + ))); + } + + // Find value column index + let value_col_idx = match &sketch_spec.value_column { + Some(col_name) => input_schema + .fields() + .iter() + .position(|f| f.name() == col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Value column '{}' not found in input schema", + col_name + )) + })?, + None => { + return 
Err(DataFusionError::Plan( + "SummaryInsert requires a value column for HLL".to_string(), + )); + } + }; + + // Find group-by column indices + let group_by_indices: Vec = if !node.group_by_exprs.is_empty() { + // Use group_by_exprs: find columns by expression name + node.group_by_exprs + .iter() + .map(|typed_expr| { + // For simple column expressions, extract the column name + let col_name = + if let datafusion::logical_expr::Expr::Column(col) = &typed_expr.expr { + col.name.clone() + } else { + typed_expr.expr.schema_name().to_string() + }; + + input_schema + .fields() + .iter() + .position(|f| f.name() == &col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Group-by column '{}' not found in input schema", + col_name + )) + }) + }) + .collect::>>()? + } else { + // Use legacy group_by strings + node.group_by + .iter() + .map(|col_name| { + input_schema + .fields() + .iter() + .position(|f| f.name() == col_name) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Group-by column '{}' not found in input schema", + col_name + )) + }) + }) + .collect::>>()? 
+ }; + + let exec = SummaryInsertExec::new( + input, + value_col_idx, + group_by_indices, + sketch_spec.output_column_name.clone(), + ); + + Ok(Some(Arc::new(exec))) + } + + fn plan_summary_infer( + &self, + node: &SummaryInfer, + physical_inputs: &[Arc], + ) -> DFResult>> { + if physical_inputs.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInfer expects exactly one input".to_string(), + )); + } + + let input = physical_inputs[0].clone(); + let input_schema = input.schema(); + + // Only support single operation for now + if node.operations.len() != 1 { + return Err(DataFusionError::NotImplemented( + "SummaryInfer with multiple operations not yet supported".to_string(), + )); + } + + let operation = node.operations[0].clone(); + let output_name = node.output_names[0].clone(); + + // Only support CountDistinct for now + if !matches!(operation, InferOperation::CountDistinct) { + return Err(DataFusionError::NotImplemented(format!( + "Infer operation {:?} not yet supported, only CountDistinct is implemented", + operation + ))); + } + + // Find sketch column index (last column with "sketch" in name, or Binary type) + let sketch_col_idx = input_schema + .fields() + .iter() + .rposition(|f| { + f.name().contains("sketch") || f.data_type() == &arrow::datatypes::DataType::Binary + }) + .ok_or_else(|| { + DataFusionError::Plan("No sketch column found in input schema".to_string()) + })?; + + let exec = SummaryInferExec::new(input, sketch_col_idx, operation, output_name); + + Ok(Some(Arc::new(exec))) + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs new file mode 100644 index 0000000..8549452 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_infer_exec.rs @@ -0,0 +1,282 @@ +// Physical execution plan for SummaryInfer (sketch querying). 
+// +// This ExecutionPlan reads sketch data and extracts results. +// Currently only supports CountDistinct operation on HLL sketches. + +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use arrow::array::{Array, ArrayRef, BinaryArray, RecordBatch, UInt64Builder}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::{DataFusionError, Result as DFResult}; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; +use futures::Stream; + +use crate::sketch_operators::InferOperation; + +/// Physical execution plan for extracting results from HLL sketches. +/// +/// Takes input batches with sketch columns and produces one row per input row with: +/// - Group key columns (passed through) +/// - Result column (e.g., UInt64 for CountDistinct) +#[derive(Debug)] +pub struct SummaryInferExec { + /// Input execution plan (typically SummaryInsertExec) + input: Arc, + + /// Index of the sketch column in input schema + sketch_col_idx: usize, + + /// Infer operation to perform + operation: InferOperation, + + /// Output column name + output_name: String, + + /// Output schema + schema: SchemaRef, + + /// Plan properties (cached) + properties: PlanProperties, +} + +impl SummaryInferExec { + pub fn new( + input: Arc, + sketch_col_idx: usize, + operation: InferOperation, + output_name: String, + ) -> Self { + let input_schema = input.schema(); + + // Build output schema: all columns except sketch column, plus result column + let mut fields: Vec = input_schema + .fields() + .iter() + .enumerate() + .filter(|(idx, _)| *idx != sketch_col_idx) + .map(|(_, f)| f.as_ref().clone()) + .collect(); + + // Add result column based on operation type + let result_type = match &operation { + 
InferOperation::CountDistinct => DataType::UInt64, + InferOperation::Quantile(_) | InferOperation::Median => DataType::Float64, + _ => DataType::UInt64, // Default for unsupported ops + }; + + fields.push(Field::new(&output_name, result_type, false)); + + let schema = Arc::new(Schema::new(fields)); + + // Plan properties: same partitioning as input + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + + Self { + input, + sketch_col_idx, + operation, + output_name, + schema, + properties, + } + } +} + +impl DisplayAs for SummaryInferExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "SummaryInferExec: sketch_col={}, op={}, output={}", + self.sketch_col_idx, self.operation, self.output_name + ) + } + } + } +} + +impl ExecutionPlan for SummaryInferExec { + fn name(&self) -> &str { + "SummaryInferExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DFResult> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInferExec expects exactly one child".to_string(), + )); + } + Ok(Arc::new(SummaryInferExec::new( + children[0].clone(), + self.sketch_col_idx, + self.operation.clone(), + self.output_name.clone(), + ))) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> DFResult { + if partition != 0 { + return Err(DataFusionError::Internal(format!( + "SummaryInferExec only supports partition 0, got {}", + partition + ))); + } + + let input_stream = self.input.execute(0, context)?; + + let stream = SummaryInferStream::new( + input_stream, + 
self.schema.clone(), + self.sketch_col_idx, + self.operation.clone(), + ); + + Ok(Box::pin(stream)) + } +} + +/// Stream that transforms sketch batches into result batches. +struct SummaryInferStream { + /// Input stream + input: SendableRecordBatchStream, + + /// Output schema + schema: SchemaRef, + + /// Sketch column index + sketch_col_idx: usize, + + /// Operation to perform + operation: InferOperation, +} + +impl SummaryInferStream { + fn new( + input: SendableRecordBatchStream, + schema: SchemaRef, + sketch_col_idx: usize, + operation: InferOperation, + ) -> Self { + Self { + input, + schema, + sketch_col_idx, + operation, + } + } + + /// Transform a batch by extracting results from sketches. + fn transform_batch(&self, batch: &RecordBatch) -> DFResult { + let num_rows = batch.num_rows(); + + // Get the sketch column + let sketch_col = batch.column(self.sketch_col_idx); + let sketch_array = sketch_col + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("Sketch column is not Binary type".to_string()) + })?; + + // Build result column based on operation + let result_col: ArrayRef = match &self.operation { + InferOperation::CountDistinct => { + let mut builder = UInt64Builder::with_capacity(num_rows); + for i in 0..num_rows { + if sketch_array.is_null(i) { + builder.append_null(); + } else { + let sketch_bytes = sketch_array.value(i); + // Deserialize and get count + // Note: Our simple serialization format stores the count directly + let count = if sketch_bytes.len() >= 9 { + let count_bytes: [u8; 8] = sketch_bytes[1..9].try_into().unwrap(); + f64::from_le_bytes(count_bytes).round() as u64 + } else { + 0 + }; + builder.append_value(count); + } + } + Arc::new(builder.finish()) + } + _ => { + return Err(DataFusionError::NotImplemented(format!( + "Infer operation {:?} not yet implemented", + self.operation + ))); + } + }; + + // Build output columns: all input columns except sketch, plus result + let mut columns: Vec = batch + .columns() 
+ .iter() + .enumerate() + .filter(|(idx, _)| *idx != self.sketch_col_idx) + .map(|(_, col)| col.clone()) + .collect(); + columns.push(result_col); + + RecordBatch::try_new(self.schema.clone(), columns) + .map_err(|e| DataFusionError::ArrowError(e, None)) + } +} + +impl Stream for SummaryInferStream { + type Item = DFResult; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match Pin::new(&mut self.input).poll_next(cx) { + Poll::Ready(Some(Ok(batch))) => { + let result = self.transform_batch(&batch); + Poll::Ready(Some(result)) + } + Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))), + Poll::Ready(None) => Poll::Ready(None), + Poll::Pending => Poll::Pending, + } + } +} + +impl datafusion::physical_plan::RecordBatchStream for SummaryInferStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs new file mode 100644 index 0000000..a62d8ba --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/physical/summary_insert_exec.rs @@ -0,0 +1,434 @@ +// Physical execution plan for SummaryInsert (sketch building). +// +// This ExecutionPlan consumes input batches and builds HLL sketches +// for each group. Currently only supports HLL sketches. 
+ +use std::any::Any; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, BinaryBuilder, RecordBatch, StringBuilder, UInt64Builder}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::{DataFusionError, Result as DFResult, ScalarValue}; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, +}; + +use super::hll::HllSketch; + +/// Physical execution plan for building HLL sketches. +/// +/// Takes input batches and produces one row per group with: +/// - Group key columns +/// - Binary column containing serialized HLL sketch +#[derive(Debug)] +pub struct SummaryInsertExec { + /// Input execution plan + input: Arc, + + /// Index of the value column to sketch (in input schema) + value_col_idx: usize, + + /// Indices of group-by columns (in input schema) + group_by_indices: Vec, + + /// Name of the output sketch column + sketch_col_name: String, + + /// Output schema + schema: SchemaRef, + + /// Plan properties (cached) + properties: PlanProperties, +} + +impl SummaryInsertExec { + pub fn new( + input: Arc, + value_col_idx: usize, + group_by_indices: Vec, + sketch_col_name: String, + ) -> Self { + let input_schema = input.schema(); + + // Build output schema: group columns + sketch column + let mut fields: Vec = group_by_indices + .iter() + .map(|&idx| input_schema.field(idx).clone()) + .collect(); + + // Add sketch column with the specified name + fields.push(Field::new(&sketch_col_name, DataType::Binary, false)); + + let schema = Arc::new(Schema::new(fields)); + + // Plan properties: single partition output, no ordering guarantees + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); 
+ + Self { + input, + value_col_idx, + group_by_indices, + sketch_col_name, + schema, + properties, + } + } + + /// Extracts a group key from a row as a vector of ScalarValues. + fn extract_group_key( + batch: &RecordBatch, + row_idx: usize, + group_by_indices: &[usize], + ) -> Vec { + group_by_indices + .iter() + .map(|&col_idx| { + ScalarValue::try_from_array(batch.column(col_idx), row_idx) + .unwrap_or(ScalarValue::Null) + }) + .collect() + } + + /// Extracts a value as bytes for hashing. + fn extract_value_bytes(array: &ArrayRef, row_idx: usize) -> Vec { + // Convert any value to string representation for hashing + // This is a hacky but universal approach + if array.is_null(row_idx) { + return b"__NULL__".to_vec(); + } + + // Use Arrow's display formatter to get string representation + let value = ScalarValue::try_from_array(array.as_ref(), row_idx) + .map(|v| v.to_string()) + .unwrap_or_else(|_| "__ERROR__".to_string()); + + value.into_bytes() + } +} + +impl DisplayAs for SummaryInsertExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "SummaryInsertExec: value_col={}, group_by={:?}", + self.value_col_idx, self.group_by_indices + ) + } + } + } +} + +impl ExecutionPlan for SummaryInsertExec { + fn name(&self) -> &str { + "SummaryInsertExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DFResult> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "SummaryInsertExec expects exactly one child".to_string(), + )); + } + Ok(Arc::new(SummaryInsertExec::new( + children[0].clone(), + self.value_col_idx, + self.group_by_indices.clone(), + self.sketch_col_name.clone(), 
+ ))) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> DFResult { + if partition != 0 { + return Err(DataFusionError::Internal(format!( + "SummaryInsertExec only supports partition 0, got {}", + partition + ))); + } + + // Get input stream + let input_stream = self.input.execute(0, context)?; + + // Create the output stream + let schema = self.schema.clone(); + let value_col_idx = self.value_col_idx; + let group_by_indices = self.group_by_indices.clone(); + + let stream = + SummaryInsertStream::new(input_stream, schema, value_col_idx, group_by_indices); + + Ok(Box::pin(stream)) + } +} + +/// Stream that consumes input batches and produces aggregated sketch results. +struct SummaryInsertStream { + /// Input stream + input: SendableRecordBatchStream, + + /// Output schema + schema: SchemaRef, + + /// Value column index + value_col_idx: usize, + + /// Group-by column indices + group_by_indices: Vec, + + /// Accumulated sketches per group + groups: HashMap, HllSketch>, + + /// Whether we've finished consuming input + finished_input: bool, + + /// Whether we've emitted the final result + emitted_result: bool, +} + +impl SummaryInsertStream { + fn new( + input: SendableRecordBatchStream, + schema: SchemaRef, + value_col_idx: usize, + group_by_indices: Vec, + ) -> Self { + Self { + input, + schema, + value_col_idx, + group_by_indices, + groups: HashMap::new(), + finished_input: false, + emitted_result: false, + } + } + + /// Process a batch of input data. 
+ fn process_batch(&mut self, batch: &RecordBatch) { + let value_array = batch.column(self.value_col_idx); + let num_rows = batch.num_rows(); + + for row_idx in 0..num_rows { + // Extract group key + let group_key = + SummaryInsertExec::extract_group_key(batch, row_idx, &self.group_by_indices); + + // Get or create HLL for this group + let hll = self.groups.entry(group_key).or_default(); + + // Extract value and insert into HLL + let value_bytes = SummaryInsertExec::extract_value_bytes(value_array, row_idx); + hll.insert_bytes(&value_bytes); + } + } + + /// Build the final output batch from accumulated sketches. + fn build_output(&mut self) -> DFResult { + let num_groups = self.groups.len(); + + // Build group key columns + let mut group_builders: Vec = self + .schema + .fields() + .iter() + .take(self.group_by_indices.len()) + .map(|field| ScalarArrayBuilder::new(field.data_type(), num_groups)) + .collect(); + + // Build sketch column + let mut sketch_builder = BinaryBuilder::with_capacity(num_groups, num_groups * 16); + + // Populate arrays + for (group_key, hll) in &mut self.groups { + // Add group key values + for (idx, scalar) in group_key.iter().enumerate() { + group_builders[idx].append(scalar); + } + + // Add serialized sketch + let sketch_bytes = hll.to_bytes(); + sketch_builder.append_value(&sketch_bytes); + } + + // Finish building arrays + let mut columns: Vec = group_builders.iter_mut().map(|b| b.finish()).collect(); + columns.push(Arc::new(sketch_builder.finish())); + + RecordBatch::try_new(self.schema.clone(), columns) + .map_err(|e| DataFusionError::ArrowError(e, None)) + } +} + +use futures::Stream; +use std::pin::Pin; +use std::task::{Context, Poll}; + +impl Stream for SummaryInsertStream { + type Item = DFResult; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + // If we've emitted the result, we're done + if self.emitted_result { + return Poll::Ready(None); + } + + // Consume all input batches first + if 
!self.finished_input { + loop { + match Pin::new(&mut self.input).poll_next(cx) { + Poll::Ready(Some(Ok(batch))) => { + self.process_batch(&batch); + } + Poll::Ready(Some(Err(e))) => { + return Poll::Ready(Some(Err(e))); + } + Poll::Ready(None) => { + self.finished_input = true; + break; + } + Poll::Pending => { + return Poll::Pending; + } + } + } + } + + // Build and emit the final result + self.emitted_result = true; + + // Handle case with no groups + if self.groups.is_empty() { + return Poll::Ready(None); + } + + let batch = self.build_output(); + Poll::Ready(Some(batch)) + } +} + +impl datafusion::physical_plan::RecordBatchStream for SummaryInsertStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +// Helper enum for building arrays dynamically +enum ScalarArrayBuilder { + Utf8(StringBuilder), + UInt64(UInt64Builder), + Int64(arrow::array::Int64Builder), + Float64(arrow::array::Float64Builder), + Int32(arrow::array::Int32Builder), + UInt32(arrow::array::UInt32Builder), +} + +impl ScalarArrayBuilder { + fn new(data_type: &DataType, capacity: usize) -> Self { + match data_type { + DataType::Utf8 => { + ScalarArrayBuilder::Utf8(StringBuilder::with_capacity(capacity, capacity * 32)) + } + DataType::UInt64 => ScalarArrayBuilder::UInt64(UInt64Builder::with_capacity(capacity)), + DataType::Int64 => { + ScalarArrayBuilder::Int64(arrow::array::Int64Builder::with_capacity(capacity)) + } + DataType::Float64 => { + ScalarArrayBuilder::Float64(arrow::array::Float64Builder::with_capacity(capacity)) + } + DataType::Int32 => { + ScalarArrayBuilder::Int32(arrow::array::Int32Builder::with_capacity(capacity)) + } + DataType::UInt32 => { + ScalarArrayBuilder::UInt32(arrow::array::UInt32Builder::with_capacity(capacity)) + } + // For unsupported types, fall back to string representation + _ => ScalarArrayBuilder::Utf8(StringBuilder::with_capacity(capacity, capacity * 32)), + } + } + + fn append(&mut self, scalar: &ScalarValue) { + match (self, scalar) { + 
(ScalarArrayBuilder::Utf8(b), ScalarValue::Utf8(v)) => match v { + Some(s) => b.append_value(s), + None => b.append_null(), + }, + (ScalarArrayBuilder::UInt64(b), ScalarValue::UInt64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Int64(b), ScalarValue::Int64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Float64(b), ScalarValue::Float64(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::Int32(b), ScalarValue::Int32(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + (ScalarArrayBuilder::UInt32(b), ScalarValue::UInt32(v)) => match v { + Some(val) => b.append_value(*val), + None => b.append_null(), + }, + // Fallback: convert to string for Utf8 builder + (ScalarArrayBuilder::Utf8(b), scalar) => { + if scalar.is_null() { + b.append_null(); + } else { + b.append_value(scalar.to_string()); + } + } + // For type mismatches with non-Utf8 builders, append null + (ScalarArrayBuilder::UInt64(b), _) => b.append_null(), + (ScalarArrayBuilder::Int64(b), _) => b.append_null(), + (ScalarArrayBuilder::Float64(b), _) => b.append_null(), + (ScalarArrayBuilder::Int32(b), _) => b.append_null(), + (ScalarArrayBuilder::UInt32(b), _) => b.append_null(), + } + } + + fn finish(&mut self) -> ArrayRef { + match self { + ScalarArrayBuilder::Utf8(b) => Arc::new(b.finish()), + ScalarArrayBuilder::UInt64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Int64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Float64(b) => Arc::new(b.finish()), + ScalarArrayBuilder::Int32(b) => Arc::new(b.finish()), + ScalarArrayBuilder::UInt32(b) => Arc::new(b.finish()), + } + } +} diff --git a/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs new file mode 100644 index 
0000000..64fe394 --- /dev/null +++ b/CommonDependencies/dependencies/rs/datafusion_summary_library/src/sketch_operators.rs @@ -0,0 +1,1630 @@ +// Sketch-based query plan operators for DataFusion +// +// This module defines custom logical plan nodes for sketch-based query optimization. +// These operators support exploring different sketch-based execution strategies. +#![allow(deprecated)] + +#[allow(deprecated)] +use datafusion::arrow::datatypes::{DataType, Field}; +use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use std::cmp::Ordering; +use std::collections::BTreeMap; // BTreeMap instead of HashMap (can derive Hash) +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + +// ============================================================================ +// TypedExpr - Expression with pre-resolved type +// ============================================================================ + +/// An expression paired with its pre-resolved data type. +/// +/// This is used to preserve type information when expressions are passed through +/// plan nodes that may not have access to the original schema needed to resolve types. +/// For example, SummaryInfer's input is a SummaryInsert which may not include +/// the columns referenced in GROUP BY expressions (especially for Hydra strategy). 
+#[derive(Debug, Clone)] +pub struct TypedExpr { + pub expr: Expr, + pub data_type: DataType, +} + +impl TypedExpr { + pub fn new(expr: Expr, data_type: DataType) -> Self { + Self { expr, data_type } + } +} + +// Manual trait implementations since Expr implements these traits +impl PartialEq for TypedExpr { + fn eq(&self, other: &Self) -> bool { + self.expr == other.expr && self.data_type == other.data_type + } +} + +impl Eq for TypedExpr {} + +impl Hash for TypedExpr { + fn hash(&self, state: &mut H) { + self.expr.hash(state); + self.data_type.hash(state); + } +} + +// ============================================================================ +// Sketch Types +// ============================================================================ + +/// Types of sketches/summaries supported for query processing +/// Also aliased as SummaryType for clarity (includes both sketches and exact aggregators) +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum SketchType { + // ======================================================================== + // Exact aggregators (non-sketch, single population) + // ======================================================================== + Sum, // Exact sum accumulator + Increase, // Counter increase tracking + MinMax, // Min/max values + + MultipleSum, + MultipleIncrease, + MultipleMinMax, + + // ======================================================================== + // Set aggregators + // ======================================================================== + SetAggregator, // Exact set of group keys (HashSet-based) + DeltaSetAggregator, // Set aggregation with separate key tracking + + // ======================================================================== + // COUNT DISTINCT sketches + // ======================================================================== + HLL, // HyperLogLog + UltraLogLog, // UltraLogLog (improved HLL) + HydraHLL, // HyperLogLog with multi-population support + + // 
======================================================================== + // Quantile sketches + // ======================================================================== + KLL, // KLL sketch + TDigest, // T-Digest + HydraKLL, // KLL with multi-population support + + // ======================================================================== + // Heavy hitters / TOP K + // ======================================================================== + SpaceSaving, // Space-Saving algorithm + FrequentItems, // Frequent items sketch + + // ======================================================================== + // Frequency estimation + // ======================================================================== + CountMinSketch, // Count-Min Sketch + CountSketch, // Count Sketch + + // ======================================================================== + // General purpose + // ======================================================================== + Sampling, // Reservoir sampling +} + +/// Type alias for clarity - SummaryType includes both sketches and exact aggregators +pub type SummaryType = SketchType; + +impl fmt::Display for SketchType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + // Exact aggregators + SketchType::Sum => write!(f, "Sum"), + SketchType::Increase => write!(f, "Increase"), + SketchType::MinMax => write!(f, "MinMax"), + SketchType::SetAggregator => write!(f, "SetAggregator"), + SketchType::DeltaSetAggregator => write!(f, "DeltaSetAggregator"), + SketchType::MultipleSum => write!(f, "MultipleSum"), + SketchType::MultipleIncrease => write!(f, "MultipleIncrease"), + SketchType::MultipleMinMax => write!(f, "MultipleMinMax"), + // Sketches + SketchType::HLL => write!(f, "HLL"), + SketchType::UltraLogLog => write!(f, "UltraLogLog"), + SketchType::HydraHLL => write!(f, "HydraHLL"), + SketchType::KLL => write!(f, "KLL"), + SketchType::TDigest => write!(f, "TDigest"), + SketchType::HydraKLL => write!(f, "HydraKLL"), + 
SketchType::SpaceSaving => write!(f, "SpaceSaving"), + SketchType::FrequentItems => write!(f, "FrequentItems"), + SketchType::CountMinSketch => write!(f, "CountMinSketch"), + SketchType::CountSketch => write!(f, "CountSketch"), + SketchType::Sampling => write!(f, "Sampling"), + } + } +} + +impl SketchType { + /// Check if this sketch type supports multi-population (Hydra-style) + pub fn is_hydra(&self) -> bool { + matches!(self, SketchType::HydraHLL | SketchType::HydraKLL) + } + + /// Get the base sketch type (non-Hydra version) + pub fn base_type(&self) -> SketchType { + match self { + SketchType::HydraHLL => SketchType::HLL, + SketchType::HydraKLL => SketchType::KLL, + other => other.clone(), + } + } +} + +// ============================================================================ +// Inference Operations +// ============================================================================ + +/// Operations that can be performed on sketches/summaries to extract results +/// Note: Uses simplified types (strings instead of Expr) for DataFusion integration +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum InferOperation { + // ======================================================================== + // Exact aggregator extraction operations + // ======================================================================== + /// Extract sum value from Sum/HydraSum accumulator + ExtractSum, + + /// Extract count value from an accumulator + ExtractCount, + + /// Extract minimum value from MinMax/HydraMinMax accumulator + ExtractMin, + + /// Extract maximum value from MinMax/HydraMinMax accumulator + ExtractMax, + + /// Extract increase value from Increase/HydraIncrease accumulator + ExtractIncrease, + + /// Extract rate (increase / time_range) from Increase accumulator + ExtractRate, + + // ======================================================================== + // Sketch operations + // 
======================================================================== + /// COUNT(DISTINCT column) + CountDistinct, + + /// Quantile/percentile estimation + /// Stores quantile as integer (0-10000) for 4 decimal places: 0.9500 = 9500 + Quantile(u16), + + /// Median (equivalent to Quantile(0.5)) + Median, + + /// TOP K items + TopK(usize), + + /// Frequency-based COUNT(*) aggregation with GROUP BY + /// Queries frequency sketch to get count for each group key + FrequencyCount, + + /// Frequency-based SUM(column) aggregation with GROUP BY + /// Queries frequency sketch to get sum for each group key + FrequencySum, + + /// Frequency-based AVG(column) aggregation with GROUP BY + /// Computed as SUM(column) / COUNT(*) using two frequency sketches + FrequencyAvg, + + /// Frequency estimate for a specific value (stored as string) + FrequencyEstimate(String), + + /// Get all frequent items above threshold (stored as integer, 0-10000) + FrequentItems(u16), + + /// Enumerate set contents (for SetAggregator) + /// Returns all unique values seen in the set + EnumerateSet, +} + +impl InferOperation { + /// Create a Quantile operation from a float (0.0 to 1.0) + pub fn quantile(p: f64) -> Self { + InferOperation::Quantile((p * 10000.0).round() as u16) + } + + /// Get the quantile value as f64 + pub fn quantile_value(&self) -> Option { + match self { + InferOperation::Quantile(p) => Some(*p as f64 / 10000.0), + InferOperation::Median => Some(0.5), + _ => None, + } + } + + /// Create a FrequentItems operation from a float threshold + pub fn frequent_items(threshold: f64) -> Self { + InferOperation::FrequentItems((threshold * 10000.0).round() as u16) + } + + /// Get the threshold value as f64 + pub fn threshold_value(&self) -> Option { + match self { + InferOperation::FrequentItems(t) => Some(*t as f64 / 10000.0), + _ => None, + } + } +} + +impl fmt::Display for InferOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + // Exact aggregator 
extractions + InferOperation::ExtractSum => write!(f, "EXTRACT_SUM"), + InferOperation::ExtractCount => write!(f, "EXTRACT_COUNT"), + InferOperation::ExtractMin => write!(f, "EXTRACT_MIN"), + InferOperation::ExtractMax => write!(f, "EXTRACT_MAX"), + InferOperation::ExtractIncrease => write!(f, "EXTRACT_INCREASE"), + InferOperation::ExtractRate => write!(f, "EXTRACT_RATE"), + // Sketch operations + InferOperation::CountDistinct => write!(f, "COUNT_DISTINCT"), + InferOperation::Quantile(p) => write!(f, "QUANTILE({:.4})", *p as f64 / 10000.0), + InferOperation::Median => write!(f, "MEDIAN"), + InferOperation::TopK(k) => write!(f, "TOPK({})", k), + InferOperation::FrequencyCount => write!(f, "FREQ_COUNT"), + InferOperation::FrequencySum => write!(f, "FREQ_SUM"), + InferOperation::FrequencyAvg => write!(f, "FREQ_AVG"), + InferOperation::FrequencyEstimate(value) => write!(f, "FREQ_EST({})", value), + InferOperation::FrequentItems(threshold) => { + write!(f, "FREQ_ITEMS({:.4})", *threshold as f64 / 10000.0) + } + InferOperation::EnumerateSet => write!(f, "ENUM_SET"), + } + } +} + +// ============================================================================ +// Grouping Strategy +// ============================================================================ + +/// Strategy for handling GROUP BY queries +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum GroupingStrategy { + /// One sketch per group (filter-based, computed separately) + PerGroup, + + /// Single Hydra-style sketch containing all groups + Hydra, + + /// No grouping (simple aggregation) + None, +} + +// ============================================================================ +// Sketch Metadata +// ============================================================================ + +/// Metadata for identifying and loading sketches +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct SketchMetadata { + pub table_name: String, + pub column_name: String, + pub sketch_type: SketchType, + pub 
filter_predicate: Option, + pub key_columns: Vec, // For Hydra sketches +} + +impl fmt::Display for SketchMetadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}.{}.{}", + self.table_name, self.column_name, self.sketch_type + )?; + if let Some(filter) = &self.filter_predicate { + write!(f, " WHERE {}", filter)?; + } + if !self.key_columns.is_empty() { + write!(f, " KEY BY [{}]", self.key_columns.join(", "))?; + } + Ok(()) + } +} + +// ============================================================================ +// Sketch Specification +// ============================================================================ + +/// Specification for a single sketch to create +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct SketchSpec { + pub value_column: Option, + pub sketch_type: SketchType, + pub output_column_name: String, // e.g., "host_sketch", "cpu_sketch" +} + +// ============================================================================ +// SummaryInsert - Compute sketch from raw data +// ============================================================================ + +/// Logical plan node: Compute a sketch from raw data +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryInsert { + /// Input data source + pub input: Arc, + + /// Sketches to create (one or more) + pub sketches: Vec, + + /// GROUP BY columns for per-group strategy (columns appear in output schema) + /// Legacy field - use group_by_exprs for computed expressions + pub group_by: Vec, + + /// Key columns for Hydra strategy (columns embedded in sketch, NOT in output schema) + /// Legacy field - use key_column_exprs for computed expressions + pub key_columns: Vec, + + /// GROUP BY expressions with pre-resolved types for per-group strategy + /// When non-empty, takes precedence over group_by in compute_schema + pub group_by_exprs: Vec, + + /// Key column expressions with pre-resolved types for Hydra strategy + /// These are embedded in 
the sketch, not in output schema + pub key_column_exprs: Vec, + + /// Optional parameters (e.g., HLL precision, KLL k value) + /// Using BTreeMap instead of HashMap so it can derive Hash + pub parameters: BTreeMap, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl SummaryInsert { + /// Create a new SummaryInsert with multiple sketches + pub fn new(input: Arc, sketches: Vec) -> DFResult { + if sketches.is_empty() { + return Err(DataFusionError::Plan( + "SummaryInsert requires at least one sketch".to_string(), + )); + } + + let schema = Self::compute_schema(&input, &sketches, &[], &[], &[])?; + Ok(Self { + input, + sketches, + group_by: vec![], + key_columns: vec![], + group_by_exprs: vec![], + key_column_exprs: vec![], + parameters: BTreeMap::new(), + schema, + }) + } + + /// Helper constructor for single sketch (backward compatibility) + pub fn single( + input: Arc, + value_column: Option, + sketch_type: SketchType, + ) -> DFResult { + let output_column_name = match &value_column { + Some(col) => format!("{}_sketch", col), + None => "value_sketch".to_string(), + }; + + Self::new( + input, + vec![SketchSpec { + value_column, + sketch_type, + output_column_name, + }], + ) + } + + pub fn with_group_by(mut self, group_by: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &group_by, + &self.key_columns, + &self.group_by_exprs, + )?; + self.group_by = group_by; + Ok(self) + } + + pub fn with_key_columns(mut self, key_columns: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &self.group_by, + &key_columns, + &self.group_by_exprs, + )?; + self.key_columns = key_columns; + Ok(self) + } + + /// Set GROUP BY expressions with pre-resolved types (supports computed expressions) + pub fn with_group_by_exprs(mut self, group_by_exprs: Vec) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.sketches, + &self.group_by, + &self.key_columns, + &group_by_exprs, + )?; 
+ self.group_by_exprs = group_by_exprs; + Ok(self) + } + + /// Set key column expressions with pre-resolved types for Hydra strategy + /// Note: These are embedded in the sketch, not in the output schema + pub fn with_key_column_exprs(mut self, key_column_exprs: Vec) -> DFResult { + // key_column_exprs don't affect output schema, but store them for later use + self.key_column_exprs = key_column_exprs; + Ok(self) + } + + pub fn with_parameters(mut self, parameters: BTreeMap) -> Self { + self.parameters = parameters; + self + } + + /// Compute output schema based on grouping strategy + fn compute_schema( + input: &Arc, + sketches: &[SketchSpec], + group_by: &[String], + _key_columns: &[String], + group_by_exprs: &[TypedExpr], + ) -> DFResult { + let input_schema = input.schema(); + let mut qualified_fields = Vec::new(); + + // For per-group strategy: include group columns in output with their qualifications + // This matches vanilla DataFusion Aggregate behavior + // + // Prefer group_by_exprs (TypedExpr) over group_by (String) if available + if !group_by_exprs.is_empty() { + // Use TypedExpr - supports computed expressions like date_part(), CASE, etc. 
+ for typed_expr in group_by_exprs { + // For simple columns, use col.name as field name and col.relation as qualifier + // For computed expressions, use schema_name() with no qualifier + let (qualifier, field_name) = if let Expr::Column(col) = &typed_expr.expr { + (col.relation.clone(), col.name.clone()) + } else { + (None, typed_expr.expr.schema_name().to_string()) + }; + qualified_fields.push(( + qualifier, + Arc::new(Field::new(&field_name, typed_expr.data_type.clone(), true)), + )); + } + } else if !group_by.is_empty() { + // Fallback to legacy string-based group_by + for col_name in group_by { + // Get both qualifier and field to preserve qualification + let (qualifier, field) = input_schema + .qualified_field_with_unqualified_name(col_name) + .map_err(|e| { + DataFusionError::Plan(format!( + "Group column '{}' not found in input schema: {}", + col_name, e + )) + })?; + qualified_fields.push((qualifier.cloned(), Arc::new(field.clone()))); + } + } + + // For Hydra strategy: key columns are embedded in sketch, not in output + // (no fields added here - neither key_columns nor key_column_exprs affect output) + + // Add sketch columns (Binary type, unqualified) + // Create one column per sketch specification + for sketch_spec in sketches { + qualified_fields.push(( + None, + Arc::new(Field::new( + &sketch_spec.output_column_name, + DataType::Binary, + false, + )), + )); + } + + // Create DFSchema from qualified fields + let schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } +} + +impl PartialOrd for SummaryInsert { + fn partial_cmp(&self, other: &Self) -> Option { + // Compare by sketches, then grouping, then parameters, then input + match self.sketches.partial_cmp(&other.sketches) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.group_by.partial_cmp(&other.group_by) { + Some(Ordering::Equal) => {} + 
other => return other, + } + match self.key_columns.partial_cmp(&other.key_columns) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.parameters.partial_cmp(&other.parameters) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.input.partial_cmp(&other.input) { + Some(Ordering::Equal) => {} + other => return other, + } + // Compare schemas by pointer (Arc comparison) + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryInsert { + fn name(&self) -> &str { + "SummaryInsert" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.input.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.sketches.len() == 1 { + // Single sketch: show simplified format + let sketch = &self.sketches[0]; + write!(f, "SummaryInsert: sketch_type={}", sketch.sketch_type)?; + if let Some(col) = &sketch.value_column { + write!(f, ", value_column={}", col)?; + } + } else { + // Multiple sketches: show as array + write!(f, "SummaryInsert: sketches=[")?; + for (i, sketch) in self.sketches.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{{type={}", sketch.sketch_type)?; + if let Some(col) = &sketch.value_column { + write!(f, ", column={}", col)?; + } + write!(f, "}}")?; + } + write!(f, "]")?; + } + if !self.group_by.is_empty() { + write!(f, ", group_by=[{}]", self.group_by.join(", "))?; + } + if !self.key_columns.is_empty() { + write!(f, ", key_columns=[{}]", self.key_columns.join(", "))?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + let input = Arc::new(inputs[0].clone()); + // Recompute schema with new input + let schema = Self::compute_schema( + &input, + &self.sketches, + &self.group_by, + &self.key_columns, + &self.group_by_exprs, + ) + .unwrap_or_else(|_| 
self.schema.clone()); + + Self { + input, + sketches: self.sketches.clone(), + group_by: self.group_by.clone(), + key_columns: self.key_columns.clone(), + group_by_exprs: self.group_by_exprs.clone(), + key_column_exprs: self.key_column_exprs.clone(), + parameters: self.parameters.clone(), + schema, + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryRead - Load pre-computed sketch +// ============================================================================ + +/// Logical plan node: Load a pre-computed sketch +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryRead { + /// Metadata to identify the sketch + pub metadata: SketchMetadata, + + /// Optional: Direct sketch ID if known + pub sketch_id: Option, + + /// Schema (placeholder for now) + schema: DFSchemaRef, +} + +impl SummaryRead { + pub fn new(metadata: SketchMetadata, schema: DFSchemaRef) -> Self { + Self { + metadata, + sketch_id: None, + schema, + } + } + + pub fn with_sketch_id(mut self, sketch_id: String) -> Self { + self.sketch_id = Some(sketch_id); + self + } +} + +impl PartialOrd for SummaryRead { + fn partial_cmp(&self, other: &Self) -> Option { + match self.metadata.partial_cmp(&other.metadata) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.sketch_id.partial_cmp(&other.sketch_id) { + Some(Ordering::Equal) => {} + other => return other, + } + // DFSchemaRef is Arc, and DFSchema likely doesn't implement PartialOrd + // So we compare by pointer + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryRead { + fn name(&self) -> &str { + "SummaryRead" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] // No inputs - reads from storage + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) 
-> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummaryRead: {}", self.metadata)?; + if let Some(id) = &self.sketch_id { + write!(f, " [id={}]", id)?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { + self.clone() + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryMerge - Merge multiple sketches +// ============================================================================ + +/// Logical plan node: Merge two or more sketches +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummaryMerge { + /// Left sketch source + pub left: Arc, + + /// Right sketch source + pub right: Arc, + + /// Sketch type (for validation) + pub sketch_type: SketchType, +} + +impl SummaryMerge { + pub fn new(left: Arc, right: Arc, sketch_type: SketchType) -> Self { + Self { + left, + right, + sketch_type, + } + } +} + +impl PartialOrd for SummaryMerge { + fn partial_cmp(&self, other: &Self) -> Option { + match self.sketch_type.partial_cmp(&other.sketch_type) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.left.partial_cmp(&other.left) { + Some(Ordering::Equal) => {} + other => return other, + } + self.right.partial_cmp(&other.right) + } +} + +impl UserDefinedLogicalNodeCore for SummaryMerge { + fn name(&self) -> &str { + "SummaryMerge" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.left.as_ref(), self.right.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + // Return left schema (should be compatible with right) + self.left.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummaryMerge: sketch_type={}", self.sketch_type) + } + + fn from_template(&self, _exprs: &[Expr], 
inputs: &[LogicalPlan]) -> Self { + Self { + left: Arc::new(inputs[0].clone()), + right: Arc::new(inputs[1].clone()), + sketch_type: self.sketch_type.clone(), + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummarySubtract - Subtract one sketch from another +// ============================================================================ + +/// Logical plan node: Subtract one sketch from another (for sliding windows) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SummarySubtract { + /// Sketch to subtract FROM (minuend) + pub minuend: Arc, + + /// Sketch to subtract (subtrahend) + pub subtrahend: Arc, + + /// Sketch type (for validation) + pub sketch_type: SketchType, +} + +impl SummarySubtract { + pub fn new( + minuend: Arc, + subtrahend: Arc, + sketch_type: SketchType, + ) -> Self { + Self { + minuend, + subtrahend, + sketch_type, + } + } +} + +impl PartialOrd for SummarySubtract { + fn partial_cmp(&self, other: &Self) -> Option { + match self.sketch_type.partial_cmp(&other.sketch_type) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.minuend.partial_cmp(&other.minuend) { + Some(Ordering::Equal) => {} + other => return other, + } + self.subtrahend.partial_cmp(&other.subtrahend) + } +} + +impl UserDefinedLogicalNodeCore for SummarySubtract { + fn name(&self) -> &str { + "SummarySubtract" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![self.minuend.as_ref(), self.subtrahend.as_ref()] + } + + fn schema(&self) -> &DFSchemaRef { + self.minuend.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SummarySubtract: sketch_type={}", self.sketch_type) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + Self { + minuend: 
Arc::new(inputs[0].clone()), + subtrahend: Arc::new(inputs[1].clone()), + sketch_type: self.sketch_type.clone(), + } + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&_exprs, &inputs)) + } +} + +// ============================================================================ +// SummaryInfer - Extract result from sketch +// ============================================================================ + +/// Logical plan node: Extract a result from a sketch +#[derive(Debug, Clone)] +pub struct SummaryInfer { + /// Input sketch source + pub input: Arc, + + /// Optional second input for keys enumeration (multi-population accumulators). + /// When present, SummaryInferExec deserializes the value sketch once per spatial group + /// and queries it N times (once per sub-key from the keys input). + pub keys_input: Option>, + + /// Operations to perform on the sketch(es) + /// For single sketch with multiple operations: operations map to sketch in order + pub operations: Vec, + + /// Output column names (one per operation) + pub output_names: Vec, + + /// Optional group key columns for Hydra sketches (column names, not full Expr) + /// Legacy field - use group_key_exprs for computed expressions + pub group_key_columns: Vec, + + /// Optional qualifier for the group key columns (for Hydra sketches) + pub group_key_qualifier: Option, + + /// Group key expressions with pre-resolved types (supports computed expressions) + /// When non-empty, takes precedence over group_key_columns in compute_schema + pub group_key_exprs: Vec, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl SummaryInfer { + /// Create a new SummaryInfer with multiple operations + pub fn new( + input: Arc, + operations: Vec, + output_names: Vec, + ) -> DFResult { + // Validate inputs + if operations.is_empty() { + return Err(DataFusionError::Plan( + "SummaryInfer requires at least one operation".to_string(), + )); + } + if operations.len() != 
output_names.len() { + return Err(DataFusionError::Plan(format!( + "SummaryInfer operations ({}) and output_names ({}) length mismatch", + operations.len(), + output_names.len() + ))); + } + + let schema = Self::compute_schema(&input, &operations, &output_names, &[], &None, &[])?; + Ok(Self { + input, + keys_input: None, + operations, + output_names, + group_key_columns: vec![], + group_key_qualifier: None, + group_key_exprs: vec![], + schema, + }) + } + + /// Helper constructor for single operation (backward compatibility) + pub fn single( + input: Arc, + operation: InferOperation, + output_name: String, + ) -> DFResult { + Self::new(input, vec![operation], vec![output_name]) + } + + /// Add group key columns for Hydra sketches (supports multiple columns) + /// Legacy method - use with_group_key_exprs for computed expressions + pub fn with_group_key_columns( + mut self, + group_key_columns: Vec, + qualifier: Option, + ) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.operations, + &self.output_names, + &group_key_columns, + &qualifier, + &self.group_key_exprs, + )?; + self.group_key_columns = group_key_columns; + self.group_key_qualifier = qualifier; + Ok(self) + } + + /// Set a second input for keys enumeration (multi-population accumulators). 
+ pub fn with_keys_input(mut self, keys_input: Arc) -> Self { + self.keys_input = Some(keys_input); + self + } + + /// Add group key expressions with pre-resolved types (supports computed expressions) + pub fn with_group_key_exprs( + mut self, + group_key_exprs: Vec, + qualifier: Option, + ) -> DFResult { + self.schema = Self::compute_schema( + &self.input, + &self.operations, + &self.output_names, + &self.group_key_columns, + &qualifier, + &group_key_exprs, + )?; + self.group_key_exprs = group_key_exprs; + self.group_key_qualifier = qualifier; + Ok(self) + } + + /// Compute output schema based on operations and grouping + fn compute_schema( + input: &Arc, + operations: &[InferOperation], + output_names: &[String], + group_key_columns: &[String], + group_key_qualifier: &Option, + group_key_exprs: &[TypedExpr], + ) -> DFResult { + let input_schema = input.schema(); + let mut qualified_fields = Vec::new(); + + // Add group columns to output with qualifications preserved + // Prefer group_key_exprs (TypedExpr) over group_key_columns (String) if available + if !group_key_exprs.is_empty() { + // Use TypedExpr - supports computed expressions like date_part(), CASE, etc. 
+ // First: pass through input label columns not covered by group_key_exprs + let expr_names: Vec = group_key_exprs + .iter() + .filter_map(|te| { + if let Expr::Column(col) = &te.expr { + Some(col.name.clone()) + } else { + None + } + }) + .collect(); + for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" + && !field.name().ends_with("_sketch") + && !expr_names.contains(&field.name().to_string()) + { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + // Then: add group key expression columns + for typed_expr in group_key_exprs { + // For simple columns, use col.name as field name and col.relation as qualifier + // For computed expressions, use schema_name() with optional provided qualifier + let (qualifier, field_name) = if let Expr::Column(col) = &typed_expr.expr { + (col.relation.clone(), col.name.clone()) + } else if let Some(qual) = group_key_qualifier { + ( + Some(datafusion::common::TableReference::bare(qual.clone())), + typed_expr.expr.schema_name().to_string(), + ) + } else { + (None, typed_expr.expr.schema_name().to_string()) + }; + qualified_fields.push(( + qualifier, + Arc::new(Field::new(&field_name, typed_expr.data_type.clone(), true)), + )); + } + } else if !group_key_columns.is_empty() { + // Fallback to legacy string-based group_key_columns + // Hydra/self-keyed case: pass through input label columns first, + // then add materialized group key columns from the accumulator. 
+ for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" + && !field.name().ends_with("_sketch") + && !group_key_columns.contains(&field.name().to_string()) + { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + for key_col in group_key_columns { + // Try to find it in the input schema first + if let Ok((qualifier, field)) = + input_schema.qualified_field_with_unqualified_name(key_col) + { + qualified_fields.push((qualifier.cloned(), Arc::new(field.clone()))); + } else if let Some(qual) = group_key_qualifier { + // Use provided qualifier if input schema doesn't have it + // This happens for Hydra where SummaryInsert doesn't output the group keys + let qualifier = Some(datafusion::common::TableReference::bare(qual.clone())); + qualified_fields.push(( + qualifier, + Arc::new(Field::new(key_col, DataType::Utf8, false)), + )); + } else { + // No qualifier available - use unqualified + qualified_fields + .push((None, Arc::new(Field::new(key_col, DataType::Utf8, false)))); + } + } + } else { + // Per-group case: preserve group columns from input (non-sketch columns) with qualifications + for (qualifier, field) in input_schema.iter() { + if field.name() != "sketch" && !field.name().ends_with("_sketch") { + qualified_fields.push((qualifier.cloned(), field.clone())); + } + } + } + + // Add result columns based on operation types (unqualified) + for (operation, output_name) in operations.iter().zip(output_names.iter()) { + let result_type = match operation { + // Exact aggregator extractions - all return Float64 + InferOperation::ExtractSum => DataType::Float64, + InferOperation::ExtractCount => DataType::Float64, + InferOperation::ExtractMin => DataType::Float64, + InferOperation::ExtractMax => DataType::Float64, + InferOperation::ExtractIncrease => DataType::Float64, + InferOperation::ExtractRate => DataType::Float64, + // Sketch operations + InferOperation::CountDistinct => DataType::UInt64, + InferOperation::Quantile(_) | 
InferOperation::Median => DataType::Float64, + InferOperation::TopK(_) => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + InferOperation::FrequencyCount => DataType::Float64, // COUNT returns numeric + InferOperation::FrequencySum => DataType::Float64, // SUM returns numeric + InferOperation::FrequencyAvg => DataType::Float64, // AVG returns numeric + InferOperation::FrequencyEstimate(_) => DataType::UInt64, + InferOperation::FrequentItems(_) => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + InferOperation::EnumerateSet => { + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))) + } + }; + + qualified_fields.push((None, Arc::new(Field::new(output_name, result_type, false)))); + } + + // Create DFSchema from qualified fields + let schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } +} + +impl PartialEq for SummaryInfer { + fn eq(&self, other: &Self) -> bool { + self.input == other.input + && self.keys_input == other.keys_input + && self.operations == other.operations + && self.output_names == other.output_names + && self.group_key_columns == other.group_key_columns + && self.group_key_qualifier == other.group_key_qualifier + && self.schema == other.schema + } +} + +impl Eq for SummaryInfer {} + +impl std::hash::Hash for SummaryInfer { + fn hash(&self, state: &mut H) { + self.input.hash(state); + self.keys_input.hash(state); + self.operations.hash(state); + self.output_names.hash(state); + self.group_key_columns.hash(state); + self.group_key_qualifier.hash(state); + self.schema.hash(state); + } +} + +impl PartialOrd for SummaryInfer { + fn partial_cmp(&self, other: &Self) -> Option { + match self.operations.partial_cmp(&other.operations) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.output_names.partial_cmp(&other.output_names) { + 
Some(Ordering::Equal) => {} + other => return other, + } + match self.group_key_columns.partial_cmp(&other.group_key_columns) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.input.partial_cmp(&other.input) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.keys_input.partial_cmp(&other.keys_input) { + Some(Ordering::Equal) => {} + other => return other, + } + // Compare schemas by pointer + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for SummaryInfer { + fn name(&self) -> &str { + "SummaryInfer" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + let mut inputs = vec![self.input.as_ref()]; + if let Some(ref keys_input) = self.keys_input { + inputs.push(keys_input.as_ref()); + } + inputs + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + // No Expr types stored anymore - group_key_column is just a string + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.operations.len() == 1 { + write!( + f, + "SummaryInfer: operation={}, output={}", + self.operations[0], self.output_names[0] + )?; + } else { + write!(f, "SummaryInfer: operations=[")?; + for (i, op) in self.operations.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", op)?; + } + write!(f, "], outputs=[")?; + for (i, name) in self.output_names.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", name)?; + } + write!(f, "]")?; + } + if !self.group_key_columns.is_empty() { + write!( + f, + ", group_key_columns=[{}]", + self.group_key_columns.join(", ") + )?; + } + if self.keys_input.is_some() { + write!(f, ", has_keys_input=true")?; + } + Ok(()) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + let input = Arc::new(inputs[0].clone()); + let keys_input = if inputs.len() > 1 { + Some(Arc::new(inputs[1].clone())) + } else { + 
self.keys_input.clone() + }; + // Recompute schema with new input + let schema = Self::compute_schema( + &input, + &self.operations, + &self.output_names, + &self.group_key_columns, + &self.group_key_qualifier, + &self.group_key_exprs, + ) + .unwrap_or_else(|_| self.schema.clone()); + + Self { + input, + keys_input, + operations: self.operations.clone(), + output_names: self.output_names.clone(), + group_key_columns: self.group_key_columns.clone(), + group_key_qualifier: self.group_key_qualifier.clone(), + group_key_exprs: self.group_key_exprs.clone(), + schema, + } + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&exprs, &inputs)) + } +} + +// ============================================================================ +// PrecomputedSummaryRead - Read precomputed summaries from store +// ============================================================================ + +/// Logical plan node: Read precomputed summaries from a store +/// +/// This is a leaf node that represents reading precomputed aggregates +/// (summaries) from a PrecomputedOutputStore. Used for OnlySpatial queries +/// where data has already been aggregated by a streaming engine. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PrecomputedSummaryRead { + /// Metric name being queried + metric: String, + + /// Aggregation ID to query + aggregation_id: u64, + + /// Start timestamp of the query range + start_timestamp: u64, + + /// End timestamp of the query range + end_timestamp: u64, + + /// Whether this is an exact query (sliding window) vs approximate (tumbling) + is_exact_query: bool, + + /// Output label names (group by columns) + output_labels: Vec, + + /// Type of summary being read + summary_type: SketchType, + + /// Cached output schema + schema: DFSchemaRef, +} + +impl PrecomputedSummaryRead { + #[allow(clippy::too_many_arguments)] + pub fn new( + metric: String, + aggregation_id: u64, + start_timestamp: u64, + end_timestamp: u64, + is_exact_query: bool, + output_labels: Vec, + summary_type: SketchType, + schema: DFSchemaRef, + ) -> Self { + Self { + metric, + aggregation_id, + start_timestamp, + end_timestamp, + is_exact_query, + output_labels, + summary_type, + schema, + } + } + + /// Create with auto-generated schema based on output_labels + pub fn with_auto_schema( + metric: String, + aggregation_id: u64, + start_timestamp: u64, + end_timestamp: u64, + is_exact_query: bool, + output_labels: Vec, + summary_type: SketchType, + ) -> DFResult { + let schema = Self::compute_schema(&output_labels)?; + Ok(Self::new( + metric, + aggregation_id, + start_timestamp, + end_timestamp, + is_exact_query, + output_labels, + summary_type, + schema, + )) + } + + /// Compute schema: [label columns (Utf8), sketch (Binary)] + fn compute_schema(output_labels: &[String]) -> DFResult { + let mut qualified_fields = Vec::new(); + + // Add label columns (all Utf8, nullable) + for label in output_labels { + qualified_fields.push((None, Arc::new(Field::new(label, DataType::Utf8, true)))); + } + + // Add sketch column (Binary, not nullable) + qualified_fields.push(( + None, + Arc::new(Field::new("sketch", DataType::Binary, false)), + )); + + let 
schema = DFSchema::new_with_metadata(qualified_fields, Default::default()) + .map_err(|e| DataFusionError::Plan(format!("Failed to create schema: {}", e)))?; + + Ok(Arc::new(schema)) + } + + // Getters + pub fn metric(&self) -> &str { + &self.metric + } + + pub fn aggregation_id(&self) -> u64 { + self.aggregation_id + } + + pub fn start_timestamp(&self) -> u64 { + self.start_timestamp + } + + pub fn end_timestamp(&self) -> u64 { + self.end_timestamp + } + + pub fn is_exact_query(&self) -> bool { + self.is_exact_query + } + + pub fn output_labels(&self) -> &[String] { + &self.output_labels + } + + pub fn summary_type(&self) -> &SketchType { + &self.summary_type + } +} + +impl PartialOrd for PrecomputedSummaryRead { + fn partial_cmp(&self, other: &Self) -> Option { + match self.metric.partial_cmp(&other.metric) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.aggregation_id.partial_cmp(&other.aggregation_id) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.start_timestamp.partial_cmp(&other.start_timestamp) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.end_timestamp.partial_cmp(&other.end_timestamp) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.is_exact_query.partial_cmp(&other.is_exact_query) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.output_labels.partial_cmp(&other.output_labels) { + Some(Ordering::Equal) => {} + other => return other, + } + match self.summary_type.partial_cmp(&other.summary_type) { + Some(Ordering::Equal) => {} + other => return other, + } + Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema))) + } +} + +impl UserDefinedLogicalNodeCore for PrecomputedSummaryRead { + fn name(&self) -> &str { + "PrecomputedSummaryRead" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] // Leaf node - no inputs + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] 
+ } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "PrecomputedSummaryRead: metric={}, agg_id={}, range=[{}, {}], exact={}, type={}, labels=[{}]", + self.metric, + self.aggregation_id, + self.start_timestamp, + self.end_timestamp, + self.is_exact_query, + self.summary_type, + self.output_labels.join(", ") + ) + } + + fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { + self.clone() + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + _inputs: Vec, + ) -> DFResult { + Ok(self.clone()) + } +} + +// ============================================================================ +// SummaryMergeMultiple - Merge multiple summaries by group key +// ============================================================================ + +/// Logical plan node: Merge multiple summaries with the same group key +/// +/// Takes an input with multiple rows per group key (e.g., from multiple +/// precomputed buckets) and merges them into one summary per group key. +/// This is used when tumbling windows need to be merged for a query range. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SummaryMergeMultiple {
    /// Input plan (typically PrecomputedSummaryRead)
    input: Arc<LogicalPlan>,

    /// Columns to group by when merging
    group_by: Vec<String>,

    /// Column containing the sketch/summary data
    sketch_column: String,

    /// Type of summary being merged (for dispatch to correct merge logic)
    summary_type: SketchType,

    /// Cached output schema (same as input - merging reduces rows, not columns)
    schema: DFSchemaRef,
}

impl SummaryMergeMultiple {
    /// Build a merge node over `input`, grouping by `group_by` and merging
    /// the summaries found in `sketch_column`.
    pub fn new(
        input: Arc<LogicalPlan>,
        group_by: Vec<String>,
        sketch_column: String,
        summary_type: SketchType,
    ) -> Self {
        // Schema is same as input (we reduce rows, not columns).
        let schema = input.schema().clone();
        Self {
            input,
            group_by,
            sketch_column,
            summary_type,
            schema,
        }
    }

    // Getters
    pub fn input(&self) -> &LogicalPlan {
        &self.input
    }

    pub fn group_by(&self) -> &[String] {
        &self.group_by
    }

    pub fn sketch_column(&self) -> &str {
        &self.sketch_column
    }

    pub fn summary_type(&self) -> &SketchType {
        &self.summary_type
    }
}

impl PartialOrd for SummaryMergeMultiple {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match self.group_by.partial_cmp(&other.group_by) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.sketch_column.partial_cmp(&other.sketch_column) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.summary_type.partial_cmp(&other.summary_type) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        match self.input.partial_cmp(&other.input) {
            Some(Ordering::Equal) => {}
            other => return other,
        }
        // Compare schemas by pointer.
        Some(Arc::as_ptr(&self.schema).cmp(&Arc::as_ptr(&other.schema)))
    }
}

impl UserDefinedLogicalNodeCore for SummaryMergeMultiple {
    fn name(&self) -> &str {
        "SummaryMergeMultiple"
    }

    fn inputs(&self) -> Vec<&LogicalPlan> {
        vec![self.input.as_ref()]
    }

    fn schema(&self) -> &DFSchemaRef {
        &self.schema
    }

    fn expressions(&self)
-> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "SummaryMergeMultiple: group_by=[{}], sketch_column={}, type={}", + self.group_by.join(", "), + self.sketch_column, + self.summary_type + ) + } + + fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + Self { + input: Arc::new(inputs[0].clone()), + group_by: self.group_by.clone(), + sketch_column: self.sketch_column.clone(), + summary_type: self.summary_type.clone(), + schema: inputs[0].schema().clone(), + } + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> DFResult { + Ok(self.from_template(&exprs, &inputs)) + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/.gitignore b/CommonDependencies/dependencies/rs/promql_utilities/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock new file mode 100644 index 0000000..f32cf1d --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.lock @@ -0,0 +1,1064 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + 
"serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tokio-test", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + 
"regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "io-uring", + "libc", + "mio", + "pin-project-lite", + "slab", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = 
"unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] 
+name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml new file mode 100644 index 0000000..adb9ec5 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_utilities" +version = "0.1.0" +edition = "2021" +authors = ["SketchDB Team"] +description = "A standalone PromQL pattern matching and query analysis library for Rust" +license = "MIT" +keywords = ["prometheus", "promql", "pattern-matching", "query-analysis"] +categories = ["parsing", "database", "development-tools"] + +[dependencies] +promql-parser = "0.5.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +thiserror = "1.0" +chrono = { version = "0.4", features = ["serde"] } +tracing = "0.1" + +[dev-dependencies] +tokio-test = "0.4" diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs 
b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs new file mode 100644 index 0000000..bcc58df --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/mod.rs @@ -0,0 +1,7 @@ +pub mod promql_pattern; +pub mod promql_pattern_builder; +//pub mod promql_pattern_factory; + +pub use promql_pattern::*; +pub use promql_pattern_builder::*; +//pub use promql_pattern_factory::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs new file mode 100644 index 0000000..625fee5 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern.rs @@ -0,0 +1,917 @@ +use chrono::Duration; +use core::panic; +use promql_parser::label::MatchOp; +use promql_parser::parser::{AtModifier, Expr, LabelModifier, SubqueryExpr, VectorSelector}; +use serde::Serialize; +use serde_json::Value; +use std::collections::HashMap; +use std::time::UNIX_EPOCH; +use tracing::debug; + +/// PromQL pattern for AST-based matching +#[derive(Debug, Clone)] +pub struct PromQLPattern { + /// AST pattern definition (JSON-like structure). None indicates a wildcard (match any). 
+ pub ast_pattern: Option>, + ///// Tokens to collect during matching + //pub collect_tokens: Vec, +} + +impl PromQLPattern { + /// Create a new pattern with AST pattern definition + //pub fn new(ast_pattern: Option>, collect_tokens: Vec) -> Self { + pub fn new(ast_pattern: Option>) -> Self { + debug!("Creating new PromQLPattern"); + Self { + ast_pattern, + //collect_tokens, + } + } + + /// Convert an Expr to a clean string representation + fn expr_to_string(expr: &Expr) -> String { + match expr { + Expr::NumberLiteral(num) => num.val.to_string(), + _ => format!("{:?}", expr), + } + } + + /// Match this pattern against a parsed AST + pub fn matches(&self, ast: &Expr) -> PromQLMatchResult { + debug!("Starting pattern matching against AST"); + debug!("Pattern: {:?}", self.ast_pattern); + debug!("AST: {:?}", ast); + let mut tokens = HashMap::new(); + let matches = self.matches_recursive(ast, self.ast_pattern.as_ref(), &mut tokens); + debug!( + "Pattern matching completed: {}, collected {} tokens", + matches, + tokens.len() + ); + if !matches { + debug!("MATCH FAILED - tokens collected: {:?}", tokens); + } + PromQLMatchResult { matches, tokens } + } + + /// Recursive pattern matching implementation + fn matches_recursive( + &self, + node: &Expr, + pattern: Option<&HashMap>, + tokens: &mut HashMap, + ) -> bool { + // None pattern is treated as wildcard (matches anything) to mirror Python's None + if pattern.is_none() { + debug!("Wildcard pattern matched"); + return true; + } + let pattern = pattern.unwrap(); + if pattern.is_empty() { + panic!("Empty pattern is not allowed"); + } + + // Get the pattern type + let pattern_type = match pattern.get("type") { + Some(Value::String(t)) => t.as_str(), + _ => panic!("Pattern must have a 'type' field of string type"), + }; + + debug!("Matching pattern type: {} against node type", pattern_type); + debug!("Full pattern: {:?}", pattern); + debug!("Node: {:?}", node); + match (pattern_type, node) { + // Match metric selectors + 
("VectorSelector", Expr::VectorSelector(vs)) => { + self.match_metric_selector(vs, pattern, tokens) + } + + // Match function calls + ("Call", Expr::Call(call)) => self.match_function_call(call, pattern, tokens), + + // Match aggregation operations + ("AggregateExpr", Expr::Aggregate(agg)) => self.match_aggregation(agg, pattern, tokens), + + // Match matrix selectors (range vectors) + ("MatrixSelector", Expr::MatrixSelector(ms)) => { + self.match_matrix_selector(ms, pattern, tokens) + } + + // Match binary operations + ("BinaryExpr", Expr::Binary(bin_op)) => { + self.match_binary_operation(bin_op, pattern, tokens) + } + + // Match number literals + ("NumberLiteral", Expr::NumberLiteral(num)) => { + self.match_number_literal(num, pattern, tokens) + } + + // Match subquery expressions + ("SubqueryExpr", Expr::Subquery(subquery)) => { + self.match_subquery(subquery, pattern, tokens) + } + + _ => false, // Simply return false for non-matching types + } + } + + /// Match a VectorSelector node against pattern + fn match_metric_selector( + &self, + vs: &VectorSelector, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check metric name if specified in pattern + if let Some(Value::String(expected_name)) = pattern.get("name") { + if let Some(metric_name) = &vs.name { + if *metric_name != *expected_name { + return false; + } + } else { + return false; // Pattern expects name but node has none + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + debug!("Collecting metric token as: {}", collect_as); + let mut labels = HashMap::new(); + + // Extract label matchers + for matcher in &vs.matchers.matchers { + if matcher.op == MatchOp::Equal { + labels.insert(matcher.name.clone(), matcher.value.clone()); + } + } + + let at_modifier_opt = match &vs.at { + Some(AtModifier::At(t)) => { + // Convert SystemTime to seconds since UNIX_EPOCH (u64). 
+ // Panic if time is earlier than UNIX_EPOCH (pre-epoch) as requested. + let secs = match t.duration_since(UNIX_EPOCH) { + Ok(dur) => dur.as_secs(), + Err(_) => panic!("AtModifier::At contains a time before UNIX_EPOCH, which is not supported by the pattern matcher"), + }; + + Some(secs) + } + Some(AtModifier::Start) => { + panic!("AtModifier::Start is not supported by pattern matcher") + } + Some(AtModifier::End) => { + panic!("AtModifier::End is not supported by pattern matcher") + } + None => None, + }; + + let metric_token = MetricToken { + name: vs.name.clone().unwrap_or_default(), + labels, + at_modifier: at_modifier_opt, + ast: Some(vs.clone()), + }; + + let token_data = TokenData { + metric: Some(metric_token), + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a Call node (function call) against pattern + fn match_function_call( + &self, + call: &promql_parser::parser::Call, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check function name + // BUGFIX: Pattern builder creates "func" as an Object, not Array of Objects + // Original code (incorrect - expected Array of Objects): + // if let Some(Value::Array(expected_names)) = pattern.get("func") { + // if let Some(func_obj) = expected_names.first() { + // if let Some(func_map) = func_obj.as_object() { + // if let Some(Value::Array(names)) = func_map.get("name") { + // let function_name = call.func.name; + // let matches_name = names.iter().any(|name| { + // if let Some(name_str) = name.as_str() { + // name_str == function_name + // } else { + // false + // } + // }); + // + // if !matches_name { + // return false; + // } + // } + // } + // } + // } + + // Fixed code (correct - expects Object with "name" field): + if let Some(func_pattern_value) = pattern.get("func") { + if let Some(func_pattern) = func_pattern_value.as_object() { + if let 
Some(Value::Array(names)) = func_pattern.get("name") { + let function_name = call.func.name; + let matches_name = names.iter().any(|name| { + if let Some(name_str) = name.as_str() { + name_str == function_name + } else { + false + } + }); + + if !matches_name { + return false; + } + } + } + } + + // Check arguments recursively + if let Some(Value::Array(expected_args)) = pattern.get("args") { + if call.args.args.len() != expected_args.len() { + return false; + } + + for (i, arg) in call.args.args.iter().enumerate() { + if let Some(arg_pattern) = expected_args[i].as_object() { + let arg_pattern_map: HashMap = arg_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(arg.as_ref(), Some(&arg_pattern_map), tokens) { + return false; + } + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + debug!("Collecting function token as: {}", collect_as); + let function_token = FunctionToken { + name: call.func.name.to_string(), + args: call + .args + .args + .iter() + .map(|arg| Self::expr_to_string(arg)) + .collect(), // Capture actual args + }; + + let token_data = TokenData { + metric: None, + function: Some(function_token), + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + // If requested, collect the raw function arguments (as strings) under a separate token + if let Some(Value::String(collect_args_as)) = pattern.get("_collect_args_as") { + let arg_strs: Vec = call + .args + .args + .iter() + .map(|arg| Self::expr_to_string(arg)) + .collect(); + + let function_args_token = FunctionToken { + name: call.func.name.to_string(), + args: arg_strs, + }; + + let token_data = TokenData { + metric: None, + function: Some(function_args_token), + aggregation: None, + range_vector: None, + subquery: None, + 
binary_op: None, + number: None, + }; + + tokens.insert(collect_args_as.clone(), token_data); + } + + true + } + + /// Match an Aggregate node against pattern + fn match_aggregation( + &self, + agg: &promql_parser::parser::AggregateExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + debug!("=== AGGREGATION MATCHING START ==="); + debug!("Aggregation pattern: {:?}", pattern); + debug!("Aggregation AST: {:?}", agg); + // Check aggregation operation + if let Some(Value::Array(expected_ops)) = pattern.get("op") { + let agg_op = agg.op.to_string(); + debug!( + "Checking aggregation op '{}' against pattern ops: {:?}", + agg_op, expected_ops + ); + let matches_op = expected_ops.iter().any(|op| { + if let Some(op_str) = op.as_str() { + op_str == agg_op + } else { + false + } + }); + + if !matches_op { + debug!("Aggregation op '{}' does not match pattern ops", agg_op); + return false; + } + debug!("Aggregation op '{}' matched!", agg_op); + } + + // Check inner expression recursively + if let Some(expr_pattern_value) = pattern.get("expr") { + debug!("Found expr pattern value: {:?}", expr_pattern_value); + if let Some(expr_pattern) = expr_pattern_value.as_object() { + debug!("Expr pattern is an object, recursing..."); + let expr_pattern_map: HashMap = expr_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&agg.expr, Some(&expr_pattern_map), tokens) { + debug!("Inner expression recursive match FAILED"); + return false; + } + debug!("Inner expression recursive match SUCCESS"); + } else if expr_pattern_value.is_null() { + debug!("Expr pattern is null, skipping validation"); + } else { + debug!( + "Expr pattern is neither object nor null: {:?}", + expr_pattern_value + ); + } + } else { + debug!("No expr pattern found, skipping inner expression check"); + } + + // Check modifier if specified in pattern + // Original code (too strict - fails when query has modifier but pattern is null): + // if let 
Some(pattern_modifier_value) = pattern.get("modifier") { + // let actual_modifier = match &agg.modifier { + // Some(LabelModifier::Include(_)) => "by", + // Some(LabelModifier::Exclude(_)) => "without", + // None => "null", + // }; + // + // match pattern_modifier_value { + // Value::String(expected_modifier) => { + // if actual_modifier != expected_modifier { + // return false; + // } + // } + // Value::Null => { + // if actual_modifier != "null" { + // return false; + // } + // } + // _ => { + // // Invalid pattern modifier format + // return false; + // } + // } + // } + + // Fixed code - only validate modifiers if pattern explicitly specifies a non-null modifier + if let Some(pattern_modifier_value) = pattern.get("modifier") { + debug!("Found modifier pattern: {:?}", pattern_modifier_value); + let actual_modifier = match &agg.modifier { + Some(LabelModifier::Include(_)) => "by", + Some(LabelModifier::Exclude(_)) => "without", + None => "null", + }; + debug!("Actual aggregation modifier: '{}'", actual_modifier); + + // Only validate if pattern explicitly requires a specific modifier (not null) + if !pattern_modifier_value.is_null() { + debug!("Pattern requires specific modifier, validating..."); + match pattern_modifier_value { + Value::String(expected_modifier) => { + debug!( + "Expected modifier: '{}', actual: '{}'", + expected_modifier, actual_modifier + ); + if actual_modifier != expected_modifier { + debug!("Modifier mismatch - FAILED"); + return false; + } + debug!("Modifier match - SUCCESS"); + } + _ => { + debug!("Invalid pattern modifier format - FAILED"); + return false; + } + } + } else { + debug!("Pattern modifier is null, allowing any query modifier (wildcard)"); + } + } else { + debug!("No modifier pattern found, allowing any query modifier"); + } + + debug!("=== AGGREGATION MATCHING SUCCESS ==="); + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + 
debug!("Collecting aggregation token as: {}", collect_as); + let modifier = match &agg.modifier { + Some(LabelModifier::Include(labels)) => Some(AggregationModifier { + modifier_type: "by".to_string(), + labels: labels.labels.clone(), + }), + Some(LabelModifier::Exclude(labels)) => Some(AggregationModifier { + modifier_type: "without".to_string(), + labels: labels.labels.clone(), + }), + None => None, + }; + + let aggregation_token = AggregationToken { + op: agg.op.to_string(), + modifier, + param: agg.param.as_ref().map(|p| Self::expr_to_string(p)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: Some(aggregation_token), + range_vector: None, + subquery: None, + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a MatrixSelector node against pattern + fn match_matrix_selector( + &self, + ms: &promql_parser::parser::MatrixSelector, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check the inner vector selector + if let Some(vs_pattern_value) = pattern.get("vector_selector") { + if let Some(vs_pattern) = vs_pattern_value.as_object() { + let vs_pattern_map: HashMap = vs_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.match_metric_selector(&ms.vs, &vs_pattern_map, tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + // Convert std::time::Duration to chrono::Duration and store directly + let chrono_dur = Duration::from_std(ms.range) + .map_err(|_| Duration::zero()) + .unwrap(); + + let range_token = RangeToken { + range: chrono_dur, + offset: ms.vs.offset.as_ref().map(|offset| format!("{:?}", offset)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: Some(range_token), + subquery: None, + binary_op: None, + 
number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + ///// Normalize duration to standard PromQL format (prefer larger units when possible) + // fn normalize_duration_string(duration: &std::time::Duration) -> String { + // let secs = duration.as_secs(); + + // // Convert to the most appropriate unit, preferring larger units when possible + // if secs >= 3600 && secs % 3600 == 0 { + // format!("{}h", secs / 3600) + // } else if secs >= 60 && secs % 60 == 0 { + // format!("{}m", secs / 60) + // } else if secs > 0 { + // format!("{secs}s") + // } else { + // // Handle sub-second durations + // let millis = duration.as_millis(); + // if millis > 0 { + // format!("{millis}ms") + // } else { + // "0s".to_string() + // } + // } + // } + + /// Match a Binary expression node against pattern + fn match_binary_operation( + &self, + bin_op: &promql_parser::parser::BinaryExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check operation type + if let Some(Value::String(expected_op)) = pattern.get("op") { + if bin_op.op.to_string() != *expected_op { + return false; + } + } + + // Check left and right expressions recursively + if let Some(left_pattern_value) = pattern.get("left") { + if let Some(left_pattern) = left_pattern_value.as_object() { + let left_pattern_map: HashMap = left_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&bin_op.lhs, Some(&left_pattern_map), tokens) { + return false; + } + } + } + + if let Some(right_pattern_value) = pattern.get("right") { + if let Some(right_pattern) = right_pattern_value.as_object() { + let right_pattern_map: HashMap = right_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&bin_op.rhs, Some(&right_pattern_map), tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let 
Some(Value::String(collect_as)) = pattern.get("_collect_as") { + let binary_token = BinaryOpToken { + op: bin_op.op.to_string(), + matching: None, // TODO: Add vector matching support + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: Some(binary_token), + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a NumberLiteral node against pattern + fn match_number_literal( + &self, + num: &promql_parser::parser::NumberLiteral, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check value if specified in pattern + if let Some(Value::Number(expected_value)) = pattern.get("value") { + if let Some(expected_f64) = expected_value.as_f64() { + if (num.val - expected_f64).abs() > f64::EPSILON { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = pattern.get("_collect_as") { + let number_token = NumberToken { value: num.val }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: None, + binary_op: None, + number: Some(number_token), + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } + + /// Match a SubqueryExpr node against pattern + fn match_subquery( + &self, + subquery: &SubqueryExpr, + pattern: &HashMap, + tokens: &mut HashMap, + ) -> bool { + // Check inner expression recursively + if let Some(expr_pattern_value) = pattern.get("expr") { + if let Some(expr_pattern) = expr_pattern_value.as_object() { + let expr_pattern_map: HashMap = expr_pattern + .clone() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if !self.matches_recursive(&subquery.expr, Some(&expr_pattern_map), tokens) { + return false; + } + } + } + + // Extract and store token data if this node should be collected + if let Some(Value::String(collect_as)) = 
pattern.get("_collect_as") { + // Convert std::time::Duration to chrono::Duration and store + let chrono_dur = Duration::from_std(subquery.range) + .map_err(|_| Duration::zero()) + .unwrap(); + + let subquery_token = SubqueryToken { + range: chrono_dur, + offset: subquery + .offset + .as_ref() + .map(|offset| format!("{:?}", offset)), + step: subquery.step.as_ref().map(|step| format!("{:?}", step)), + }; + + let token_data = TokenData { + metric: None, + function: None, + aggregation: None, + range_vector: None, + subquery: Some(subquery_token), + binary_op: None, + number: None, + }; + + tokens.insert(collect_as.clone(), token_data); + } + + true + } +} + +/// Token data extracted from AST nodes - pattern matching system +#[derive(Debug, Clone, Serialize)] +pub struct TokenData { + pub metric: Option, + pub function: Option, + pub aggregation: Option, + pub range_vector: Option, + pub subquery: Option, + pub binary_op: Option, + pub number: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct MetricToken { + pub name: String, + pub labels: HashMap, + // seconds since UNIX_EPOCH + pub at_modifier: Option, + #[serde(skip_serializing, skip_deserializing)] + pub ast: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FunctionToken { + pub name: String, + pub args: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct AggregationToken { + pub op: String, + pub modifier: Option, + pub param: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct RangeToken { + pub range: Duration, + pub offset: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct SubqueryToken { + pub range: Duration, + pub offset: Option, + pub step: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct BinaryOpToken { + pub op: String, + pub matching: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct NumberToken { + pub value: f64, +} + +#[derive(Debug, Clone, Serialize)] +pub struct VectorMatching { + pub card: String, // "one-to-one", 
"one-to-many", "many-to-one" + pub on: Vec, + pub ignoring: Vec, + pub group_left: Vec, + pub group_right: Vec, +} + +/// Match result with token-based extraction +#[derive(Debug, Clone)] +pub struct PromQLMatchResult { + pub matches: bool, + pub tokens: HashMap, +} + +impl PromQLMatchResult { + /// Create a new empty result + pub fn new() -> Self { + Self { + matches: false, + tokens: HashMap::new(), + } + } + + /// Create a successful match result with tokens + pub fn with_tokens(tokens: HashMap) -> Self { + Self { + matches: true, + tokens, + } + } + + /// Get metric name from tokens + pub fn get_metric_name(&self) -> Option { + self.tokens + .get("metric")? + .metric + .as_ref() + .map(|m| m.name.clone()) + } + + /// Get function name from tokens + pub fn get_function_name(&self) -> Option { + self.tokens + .get("function")? + .function + .as_ref() + .map(|f| f.name.clone()) + } + + /// Get aggregation operation from tokens + pub fn get_aggregation_op(&self) -> Option { + self.tokens + .get("aggregation")? + .aggregation + .as_ref() + .map(|a| a.op.clone()) + } + + /// Get range duration from tokens as chrono::Duration + pub fn get_range_duration(&self) -> Option { + self.tokens + .get("range_vector")? 
+ .range_vector + .as_ref() + .map(|r| r.range) + } +} + +impl Default for PromQLMatchResult { + fn default() -> Self { + Self::new() + } +} + +/// Represents aggregation modifiers like "by" or "without" +#[derive(Debug, Clone, Serialize)] +pub struct AggregationModifier { + pub modifier_type: String, // "by" or "without" + pub labels: Vec, +} + +impl AggregationModifier { + /// Create a new AggregationModifier + pub fn new(modifier_type: String, labels: Vec) -> Self { + Self { + modifier_type, + labels, + } + } + + // /// Check if a function name represents a temporal function + // fn is_temporal_function(&self, function_name: &str) -> bool { + // matches!( + // function_name, + // "rate" + // | "increase" + // | "sum_over_time" + // | "min_over_time" + // | "max_over_time" + // | "avg_over_time" + // | "count_over_time" + // | "quantile_over_time" + // | "stddev_over_time" + // | "stdvar_over_time" + // | "last_over_time" + // | "present_over_time" + // ) + // } + + // /// Extract label filters from matchers + // fn extract_label_filters(&self, matchers: &Matchers) -> HashMap { + // let mut filters = HashMap::new(); + + // for matcher in &matchers.matchers { + // // For now, only handle exact equality matches + // if matcher.op == MatchOp::Equal { + // filters.insert(matcher.name.clone(), matcher.value.clone()); + // } + // } + + // filters + // } + + // /// Convert Duration to string representation in PromQL format + // fn duration_to_string(&self, duration: &std::time::Duration) -> String { + // let secs = duration.as_secs(); + + // // Convert to the most appropriate unit, preferring larger units when possible + // if secs >= 3600 && secs % 3600 == 0 { + // format!("{}h", secs / 3600) + // } else if secs >= 60 && secs % 60 == 0 { + // format!("{}m", secs / 60) + // } else if secs > 0 { + // format!("{secs}s") + // } else { + // // Handle sub-second durations + // let millis = duration.as_millis(); + // if millis > 0 { + // format!("{millis}ms") + // } else { + 
// "0s".to_string() + // } + // } + // } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs new file mode 100644 index 0000000..5a45a92 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_builder.rs @@ -0,0 +1,238 @@ +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +/// PromQL Pattern Builder for creating PromQL-based patterns +/// This mirrors the Python PromQLPatternBuilder class +pub struct PromQLPatternBuilder; + +impl PromQLPatternBuilder { + /// Create a pattern for any node type + pub fn any() -> Option> { + debug!("Creating wildcard pattern (any)"); + None + } + + /// Create a binary operation pattern (BinaryExpr) + pub fn binary_op( + op: &str, + left: Option>, + right: Option>, + collect_as: Option<&str>, + ) -> Option> { + debug!("Creating binary operation pattern for op: {}", op); + let mut pattern = HashMap::new(); + pattern.insert("type".to_string(), Value::String("BinaryExpr".to_string())); + pattern.insert("op".to_string(), Value::String(op.to_string())); + pattern.insert("left".to_string(), serde_json::to_value(left).unwrap()); + pattern.insert("right".to_string(), serde_json::to_value(right).unwrap()); + + match collect_as { + Some(collect) => pattern.insert( + "_collect_as".to_string(), + Value::String(collect.to_string()), + ), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a metric pattern (VectorSelector) + pub fn metric( + name: Option<&str>, + labels: Option>, + at_modifier: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + debug!("Creating metric pattern for name: {:?}", name); + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("VectorSelector".to_string()), + ); + + match name { + Some(n) => 
pattern.insert("name".to_string(), Value::String(n.to_string())), + None => pattern.insert("name".to_string(), Value::Null), + }; + + match labels { + Some(l) => { + let labels_value = serde_json::to_value(l).unwrap(); + pattern.insert("matchers".to_string(), labels_value) + } + None => pattern.insert("matchers".to_string(), Value::Null), + }; + + match at_modifier { + Some(a) => pattern.insert("at".to_string(), Value::String(a.to_string())), + None => pattern.insert("at".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a function pattern (Call) + pub fn function( + names: Vec<&str>, + args: Vec>>, + collect_as: Option<&str>, + collect_args_as: Option<&str>, + ) -> Option> { + debug!("Creating function pattern for names: {:?}", names); + let mut pattern = HashMap::new(); + pattern.insert("type".to_string(), Value::String("Call".to_string())); + + let mut func = HashMap::new(); + func.insert("type".to_string(), Value::String("Function".to_string())); + func.insert( + "name".to_string(), + Value::Array(names.iter().map(|n| Value::String(n.to_string())).collect()), + ); + + pattern.insert("func".to_string(), serde_json::to_value(func).unwrap()); + pattern.insert("args".to_string(), serde_json::to_value(args).unwrap()); + + match collect_args_as { + Some(c) => pattern.insert("_collect_args_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_args_as".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a subquery pattern (SubqueryExpr) + pub fn subquery( + expr: Option>, + duration: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + let mut 
pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("SubqueryExpr".to_string()), + ); + pattern.insert("expr".to_string(), serde_json::to_value(expr).unwrap()); + + match duration { + Some(d) => pattern.insert("range".to_string(), Value::String(d.to_string())), + None => pattern.insert("range".to_string(), Value::Null), + }; + + // Initialize step and offset as null, matching Python implementation + pattern.insert("step".to_string(), Value::Null); + pattern.insert("offset".to_string(), Value::Null); + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a matrix selector pattern (MatrixSelector) + pub fn matrix_selector( + vector_selector: Option>, + range: Option<&str>, + collect_as: Option<&str>, + ) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("MatrixSelector".to_string()), + ); + pattern.insert( + "vector_selector".to_string(), + serde_json::to_value(vector_selector).unwrap(), + ); + + match range { + Some(r) => pattern.insert("range".to_string(), Value::String(r.to_string())), + None => pattern.insert("range".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create an aggregation pattern (AggregateExpr) + pub fn aggregation( + ops: Vec<&str>, + expr: Option>, + param: Option>, + by_labels: Option>, + without_labels: Option>, + collect_as: Option<&str>, + ) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("AggregateExpr".to_string()), + ); + pattern.insert( + "op".to_string(), + Value::Array(ops.iter().map(|op| Value::String(op.to_string())).collect()), + ); + 
pattern.insert("expr".to_string(), serde_json::to_value(expr).unwrap()); + + match param { + Some(p) => pattern.insert("param".to_string(), serde_json::to_value(p).unwrap()), + None => pattern.insert("param".to_string(), Value::Null), + }; + + // Use single "modifier" field to match Python format + let modifier_value = match (by_labels, without_labels) { + (Some(_), None) => Value::String("by".to_string()), + (None, Some(_)) => Value::String("without".to_string()), + _ => Value::Null, + }; + pattern.insert("modifier".to_string(), modifier_value); + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } + + /// Create a number literal pattern + pub fn number(value: Option, collect_as: Option<&str>) -> Option> { + let mut pattern = HashMap::new(); + pattern.insert( + "type".to_string(), + Value::String("NumberLiteral".to_string()), + ); + + match value { + Some(v) => pattern.insert( + "value".to_string(), + Value::Number(serde_json::Number::from_f64(v).unwrap()), + ), + None => pattern.insert("value".to_string(), Value::Null), + }; + + match collect_as { + Some(c) => pattern.insert("_collect_as".to_string(), Value::String(c.to_string())), + None => pattern.insert("_collect_as".to_string(), Value::Null), + }; + + Some(pattern) + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs new file mode 100644 index 0000000..d8181ec --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/ast_matching/promql_pattern_factory.rs @@ -0,0 +1,123 @@ +//use crate::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +//use tracing::debug; +// +///// Pattern factory for creating common PromQL patterns +//pub struct PromQLPatternFactory; +// +//impl PromQLPatternFactory 
{ +// /// Create pattern for OnlyTemporal queries (e.g., rate(metric[5m])) +// pub fn only_temporal_pattern() -> PromQLPattern { +// debug!("Creating only temporal pattern"); +// let ms = PromQLPatternBuilder::matrix_selector( +// PromQLPatternBuilder::metric(None, None, None, Some("metric")), +// None, +// Some("range_vector"), +// ); +// +// let func_args: Vec>> = vec![ms]; +// +// let pattern = PromQLPatternBuilder::function( +// vec![ +// "rate", +// "increase", +// "sum_over_time", +// "avg_over_time", +// "min_over_time", +// "max_over_time", +// "count_over_time", +// ], +// func_args, +// Some("function"), +// None, +// ); +// +// PromQLPattern::new( +// pattern, +// //vec![ +// // "metric".to_string(), +// // "function".to_string(), +// // "range_vector".to_string(), +// //], +// // QueryPatternType::OnlyTemporal, +// ) +// } +// +// /// Create pattern for OnlySpatial queries (e.g., sum(metric) by (label)) +// pub fn only_spatial_pattern() -> PromQLPattern { +// debug!("Creating only spatial pattern"); +// let metric = PromQLPatternBuilder::metric(None, None, None, Some("metric")); +// +// let pattern = PromQLPatternBuilder::aggregation( +// vec!["sum", "count", "avg", "min", "max", "quantile"], +// metric, +// None, +// None, +// None, +// Some("aggregation"), +// ); +// +// PromQLPattern::new( +// pattern, +// //vec!["metric".to_string(), "aggregation".to_string()], +// // QueryPatternType::OnlySpatial, +// ) +// } +// +// /// Create pattern for OneTemporalOneSpatial queries (e.g., sum(rate(metric[5m])) by (label)) +// pub fn one_temporal_one_spatial_pattern() -> PromQLPattern { +// debug!("Creating one temporal one spatial pattern"); +// let ms2 = PromQLPatternBuilder::matrix_selector( +// PromQLPatternBuilder::metric(None, None, None, Some("metric")), +// None, +// Some("range_vector"), +// ); +// +// let func_args2: Vec>> = +// vec![ms2]; +// +// let temporal_part = PromQLPatternBuilder::function( +// vec![ +// "rate", +// "increase", +// 
"sum_over_time", +// "avg_over_time", +// "min_over_time", +// "max_over_time", +// "count_over_time", +// ], +// func_args2, +// Some("function"), +// None, +// ); +// +// let pattern = PromQLPatternBuilder::aggregation( +// vec!["sum", "count", "avg", "min", "max", "quantile"], +// temporal_part, +// None, +// None, +// None, +// Some("aggregation"), +// ); +// +// PromQLPattern::new( +// pattern, +// //vec![ +// // "metric".to_string(), +// // "function".to_string(), +// // "range_vector".to_string(), +// // "aggregation".to_string(), +// //], +// // QueryPatternType::OneTemporalOneSpatial, +// ) +// } +// +// /// Get all standard patterns +// pub fn get_all_patterns() -> Vec { +// debug!("Getting all standard patterns"); +// vec![ +// Self::one_temporal_one_spatial_pattern(), +// Self::only_temporal_pattern(), +// Self::only_spatial_pattern(), +// ] +// } +//} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs new file mode 100644 index 0000000..df4f0f0 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/key_by_label_names.rs @@ -0,0 +1,134 @@ +use serde::{Deserialize, Serialize}; +use tracing::debug; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct KeyByLabelNames { + pub labels: Vec, // Renamed from label_names to match query_logics usage +} + +impl KeyByLabelNames { + pub fn new(label_names: Vec) -> Self { + debug!("Creating KeyByLabelNames with {} labels", label_names.len()); + let mut sorted_names = label_names; + sorted_names.sort(); // Match Python behavior - keys are sorted + debug!("Sorted labels: {:?}", sorted_names); + Self { + labels: sorted_names, + } + } + + pub fn empty() -> Self { + Self::new(Vec::new()) + } + + pub fn from_names(names: Vec) -> Self { + Self::new(names) + } + + pub fn push(&mut self, name: String) { + debug!("Adding 
label: {}", name); + self.labels.push(name); + self.labels.sort(); // Keep sorted + } + + /// Set difference operation - remove labels that are in the other set + /// Based on Python implementation: KeyByLabelNames.__sub__ + pub fn difference(&self, other: &KeyByLabelNames) -> KeyByLabelNames { + debug!( + "Computing difference between {:?} and {:?}", + self.labels, other.labels + ); + let other_set: std::collections::HashSet<_> = other.labels.iter().collect(); + let result: Vec = self + .labels + .iter() + .filter(|label| !other_set.contains(label)) + .cloned() + .collect(); + KeyByLabelNames::new(result) + } + + /// Set union operation - combine labels from both sets + /// Based on Python implementation: KeyByLabelNames.__add__ + pub fn union(&self, other: &KeyByLabelNames) -> KeyByLabelNames { + debug!( + "Computing union between {:?} and {:?}", + self.labels, other.labels + ); + let mut combined = std::collections::HashSet::new(); + for label in &self.labels { + combined.insert(label.clone()); + } + for label in &other.labels { + combined.insert(label.clone()); + } + KeyByLabelNames::new(combined.into_iter().collect()) + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::to_value(&self.labels).unwrap_or(serde_json::Value::Null) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let names: Vec = serde_json::from_value(data.clone())?; + Ok(Self::new(names)) + } + + pub fn is_empty(&self) -> bool { + self.labels.is_empty() + } + + pub fn len(&self) -> usize { + self.labels.len() + } +} + +impl Default for KeyByLabelNames { + fn default() -> Self { + Self::empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_key_by_label_names() { + let key = KeyByLabelNames::new(vec!["instance".to_string(), "job".to_string()]); + + assert_eq!(key.len(), 2); + assert_eq!(key.labels, vec!["instance".to_string(), "job".to_string()]); + + let mut key = KeyByLabelNames::new(vec!["instance".to_string(), 
"job".to_string()]); + key.push("new_label".to_string()); + assert_eq!(key.len(), 3); + // After sorting, should be in alphabetical order + assert!(key.labels.contains(&"instance".to_string())); + assert!(key.labels.contains(&"job".to_string())); + assert!(key.labels.contains(&"new_label".to_string())); + } + + #[test] + fn test_difference() { + let key1 = KeyByLabelNames::new(vec!["a".to_string(), "b".to_string(), "c".to_string()]); + let key2 = KeyByLabelNames::new(vec!["b".to_string(), "c".to_string()]); + + let diff = key1.difference(&key2); + assert_eq!(diff.len(), 1); + assert_eq!(diff.labels, vec!["a".to_string()]); + } + + #[test] + fn test_union() { + let key1 = KeyByLabelNames::new(vec!["a".to_string(), "b".to_string()]); + let key2 = KeyByLabelNames::new(vec!["b".to_string(), "c".to_string()]); + + let union = key1.union(&key2); + assert_eq!(union.len(), 3); + assert_eq!( + union.labels, + vec!["a".to_string(), "b".to_string(), "c".to_string()] + ); + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs new file mode 100644 index 0000000..f587f43 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/data_model/mod.rs @@ -0,0 +1,3 @@ +pub mod key_by_label_names; + +pub use key_by_label_names::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs new file mode 100644 index 0000000..5de6fa3 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/lib.rs @@ -0,0 +1,7 @@ +pub mod ast_matching; +pub mod data_model; +pub mod query_logics; + +pub use ast_matching::*; +pub use data_model::*; +pub use query_logics::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs new file mode 100644 index 
0000000..7369f79 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/enums.rs @@ -0,0 +1,142 @@ +use serde::{Deserialize, Serialize}; +use tracing::debug; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryPatternType { + OnlyTemporal, + OnlySpatial, + OneTemporalOneSpatial, +} + +impl std::fmt::Display for QueryPatternType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryPatternType: {:?}", self); + match self { + QueryPatternType::OnlyTemporal => write!(f, "only_temporal"), + QueryPatternType::OnlySpatial => write!(f, "only_spatial"), + QueryPatternType::OneTemporalOneSpatial => write!(f, "one_temporal_one_spatial"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryTreatmentType { + Exact, + Approximate, +} + +impl std::fmt::Display for QueryTreatmentType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryTreatmentType: {:?}", self); + match self { + QueryTreatmentType::Exact => write!(f, "exact"), + QueryTreatmentType::Approximate => write!(f, "approximate"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum Statistic { + Count, + Sum, + Cardinality, + Increase, + Rate, + Min, + Max, + Quantile, + Topk, +} + +impl std::fmt::Display for Statistic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting Statistic: {:?}", self); + match self { + Statistic::Count => write!(f, "count"), + Statistic::Sum => write!(f, "sum"), + Statistic::Cardinality => write!(f, "cardinality"), + Statistic::Increase => write!(f, "increase"), + Statistic::Rate => write!(f, "rate"), + Statistic::Min => write!(f, "min"), + Statistic::Max => write!(f, "max"), + Statistic::Quantile => write!(f, "quantile"), + Statistic::Topk => write!(f, "topk"), + } + } +} + 
+#[allow(clippy::should_implement_trait)] +impl Statistic { + pub fn from_str(s: &str) -> Option { + debug!("Parsing Statistic from string: {}", s); + match s.to_lowercase().as_str() { + "count" => Some(Statistic::Count), + "sum" => Some(Statistic::Sum), + "cardinality" => Some(Statistic::Cardinality), + "increase" => Some(Statistic::Increase), + "rate" => Some(Statistic::Rate), + "min" => Some(Statistic::Min), + "max" => Some(Statistic::Max), + "quantile" => Some(Statistic::Quantile), + "topk" => Some(Statistic::Topk), + _ => None, + } + } +} + +impl std::str::FromStr for Statistic { + type Err = (); + + /// Parse a statistic from a string (case-insensitive). + /// Use `s.parse::()` or `Statistic::from_str(s)`. + fn from_str(s: &str) -> Result { + debug!("FromStr trait parsing Statistic: {}", s); + Statistic::from_str(s).ok_or(()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum QueryResultType { + InstantVector, + RangeVector, +} + +impl std::fmt::Display for QueryResultType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug!("Formatting QueryResultType: {:?}", self); + match self { + QueryResultType::InstantVector => write!(f, "instant_vector"), + QueryResultType::RangeVector => write!(f, "range_vector"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_query_treatment_type_display() { + assert_eq!(QueryTreatmentType::Exact.to_string(), "exact"); + assert_eq!(QueryTreatmentType::Approximate.to_string(), "approximate"); + } + + #[test] + fn test_query_treatment_type_serialization() { + let exact = QueryTreatmentType::Exact; + let approximate = QueryTreatmentType::Approximate; + + // Test that they can be serialized/deserialized + let exact_str = serde_json::to_string(&exact).unwrap(); + let approximate_str = serde_json::to_string(&approximate).unwrap(); + + assert_eq!(exact_str, "\"Exact\""); + assert_eq!(approximate_str, "\"Approximate\""); + + let 
exact_back: QueryTreatmentType = serde_json::from_str(&exact_str).unwrap(); + let approximate_back: QueryTreatmentType = serde_json::from_str(&approximate_str).unwrap(); + + assert_eq!(exact_back, QueryTreatmentType::Exact); + assert_eq!(approximate_back, QueryTreatmentType::Approximate); + } +} diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs new file mode 100644 index 0000000..90dd913 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/logics.rs @@ -0,0 +1,169 @@ +use crate::query_logics::enums::{QueryTreatmentType, Statistic}; +use tracing::debug; + +/// Map statistic to precompute operator based on treatment type +/// This mirrors the Python implementation's logic +pub fn map_statistic_to_precompute_operator( + statistic: Statistic, + treatment_type: QueryTreatmentType, +) -> Result<(String, String), String> { + debug!( + "Mapping statistic {:?} with treatment type {:?} to precompute operator", + statistic, treatment_type + ); + match statistic { + Statistic::Quantile => { + if treatment_type == QueryTreatmentType::Exact { + Err("Statistic Quantile cannot be computed exactly".to_string()) + } else { + Ok(("DatasketchesKLL".to_string(), "".to_string())) + //Ok(("HydraKLL".to_string(), "".to_string())) + } + } + Statistic::Min | Statistic::Max => { + if treatment_type == QueryTreatmentType::Approximate { + Ok(("DatasketchesKLL".to_string(), "".to_string())) + //Ok(("HydraKLL".to_string(), "".to_string())) + } else { + Ok(( + "MultipleMinMax".to_string(), + statistic.to_string().to_lowercase(), + )) + } + } + Statistic::Sum | Statistic::Count => { + if treatment_type == QueryTreatmentType::Approximate { + Ok(( + "CountMinSketch".to_string(), + statistic.to_string().to_lowercase(), + )) + } else { + Ok(( + "MultipleSum".to_string(), + statistic.to_string().to_lowercase(), + )) + } + } + Statistic::Rate | 
Statistic::Increase => { + Ok(("MultipleIncrease".to_string(), "".to_string())) + } + _ => Err(format!("Statistic {statistic:?} not supported")), + } +} + +/// Check if a precompute operator supports subpopulations (multiple keys) +pub fn does_precompute_operator_support_subpopulations( + statistic: Statistic, + precompute_operator: &str, +) -> bool { + debug!( + "Checking if precompute operator '{}' supports subpopulations for statistic {:?}", + precompute_operator, statistic + ); + match precompute_operator { + // Single-key operators + "Increase" | "MinMax" | "Sum" | "DatasketchesKLL" => false, + + // Multi-key operators + "MultipleIncrease" | "MultipleMinMax" | "MultipleSum" | "HydraKLL" => true, + + // CountMinSketch supports subpopulations only for certain statistics + "CountMinSketch" => matches!(statistic, Statistic::Sum | Statistic::Count), + + // "CountMinSketchWithHeap" is only supported for Topk + // Other usages of CountMinSketchWithHeap will fall through. + "CountMinSketchWithHeap" if matches!(statistic, Statistic::Topk) => false, + + // Default: not supported + _ => panic!("Unexpected precompute operator: {}", precompute_operator), + } +} + +/// Check if temporal and spatial aggregations are collapsible +/// Based on Python implementation in promql_utilities/query_logics/logics.py +pub fn get_is_collapsable(temporal_aggregation: &str, spatial_aggregation: &str) -> bool { + debug!( + "Checking if temporal aggregation '{}' and spatial aggregation '{}' are collapsable", + temporal_aggregation, spatial_aggregation + ); + match spatial_aggregation { + "sum" => matches!( + temporal_aggregation, + "sum_over_time" | "count_over_time" // Note: "increase" and "rate" are commented out in Python + ), + "min" => temporal_aggregation == "min_over_time", + "max" => temporal_aggregation == "max_over_time", + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_map_statistic_to_precompute_operator() { + // Test exact sum + let result 
= + map_statistic_to_precompute_operator(Statistic::Sum, QueryTreatmentType::Exact) + .unwrap(); + assert_eq!(result, ("MultipleSum".to_string(), "sum".to_string())); + + // Test approximate sum + let result = + map_statistic_to_precompute_operator(Statistic::Sum, QueryTreatmentType::Approximate) + .unwrap(); + assert_eq!(result, ("CountMinSketch".to_string(), "sum".to_string())); + + // Test exact quantile (should fail) + let result = + map_statistic_to_precompute_operator(Statistic::Quantile, QueryTreatmentType::Exact); + assert!(result.is_err()); + + // Test approximate quantile + let result = map_statistic_to_precompute_operator( + Statistic::Quantile, + QueryTreatmentType::Approximate, + ) + .unwrap(); + assert_eq!(result, ("DatasketchesKLL".to_string(), "".to_string())); + //assert_eq!(result, ("HydraKLL".to_string(), "".to_string())); + } + + #[test] + fn test_does_precompute_operator_support_subpopulations() { + // Test MultipleSum supports subpopulations + assert!(does_precompute_operator_support_subpopulations( + Statistic::Sum, + "MultipleSum" + )); + + // Test DatasketchesKLL does not support subpopulations + assert!(!does_precompute_operator_support_subpopulations( + Statistic::Quantile, + "DatasketchesKLL" + )); + + // Test HydraKLL supports subpopulations + assert!(does_precompute_operator_support_subpopulations( + Statistic::Quantile, + "HydraKLL" + )); + + // Test CountMinSketch with valid statistic + assert!(does_precompute_operator_support_subpopulations( + Statistic::Sum, + "CountMinSketch" + )); + } + + #[test] + fn test_get_is_collapsable() { + assert!(get_is_collapsable("sum_over_time", "sum")); + assert!(get_is_collapsable("count_over_time", "sum")); + assert!(get_is_collapsable("min_over_time", "min")); + assert!(get_is_collapsable("max_over_time", "max")); + assert!(!get_is_collapsable("min_over_time", "sum")); + assert!(!get_is_collapsable("unknown", "sum")); + } +} diff --git 
a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs new file mode 100644 index 0000000..f3a98d1 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/mod.rs @@ -0,0 +1,7 @@ +pub mod enums; +pub mod logics; +pub mod parsing; + +pub use enums::*; +pub use logics::*; +pub use parsing::*; diff --git a/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs new file mode 100644 index 0000000..c37bb48 --- /dev/null +++ b/CommonDependencies/dependencies/rs/promql_utilities/src/query_logics/parsing.rs @@ -0,0 +1,135 @@ +use core::panic; + +use promql_parser::parser::Expr; +use tracing::debug; + +use crate::ast_matching::PromQLMatchResult; +use crate::data_model::KeyByLabelNames; +use crate::query_logics::enums::{QueryPatternType, Statistic}; + +pub fn get_metric_and_spatial_filter(match_result: &PromQLMatchResult) -> (String, String) { + debug!("Extracting metric and spatial filter from match result"); + let mut metric_name = match_result.get_metric_name().unwrap_or_default(); + debug!("Initial metric name: {}", metric_name); + + let spatial_filter = if let Some(metric_token) = match_result + .tokens + .get("metric") + .and_then(|token| token.metric.as_ref()) + { + if let Some(ast_vs) = metric_token.ast.as_ref() { + // Render the VectorSelector AST to string and extract inner `{...}` content + // let ast_str = format!("{}", ast_vs); + let ast_str = Expr::from(ast_vs.clone()).prettify(); + if let Some(inner) = ast_str.split('{').nth(1).and_then(|s| s.split('}').next()) { + debug!("Found spatial filter content: {}", inner); + // Ensure metric_name does not include the selector part + metric_name = metric_name + .split('{') + .next() + .unwrap_or(&metric_name) + .to_string(); + debug!("Cleaned metric name: {}", metric_name); + 
inner.to_string() + } else { + String::new() + } + } else { + // No AST available -> return empty spatial filter (no fallback reconstruction) + String::new() + } + } else { + String::new() + }; + + debug!( + "Final result - metric: {}, spatial_filter: {}", + metric_name, spatial_filter + ); + (metric_name, spatial_filter) +} + +/// Get statistics to compute based on pattern type and tokens +pub fn get_statistics_to_compute( + pattern_type: QueryPatternType, + match_result: &PromQLMatchResult, +) -> Vec { + debug!("Computing statistics for pattern type {:?}", pattern_type); + let statistic_to_compute: Option = if pattern_type == QueryPatternType::OnlyTemporal + || pattern_type == QueryPatternType::OneTemporalOneSpatial + { + match_result.get_function_name().map(|function_name| { + let name = function_name.to_lowercase(); + name.split('_').next().unwrap_or(&name).to_string() + }) + } else if pattern_type == QueryPatternType::OnlySpatial { + match_result + .get_aggregation_op() + .map(|agg| agg.to_lowercase()) + } else { + panic!("Unsupported query pattern type"); + }; + + if let Some(statistic_to_compute) = statistic_to_compute { + debug!("Found statistic to compute: {}", statistic_to_compute); + if statistic_to_compute == "avg" { + vec![Statistic::Sum, Statistic::Count] + } else if let Ok(stat) = statistic_to_compute.parse::() { + vec![stat] + } else { + panic!("Unsupported statistic: {}", statistic_to_compute); + } + } else { + panic!("No statistic found in the query"); + } +} + +pub fn get_spatial_aggregation_output_labels( + match_result: &PromQLMatchResult, + all_labels: &KeyByLabelNames, +) -> KeyByLabelNames { + debug!("Getting spatial aggregation output labels"); + debug!("All labels: {:?}", all_labels); + // Match Python behaviour: assume aggregation token and modifier exist + // and raise (panic) if missing or invalid. "by" and "without" logic + // remain the same. 
+ let aggregation_token = match_result + .tokens + .get("aggregation") + .and_then(|token| token.aggregation.as_ref()) + .expect("aggregation token missing"); + + // Patching: When the query is topk, we should always return all labels + if aggregation_token.op.to_lowercase() == "topk" { + debug!("Aggregation operation is 'topk', returning all labels"); + return all_labels.clone(); + } + + // Fixing issue https://github.com/ProjectASAP/asap-internal/issues/24 + let modifier: &crate::AggregationModifier = match aggregation_token.modifier.as_ref() { + Some(m) => m, + None => { + debug!("No aggregation modifier found, returning empty KeyByLabelNames"); + return KeyByLabelNames::new(vec![]); + } + }; + + debug!( + "Modifier type: {}, labels: {:?}", + modifier.modifier_type, modifier.labels + ); + match modifier.modifier_type.as_str() { + "by" => { + debug!("Processing 'by' modifier"); + // Return only the labels specified in "by" clause + KeyByLabelNames::new(modifier.labels.clone()) + } + "without" => { + debug!("Processing 'without' modifier"); + // Return all labels except those specified in "without" clause + let without_labels = KeyByLabelNames::new(modifier.labels.clone()); + all_labels.difference(&without_labels) + } + _ => panic!("Invalid aggregation modifier"), + } +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock new file mode 100644 index 0000000..133a015 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.lock @@ -0,0 +1,919 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" 
+ +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + 
"js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.27" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] 
+name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "packedvec" 
+version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" 
+dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + 
+[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sketch_db_common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "promql_utilities", + "serde", + "serde_json", + "serde_yaml", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = 
"0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + 
+[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml new file mode 100644 index 0000000..a70577c --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "sketch_db_common" +version = "0.1.0" +edition = "2021" + +[dependencies] +promql_utilities = { path = "../promql_utilities" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" +anyhow = "1.0" +clap = { version = "4.0", features = ["derive"] } diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs new file mode 100644 index 0000000..20dcc2b --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/aggregation_config.rs @@ -0,0 +1,387 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use serde_yaml; +use std::collections::HashMap; + +use crate::enums::QueryLanguage; +use crate::traits::SerializableToSink; +use crate::utils::normalize_spatial_filter; +use promql_utilities::data_model::KeyByLabelNames; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationConfig { + pub aggregation_id: u64, + pub aggregation_type: String, + pub aggregation_sub_type: String, + pub parameters: HashMap, + pub grouping_labels: KeyByLabelNames, + pub aggregated_labels: KeyByLabelNames, + pub rollup_labels: KeyByLabelNames, + pub original_yaml: String, + + // NEW fields for sliding window support (Issue #236) + pub window_size: u64, // Window size in seconds (e.g., 900s for 15m) + pub slide_interval: u64, // Slide/hop interval in seconds (e.g., 30s) + pub window_type: String, // "tumbling" or 
"sliding" + + // DEPRECATED but kept for backward compatibility + pub tumbling_window_size: u64, + + pub spatial_filter: String, + pub spatial_filter_normalized: String, + pub metric: String, // PromQL mode: metric name; SQL mode: derived from table_name.value_column + pub num_aggregates_to_retain: Option, + pub read_count_threshold: Option, + + // SQL-specific fields (optional, used when query_language=sql) + pub table_name: Option, // SQL mode: table name + pub value_column: Option, // SQL mode: which value column to aggregate +} + +// TODO: need to implement deserialization methods + +impl AggregationConfig { + #[allow(clippy::too_many_arguments)] + pub fn new( + aggregation_id: u64, + aggregation_type: String, + aggregation_sub_type: String, + parameters: HashMap, + grouping_labels: KeyByLabelNames, + aggregated_labels: KeyByLabelNames, + rollup_labels: KeyByLabelNames, + original_yaml: String, + tumbling_window_size: u64, + spatial_filter: String, + metric: String, + num_aggregates_to_retain: Option, + read_count_threshold: Option, + // NEW parameters for sliding window support + window_size: Option, + slide_interval: Option, + window_type: Option, + // SQL-specific fields + table_name: Option, + value_column: Option, + ) -> Self { + // Generate normalized spatial filter (placeholder implementation) + let spatial_filter_normalized = normalize_spatial_filter(&spatial_filter); + + // Handle backward compatibility: if new fields not provided, use tumbling_window_size + let window_size = window_size.unwrap_or(tumbling_window_size); + let slide_interval = slide_interval.unwrap_or(tumbling_window_size); + let window_type = window_type.unwrap_or_else(|| "tumbling".to_string()); + + Self { + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + original_yaml, + window_size, + slide_interval, + window_type, + tumbling_window_size, + spatial_filter, + spatial_filter_normalized, + metric, + 
num_aggregates_to_retain, + read_count_threshold, + table_name, + value_column, + } + } + + // pub fn with_sub_type(mut self, sub_type: String) -> Self { + // self.aggregation_sub_type = Some(sub_type); + // self + // } + + // pub fn with_parameters(mut self, parameters: HashMap) -> Self { + // self.parameters = parameters; + // self + // } + + pub fn with_original_yaml(mut self, yaml: String) -> Self { + self.original_yaml = yaml; + self + } + + pub fn deserialize_from_json( + data: &Value, + ) -> Result> { + let aggregation_id = data["aggregationId"] + .as_u64() + .ok_or("Missing aggregationId")?; + + let aggregation_type = data["aggregationType"] + .as_str() + .ok_or("Missing aggregationType")? + .to_string(); + + let aggregation_sub_type = data["aggregationSubType"] + .as_str() + .ok_or("Missing aggregationSubType")? + .to_string(); + + let parameters = data["parameters"] + .as_object() + .ok_or("Missing parameters")? + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + // Note: In Python, eval(data["originalYaml"]) is used, but this is unsafe + // Using the string value directly instead + let original_yaml = data["originalYaml"].as_str().unwrap_or("").to_string(); + + // Deserialize KeyByLabelNames - assuming they have deserialize_from_json methods + let grouping_labels = KeyByLabelNames::deserialize_from_json(&data["groupingLabels"])?; + let aggregated_labels = KeyByLabelNames::deserialize_from_json(&data["aggregatedLabels"])?; + let rollup_labels = KeyByLabelNames::deserialize_from_json(&data["rollupLabels"])?; + + let tumbling_window_size = data["tumblingWindowSize"] + .as_u64() + .ok_or("Missing tumblingWindowSize")?; + + // NEW: Handle new window fields with backward compatibility + let window_type = data + .get("windowType") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let window_size = data.get("windowSize").and_then(|v| v.as_u64()); + + let slide_interval = data.get("slideInterval").and_then(|v| v.as_u64()); + + let 
spatial_filter = data["spatialFilter"].as_str().unwrap_or("").to_string(); + + let metric = data["metric"].as_str().ok_or("Missing metric")?.to_string(); + + let num_aggregates_to_retain = data.get("numAggregatesToRetain").and_then(|v| v.as_u64()); + let read_count_threshold = data.get("readCountThreshold").and_then(|v| v.as_u64()); + + // SQL-specific fields (optional) + let table_name = data + .get("tableName") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let value_column = data + .get("valueColumn") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + Ok(Self::new( + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + original_yaml, + tumbling_window_size, + spatial_filter, + metric, + num_aggregates_to_retain, + read_count_threshold, + window_size, + slide_interval, + window_type, + table_name, + value_column, + )) + } + + pub fn deserialize_from_bytes( + bytes: &[u8], + ) -> Result> { + let data_str = std::str::from_utf8(bytes)?.trim(); + let data: Value = serde_json::from_str(data_str)?; + Self::deserialize_from_json(&data) + } + + pub fn from_yaml_data( + aggregation_data: &serde_yaml::Value, + num_aggregates_to_retain: Option, + read_count_threshold: Option, + query_language: QueryLanguage, + ) -> Result { + let aggregation_id = aggregation_data["aggregationId"] + .as_u64() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationId"))?; + + let labels = &aggregation_data["labels"]; + let grouping_labels = KeyByLabelNames::new( + labels["grouping"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing grouping labels"))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + let aggregated_labels = KeyByLabelNames::new( + labels["aggregated"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing aggregated labels"))? 
+ .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + let rollup_labels = KeyByLabelNames::new( + labels["rollup"] + .as_sequence() + .ok_or_else(|| anyhow::anyhow!("Missing rollup labels"))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(), + ); + + let aggregation_type = aggregation_data["aggregationType"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationType"))? + .to_string(); + + let aggregation_sub_type = aggregation_data["aggregationSubType"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing aggregationSubType"))? + .to_string(); + + // Convert serde_yaml::Value to serde_json::Value for parameters + let parameters: HashMap = aggregation_data["parameters"] + .as_mapping() + .ok_or_else(|| anyhow::anyhow!("Missing parameters"))? + .iter() + .map(|(k, v)| { + let key = k.as_str().unwrap_or("").to_string(); + let value = serde_json::to_value(v).unwrap_or(Value::Null); + (key, value) + }) + .collect(); + + let tumbling_window_size = aggregation_data["tumblingWindowSize"] + .as_u64() + .ok_or_else(|| anyhow::anyhow!("Missing tumblingWindowSize"))?; + + // NEW: Handle new window fields with backward compatibility + let window_type = aggregation_data + .get("windowType") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let window_size = aggregation_data.get("windowSize").and_then(|v| v.as_u64()); + + let slide_interval = aggregation_data + .get("slideInterval") + .and_then(|v| v.as_u64()); + + let spatial_filter = aggregation_data["spatialFilter"] + .as_str() + .unwrap_or("") + .to_string(); + + // Handle PromQL (metric) vs SQL (table_name/value_column) based on query_language + let (metric, table_name, value_column) = match query_language { + QueryLanguage::promql => { + let metric = aggregation_data["metric"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Missing metric for PromQL query language"))? 
+ .to_string(); + (metric, None, None) + } + QueryLanguage::sql => { + let table_name = aggregation_data + .get("table_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing table_name for SQL query language"))? + .to_string(); + let value_column = aggregation_data + .get("value_column") + .and_then(|v| v.as_str()) + .unwrap_or("value") + .to_string(); + // Derive metric from table_name.value_column for internal use + let metric = format!("{}.{}", table_name, value_column); + (metric, Some(table_name), Some(value_column)) + } + QueryLanguage::elastic_querydsl => { + // Elastic doesn't use metric/table_name in aggregations + (String::new(), None, None) + } + QueryLanguage::elastic_sql => { + // Elastic doesn't use metric/table_name in aggregations + (String::new(), None, None) + } + }; + + Ok(Self::new( + aggregation_id, + aggregation_type, + aggregation_sub_type, + parameters, + grouping_labels, + aggregated_labels, + rollup_labels, + String::new(), // original_yaml - empty as in Python + tumbling_window_size, + spatial_filter, + metric, + num_aggregates_to_retain, + read_count_threshold, + window_size, + slide_interval, + window_type, + table_name, + value_column, + )) + } +} + +impl SerializableToSink for AggregationConfig { + fn serialize_to_json(&self) -> Value { + let mut json = serde_json::json!({ + "aggregationId": self.aggregation_id, + "aggregationType": self.aggregation_type, + "aggregationSubType": self.aggregation_sub_type, + "parameters": self.parameters, + "originalYaml": self.original_yaml, + "tumblingWindowSize": self.tumbling_window_size, + // NEW: Include new window fields + "windowSize": self.window_size, + "slideInterval": self.slide_interval, + "windowType": self.window_type, + "spatialFilter": self.spatial_filter, + "metric": self.metric, + }); + + // Only include numAggregatesToRetain if it's Some + if let Some(num_aggregates) = self.num_aggregates_to_retain { + json["numAggregatesToRetain"] = 
serde_json::json!(num_aggregates); + } + + // Only include readCountThreshold if it's Some + if let Some(threshold) = self.read_count_threshold { + json["readCountThreshold"] = serde_json::json!(threshold); + } + + // SQL-specific fields (only include if present) + if let Some(ref table_name) = self.table_name { + json["tableName"] = serde_json::json!(table_name); + } + if let Some(ref value_column) = self.value_column { + json["valueColumn"] = serde_json::json!(value_column); + } + + json + } + + fn serialize_to_bytes(&self) -> Vec { + self.original_yaml.as_bytes().to_vec() + } +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs new file mode 100644 index 0000000..3b5bb6a --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/enums.rs @@ -0,0 +1,25 @@ +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +#[allow(non_camel_case_types)] +pub enum QueryLanguage { + #[value(alias = "SQL")] + sql, + #[value(alias = "PROMQL")] + promql, + #[value(alias = "ElasticQueryDSL")] + elastic_querydsl, + #[value(alias = "ElasticSQL")] + elastic_sql, +} + +/// Policy for cleaning up old aggregates from the store. +/// Must be explicitly specified in inference_config.yaml. 
+#[derive(Clone, Debug, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CleanupPolicy { + /// Keep only the N most recent aggregates (circular buffer behavior) + CircularBuffer, + /// Remove aggregates after they've been read N times + ReadBased, + /// Never clean up aggregates + NoCleanup, +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs new file mode 100644 index 0000000..286882f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/lib.rs @@ -0,0 +1,4 @@ +pub mod aggregation_config; +pub mod enums; +pub mod traits; +pub mod utils; diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs new file mode 100644 index 0000000..196d908 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/traits.rs @@ -0,0 +1,7 @@ +use serde_json::Value; + +/// Trait for objects that can be serialized to different formats +pub trait SerializableToSink { + fn serialize_to_json(&self) -> Value; + fn serialize_to_bytes(&self) -> Vec; +} diff --git a/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs b/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs new file mode 100644 index 0000000..ef90e78 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sketch_db_common/src/utils.rs @@ -0,0 +1,41 @@ +/// Normalize spatial filter for PromQL queries +pub fn normalize_spatial_filter(filter: &str) -> String { + if filter.is_empty() { + return String::new(); + } + + // TODO: Parse the spatial filter, make fake ASTs, each one with matcher, + // prettify each, and sort them. 
Unfortunately, unable to manually create fake ASTs + // Current workaround: split spatial filter by commas, sort, and join + + let trimmed = filter.trim().strip_prefix('{').unwrap_or(filter.trim()); + let trimmed = trimmed.strip_suffix('}').unwrap_or(trimmed); + let trimmed = trimmed.trim(); + + let mut parts: Vec<&str> = trimmed.split(',').collect(); + parts.sort(); + + format!("{{{}}}", parts.join(",")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_spatial_filter() { + assert_eq!(normalize_spatial_filter("").as_str(), ""); + + let result = normalize_spatial_filter("instance=\"localhost:9090\""); + assert_eq!(result, "{instance=\"localhost:9090\"}"); + + let result = normalize_spatial_filter("{instance=\"localhost:9090\"}"); + assert_eq!(result, "{instance=\"localhost:9090\"}"); + + let result = normalize_spatial_filter("{job=\"prometheus\",instance=\"localhost:9090\"}"); + assert_eq!(result, "{instance=\"localhost:9090\",job=\"prometheus\"}"); + + let result = normalize_spatial_filter("job=\"prometheus\",instance=\"localhost:9090\""); + assert_eq!(result, "{instance=\"localhost:9090\",job=\"prometheus\"}"); + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/.gitignore b/CommonDependencies/dependencies/rs/sql_utilities/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock new file mode 100644 index 0000000..0a9134e --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.lock @@ -0,0 +1,587 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] 
+ +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parse_datetime" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acea383beda9652270f3c9678d83aa58cbfc16880343cae0c0c8c7d6c0974132" +dependencies = [ + "jiff", + "num-traits", + "winnow", +] + +[[package]] +name 
= "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "parse_datetime", + "sqlparser", + "tokio-test", +] + +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", +] + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +dependencies = [ + "pin-project-lite", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + 
"windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ 
+ "memchr", +] diff --git a/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml new file mode 100644 index 0000000..7ad9756 --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sql_utilities" +version = "0.1.0" +edition = "2021" +authors = ["SketchDB Team"] +description = "A standalone SQL pattern matching and query analysis library for Rust" +license = "MIT" +keywords = ["Clickhouse", "sql", "pattern-matching", "query-analysis"] +categories = ["parsing", "database", "development-tools"] + +[dependencies] +chrono = "0.4.39" +parse_datetime = "0.13.3" +sqlparser = "0.59.0" + +[dev-dependencies] +tokio-test = "0.4" diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs new file mode 100644 index 0000000..74fd48f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/mod.rs @@ -0,0 +1,8 @@ +pub mod sqlhelper; +pub mod sqlparser_test; +pub mod sqlpattern_matcher; +pub mod sqlpattern_parser; + +pub use sqlhelper::{SQLSchema, Table}; +pub use sqlpattern_matcher::*; +pub use sqlpattern_parser::*; diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs new file mode 100644 index 0000000..3e3176f --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs @@ -0,0 +1,157 @@ +use std::collections::{HashMap, HashSet}; + +#[derive(Debug, Clone)] +struct Columns { + time: String, + value_columns: HashSet, + metadata_columns: HashSet, +} + +#[derive(Debug, Clone)] +pub struct Table { + pub name: String, + pub time_column: String, + pub value_columns: HashSet, + pub metadata_columns: HashSet, +} + +impl Table { + pub fn new( + table_name: String, + time_column: 
String, + value_columns: HashSet, + metadata_columns: HashSet, + ) -> Self { + Self { + name: table_name, + time_column, + value_columns, + metadata_columns, + } + } +} + +#[derive(Debug, Clone)] +pub struct SQLSchema { + info: HashMap, +} + +impl SQLSchema { + pub fn new(table_schemas: Vec) -> Self { + let mut info = HashMap::new(); + + for table in table_schemas { + let columns = Columns { + time: table.time_column, + value_columns: table.value_columns, + metadata_columns: table.metadata_columns, + }; + info.insert(table.name, columns); + } + + Self { info } + } + + pub fn get_time_column(&self, table_name: &str) -> Option<&String> { + self.info.get(table_name).map(|cols| &cols.time) + } + + pub fn get_value_columns(&self, table_name: &str) -> Option<&HashSet> { + self.info.get(table_name).map(|cols| &cols.value_columns) + } + + pub fn get_metadata_columns(&self, table_name: &str) -> Option<&HashSet> { + self.info.get(table_name).map(|cols| &cols.metadata_columns) + } + + pub fn is_valid_value_column(&self, table: &str, value_column: &str) -> bool { + if let Some(value_columns) = self.get_value_columns(table) { + value_columns.contains(value_column) + } else { + false + } + } + + pub fn are_valid_metadata_columns(&self, table: &str, columns: &HashSet) -> bool { + if let Some(table_metadata_columns) = self.get_metadata_columns(table) { + for col in columns { + if !table_metadata_columns.contains(col) { + return false; + } + } + true + } else { + false + } + } +} + +#[derive(Debug, Clone)] +pub struct SQLQueryData { + pub aggregation_info: AggregationInfo, + pub metric: String, + pub labels: HashSet, + pub time_info: TimeInfo, + pub subquery: Option>, +} + +#[derive(Debug, Clone)] +pub struct TimeInfo { + time_col_name: String, + // Can be changed to use timezone (normal datetime incorporates TimeZone) in the future + start: f64, + // is_now: bool, + duration: f64, +} + +impl TimeInfo { + pub fn new(time_col_name: String, start: f64, duration: f64) -> Self { + Self 
{ + time_col_name, + start, + // is_now, + duration, + } + } + + pub fn get_time_col_name(&self) -> &str { + &self.time_col_name + } + + pub fn get_start(&self) -> f64 { + self.start + } + + pub fn get_duration(&self) -> f64 { + self.duration + } +} + +#[derive(Debug, Clone)] +pub struct AggregationInfo { + name: String, + value_column_name: String, + args: Vec, +} + +impl AggregationInfo { + pub fn new(name: String, value_column_name: String, args: Vec) -> Self { + Self { + name, + value_column_name, + args, + } + } + + pub fn get_name(&self) -> &str { + &self.name + } + + pub fn get_value_column_name(&self) -> &str { + &self.value_column_name + } + + pub fn get_args(&self) -> &Vec { + &self.args + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs new file mode 100644 index 0000000..fe40fcd --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs @@ -0,0 +1,412 @@ +#[cfg(test)] +mod tests { + // use super::*; + use sqlparser::dialect::GenericDialect; + use sqlparser::parser::Parser; + use std::collections::HashSet; + + use crate::sqlhelper::{SQLQueryData, SQLSchema as Schema, Table}; + use crate::sqlpattern_matcher::{QueryError, QueryType, SQLPatternMatcher}; + use crate::sqlpattern_parser::SQLPatternParser; + + pub fn create_test_schema() -> Schema { + let mut cpu_labels = HashSet::new(); + cpu_labels.insert("L1".to_string()); + cpu_labels.insert("L2".to_string()); + cpu_labels.insert("L3".to_string()); + cpu_labels.insert("L4".to_string()); + + let mut mem_labels = HashSet::new(); + mem_labels.insert("L1".to_string()); + mem_labels.insert("L2".to_string()); + mem_labels.insert("L3".to_string()); + mem_labels.insert("L4".to_string()); + + let cpu_table = Table::new( + "cpu_usage".to_string(), + "time".to_string(), + HashSet::from(["value".to_string()]), + cpu_labels, + ); + let 
mem_table = Table::new( + "mem_usage".to_string(), + "ms".to_string(), + HashSet::from(["mb".to_string()]), + mem_labels, + ); + + Schema::new(vec![cpu_table, mem_table]) + } + + #[test] + fn test_basic_parsing() { + let schema = create_test_schema(); + let time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs_f64(); + let dialect = GenericDialect {}; + let sql = "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1"; + + let statements = Parser::parse_sql(&dialect, sql).unwrap(); + let query_data = SQLPatternParser::new(&schema, time).parse_query(&statements); + + assert!(query_data.is_some()); + let query = query_data.unwrap(); + assert_eq!(query.metric, "cpu_usage"); + assert_eq!(query.aggregation_info.get_name(), "AVG"); + assert!(query.labels.contains("L1")); + } + + #[test] + fn test_pattern_matching() { + let schema = create_test_schema(); + let time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs_f64(); + let matcher = SQLPatternMatcher::new(schema.clone(), 1.0); + + let dialect = GenericDialect {}; + let sql = "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + + let statements = Parser::parse_sql(&dialect, sql).unwrap(); + + if let Some(query_data) = SQLPatternParser::new(&schema, time).parse_query(&statements) { + let result = matcher.query_info_to_pattern(&query_data); + assert!(result.is_valid()); + assert_eq!(result.query_type, vec![QueryType::Spatial]); + } + } + + #[test] + fn test_full_suite() { + let tables = vec![Table::new( + String::from("cpu_usage"), + String::from("time"), + HashSet::from([String::from("value")]), + HashSet::from([ + String::from("L1"), + String::from("L2"), + String::from("L3"), + String::from("L4"), + ]), + )]; + let schema = Schema::new(tables); + let scrape_interval = 1.0; + + let test_queries = vec![ + ( + 
"dated_temporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "dated_temporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "dated_spatial_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial], + None + ), + ( + "dated_spatial_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "dated_spatial_of_temporal_quantile_max", + "SELECT QUANTILE(0.95, value) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:00') AND '2025-10-01 00:00:00' GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + // // Temporal queries + ( + "temporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "temporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + ( + "temporal_avg", + 
"SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalGeneric], + None + ), + // // // Spatial queries + ( + "spatial_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "spatial_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::Spatial], + None + ), + ( + "spatial_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![QueryType::Spatial], + None + ), + ( + "spatial_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial], + None + ), + ( + "spatial_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + // // // Spatial of temporal queries + ( + "spatial_of_temporal_sum_sum", + "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_min", + "SELECT SUM(result) FROM (SELECT MIN(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_max", + "SELECT SUM(result) FROM (SELECT MAX(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_sum_avg", + "SELECT SUM(result) FROM (SELECT AVG(value) AS result FROM cpu_usage 
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_sum", + "SELECT MAX(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_min", + "SELECT MAX(result) FROM (SELECT MIN(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_max", + "SELECT MAX(result) FROM (SELECT MAX(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_max_avg", + "SELECT MAX(result) FROM (SELECT AVG(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3, L4", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_max", + "SELECT QUANTILE(0.95, value) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_min", + "SELECT QUANTILE(0.95, value) FROM (SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_sum", + "SELECT QUANTILE(0.95, value) FROM (SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) 
GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_quantile_avg", + "SELECT QUANTILE(0.95, value) FROM (SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ( + "spatial_of_temporal_avg_quantile", + "SELECT AVG(result) FROM (SELECT QUANTILE(0.95, value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2", + vec![QueryType::Spatial, QueryType::TemporalQuantile], + None + ), + ( + "spatial_of_temporal_quantile_quantile", + "SELECT QUANTILE(0.95, value) FROM (SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1, L2, L3", + vec![QueryType::Spatial, QueryType::TemporalQuantile], + None + ), + // // // Error cases + ( + "temporal_invalid_aggregation_label", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, FAKE_LABEL", + vec![], + Some(QueryError::InvalidAggregationLabel) + ), + ( + "temporal_invalid_time_column", + "SELECT SUM(value) FROM cpu_usage WHERE datetime BETWEEN NOW() AND DATEADD(s, -10, NOW()) GROUP BY L1, L2, L3, L4", + vec![], + Some(QueryError::InvalidTimeCol) + ), + ( + "temporal_invalid_value_column", + "SELECT SUM(not_a_value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![], + Some(QueryError::InvalidValueCol) + ), + // SpatioTemporal queries - span multiple scrape intervals but group by subset of labels + ( + "spatiotemporal_sum", + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_max", + "SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, 
L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_min", + "SELECT MIN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_avg", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatiotemporal_quantile", + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "temporal_illegal_aggregation_function", + "SELECT HARMONIC_MEAN(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3", + vec![], + Some(QueryError::IllegalAggregationFn) + ), + ( + "spatial_scrape_duration_too_small", + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN NOW() AND DATEADD(s, 0, NOW()) GROUP BY L1, L2", + vec![], + Some(QueryError::SpatialDurationSmall) + ), + ( + "temporal_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None + ), + ( + "spatial_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1", + vec![QueryType::Spatial], + None + ), + ( + "spatiotemporal_percentile", + "SELECT PERCENTILE(value, 95) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2", + vec![QueryType::SpatioTemporal], + None + ), + ( + "spatial_of_temporal_percentile_max", + "SELECT PERCENTILE(value, 95) FROM (SELECT MAX(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1", + vec![QueryType::Spatial, QueryType::TemporalGeneric], + None + ), + ]; + + let mut successes = 0; + let mut failures = 0; + + for (name, sql, expected_types, error) in 
test_queries {
            println!("Testing: {}", name);

            if let Some(query_data) = parse_sql_query(sql) {
                let matcher = SQLPatternMatcher::new(schema.clone(), scrape_interval);
                let result = matcher.query_info_to_pattern(&query_data);

                // Record the outcome BEFORE asserting: previously the two
                // assert_eq! calls came first, so the first mismatch panicked
                // and the failure counter / final report below were dead code.
                if result.query_type == expected_types && result.error == error {
                    println!("✓ Passed");
                    successes += 1;
                } else {
                    println!("✗ Failed");
                    println!("expected type, error: {:?}, {:?}", expected_types, error);
                    println!(
                        "got type, error: {:?}, {:?}",
                        result.query_type, result.error
                    );
                    failures += 1;
                }

                assert_eq!(result.query_type, expected_types, "query `{}`", name);
                assert_eq!(result.error, error, "query `{}`", name);
            } else {
                println!("✗ Failed to parse");
                failures += 1;
                // Previously a parse failure was only counted, so an unparsable
                // query let the test pass silently; fail loudly instead.
                panic!("failed to parse query `{}`", name);
            }
        }

        println!("\nRESULTS\n=======");
        println!("Passed: {}", successes);
        println!("Failed: {}", failures);
    }

    /// Parses `sql` with the ClickHouse dialect against the shared test schema
    /// at the current wall-clock time; `None` when the shape is unsupported.
    pub fn parse_sql_query(sql: &str) -> Option<SQLQueryData> {
        let schema = create_test_schema();
        let time = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs_f64();
        let dialect = sqlparser::dialect::ClickHouseDialect {};
        let statements = Parser::parse_sql(&dialect, sql).ok()?;
        println!("Query: {sql}, AST: {statements:#?}");

        SQLPatternParser::new(&schema, time).parse_query(&statements)
    }
}
diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs
new file mode 100644
index 0000000..1aac0da
--- /dev/null
+++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs
@@ -0,0 +1,287 @@
use crate::sqlhelper::AggregationInfo;
use crate::sqlhelper::SQLQueryData;
use crate::sqlhelper::SQLSchema;
use crate::sqlhelper::TimeInfo;

use std::collections::HashSet;

/// Classification of a (sub)query by how it aggregates over time and labels.
#[derive(Debug, Clone, PartialEq)]
pub enum QueryType {
    Spatial,
    TemporalGeneric,
    TemporalQuantile,
    SpatioTemporal,
}

#[derive(Debug, Clone, PartialEq)]
pub enum
QueryError {
    InvalidAggregationLabel,
    InvalidTimeCol,
    InvalidValueCol,
    TemporalMissingLabels, // indistinguishable from too large scrape duration
    IllegalAggregationFn,
    SpatialDurationSmall,
}

/// Result of pattern-matching a parsed query: one classification and one data
/// entry per (sub)query, or an error describing why the query is unsupported.
/// NOTE(review): generic parameters below were stripped by extraction and have
/// been restored from usage.
#[derive(Debug)]
pub struct SQLQuery {
    pub query_type: Vec<QueryType>,
    pub query_data: Vec<SQLQueryData>,
    pub error: Option<QueryError>,
    pub msg: Option<String>,
}

impl SQLQuery {
    pub fn new(query_type: Vec<QueryType>, error: Option<QueryError>, msg: Option<String>) -> Self {
        Self {
            query_type,
            query_data: Vec::new(),
            error,
            msg,
        }
    }

    /// Appends one classified (sub)query to this result.
    pub fn add_subquery(
        &mut self,
        query_type: QueryType,
        aggregation: AggregationInfo,
        metric: String,
        labels: HashSet<String>,
        time: TimeInfo,
    ) {
        self.query_type.push(query_type);

        self.query_data.push(SQLQueryData {
            aggregation_info: aggregation,
            metric,
            labels,
            time_info: time,
            subquery: None,
        });
    }

    /// Marks the whole query invalid, clearing any classifications made so far.
    pub fn invalidate_query(&mut self, error: QueryError, msg: String) {
        self.error = Some(error);
        self.msg = Some(msg);
        self.query_type.clear();
    }

    pub fn is_valid(&self) -> bool {
        self.error.is_none()
    }
}

/// Validates parsed queries against a schema and classifies them by pattern.
pub struct SQLPatternMatcher {
    schema: SQLSchema,
    scrape_interval: f64,
    legal_aggregations: HashSet<&'static str>,
}

impl SQLPatternMatcher {
    pub fn new(schema: SQLSchema, scrape_interval: f64) -> Self {
        // Aggregation functions the matcher knows how to classify.
        let legal_aggregations =
            HashSet::from(["AVG", "SUM", "COUNT", "MIN", "MAX", "QUANTILE"]);

        Self {
            schema,
            scrape_interval,
            legal_aggregations,
        }
    }

    pub fn is_valid_aggregation(&self, aggregation: &str) -> bool {
        self.legal_aggregations.contains(aggregation)
    }

    /// Walks the (sub)query chain, validating each level against the schema and
    /// flattening it into `(metric, aggregation, intervals, labels, time)` rows.
    #[allow(clippy::type_complexity)]
    pub fn flatten_query_info(
        &self,
        query: &SQLQueryData,
    ) -> Result<Vec<(String, AggregationInfo, f64, HashSet<String>, TimeInfo)>, (QueryError, String)>
    {
        let mut query_data = Vec::new();
        let mut current_query = Some(query);
        let
mut scraped_intervals = 0.0;

        while let Some(query) = current_query {
            if !self
                .schema
                .are_valid_metadata_columns(&query.metric, &query.labels)
            {
                if let Some(schema_metadata_columns) =
                    self.schema.get_metadata_columns(&query.metric)
                {
                    let illegal_columns: HashSet<_> =
                        query.labels.difference(schema_metadata_columns).collect();
                    println!("Returned QueryError::InvalidAggregationLabel");
                    return Err((
                        QueryError::InvalidAggregationLabel,
                        format!(
                            "attempt to aggregate by columns {:?}, which are not present for metric {}",
                            illegal_columns, query.metric
                        )
                    ));
                }
                // NOTE(review): an unknown metric falls through here; it is
                // caught later by the value-column check.
            }

            if !self.is_valid_aggregation(query.aggregation_info.get_name()) {
                println!("Returned QueryError::IllegalAggregationFn");

                return Err((
                    QueryError::IllegalAggregationFn,
                    format!(
                        "attempt to use illegal aggregation function {}",
                        query.aggregation_info.get_name()
                    ),
                ));
            }

            let time_info = &query.time_info;
            let time_column_name = time_info.get_time_col_name();

            // "UNUSED" marks the synthetic outer level of a nested query whose
            // time window lives on the inner subquery; skip time/value checks.
            if time_column_name != "UNUSED" {
                if let Some(schema_time_column) = self.schema.get_time_column(&query.metric) {
                    if time_column_name != schema_time_column {
                        println!("Returned QueryError::InvalidTimeCol: {time_column_name}");

                        return Err((
                            QueryError::InvalidTimeCol,
                            format!(
                                "Attempted to scrape from column [ {} ] instead of correct time column [ {} ]",
                                time_column_name, schema_time_column
                            )
                        ));
                    }
                }

                let value_column_name = query.aggregation_info.get_value_column_name();
                if !self
                    .schema
                    .is_valid_value_column(&query.metric, value_column_name)
                {
                    println!("Returned QueryError::InvalidValueCol");

                    return Err((
                        QueryError::InvalidValueCol,
                        format!("Incorrect value column name: {}", value_column_name),
                    ));
                }

                let scrape_duration = time_info.get_duration();
                scraped_intervals = scrape_duration / self.scrape_interval;

                // BUG FIX: the window is too small when it covers less than ONE
                // interval, i.e. `scraped_intervals < 1.0`. The old comparison
                // against `self.scrape_interval` (an interval *length*, not a
                // count) was only correct when the interval happened to be 1.0,
                // and the message printed the count instead of the duration.
                if scraped_intervals < 1.0 {
                    println!("Returned QueryError::SpatialDurationSmall");

                    return Err((
                        QueryError::SpatialDurationSmall,
                        format!(
                            "scrape duration {} less than one interval {}",
                            scrape_duration, self.scrape_interval
                        ),
                    ));
                }
            }

            query_data.push((
                query.metric.clone(),
                query.aggregation_info.clone(),
                scraped_intervals,
                query.labels.clone(),
                time_info.clone(),
            ));

            current_query = query.subquery.as_deref();
        }

        Ok(query_data)
    }

    /// Classifies each flattened (sub)query as Spatial / Temporal* /
    /// SpatioTemporal and assembles the final `SQLQuery`.
    pub fn query_info_to_pattern(&self, query_data: &SQLQueryData) -> SQLQuery {
        println!("SQLQueryData: {query_data:?}");
        let query_data = match self.flatten_query_info(query_data) {
            Ok(data) => data,
            Err((error, msg)) => {
                return SQLQuery::new(Vec::new(), Some(error), Some(msg));
            }
        };
        println!("flattened QueryData: {query_data:?}");

        let mut sql_query = SQLQuery::new(Vec::new(), None, None);

        // The tuple's third element is the number of scraped intervals, not a
        // raw duration; comparisons below are therefore against 1.0 (one
        // interval). BUG FIX: they previously compared against
        // `self.scrape_interval`, which is only equivalent when it equals 1.0.
        for (i, (metric, aggregation_info, scraped_intervals, labels, time_info)) in
            query_data.iter().enumerate()
        {
            if i < query_data.len() - 1 {
                // Not the last query: outer levels of a nested query are spatial.
                sql_query.add_subquery(
                    QueryType::Spatial,
                    aggregation_info.clone(),
                    metric.clone(),
                    labels.clone(),
                    time_info.clone(),
                );
            } else {
                // Last query (the innermost level, which owns the time window).
                if (scraped_intervals - 1.0).abs() < f64::EPSILON {
                    // Exactly one interval: a purely spatial aggregation.
                    sql_query.add_subquery(
                        QueryType::Spatial,
                        aggregation_info.clone(),
                        metric.clone(),
                        labels.clone(),
                        time_info.clone(),
                    );
                } else if *scraped_intervals > 1.0 {
                    // Check if labels match all metadata columns
                    let has_all_labels = self
                        .schema
                        .get_metadata_columns(metric)
                        .map(|schema_metadata_columns| labels == schema_metadata_columns)
                        .unwrap_or(true);

                    if has_all_labels {
                        // Full temporal query with all labels (PromQL-equivalent)
                        if aggregation_info.get_name() == "QUANTILE" {
                            sql_query.add_subquery(
                                QueryType::TemporalQuantile,
aggregation_info.clone(),
                                metric.clone(),
                                labels.clone(),
                                time_info.clone(),
                            );
                        } else {
                            sql_query.add_subquery(
                                QueryType::TemporalGeneric,
                                aggregation_info.clone(),
                                metric.clone(),
                                labels.clone(),
                                time_info.clone(),
                            );
                        }
                    } else {
                        // SpatioTemporal: spans multiple scrape intervals but groups by subset of labels
                        sql_query.add_subquery(
                            QueryType::SpatioTemporal,
                            aggregation_info.clone(),
                            metric.clone(),
                            labels.clone(),
                            time_info.clone(),
                        );
                    }
                }
            }
        }

        sql_query
    }
}
diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs
new file mode 100644
index 0000000..c7aa245
--- /dev/null
+++ b/CommonDependencies/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs
@@ -0,0 +1,528 @@
use crate::sqlhelper::SQLSchema;
use crate::sqlhelper::{AggregationInfo, SQLQueryData, TimeInfo};
use sqlparser::ast::*;
use std::collections::HashSet;

use parse_datetime::parse_datetime;
use sqlparser::ast::Value::SingleQuotedString;

/// Parses a restricted SQL dialect (single SELECT, optional one-level
/// subquery/CTE) into `SQLQueryData` for the pattern matcher.
/// NOTE(review): `Option<SQLQueryData>` generics below were stripped by
/// extraction and have been restored from usage.
pub struct SQLPatternParser {
    schema: SQLSchema,
    query_evaluation_time: f64,
}

impl SQLPatternParser {
    pub fn new(schema: &SQLSchema, query_evaluation_time: f64) -> SQLPatternParser {
        Self {
            schema: schema.clone(),
            query_evaluation_time,
        }
    }

    /// Entry point: accepts exactly one statement, which must be a query.
    pub fn parse_query(&self, statements: &[Statement]) -> Option<SQLQueryData> {
        if statements.len() != 1 {
            println!("illegal query length");
            return None;
        }

        match &statements[0] {
            Statement::Query(query) => self.parse_query_node(query),
            _ => {
                println!("Not a query statement");
                None
            }
        }
    }

    fn parse_query_node(&self, query: &Query) -> Option<SQLQueryData> {
        // Convert CTE to subquery if present
        let query = self.cte_to_subquery(query);

        match &query.body.as_ref() {
            SetExpr::Select(select) => self.parse_select(select),
            _ => {
                println!("Not a SELECT statement");
                None
            }
        }
    }

    fn
cte_to_subquery(&self, query: &Query) -> Query { + let mut query = query.clone(); + + if let Some(with) = &query.with { + if !with.cte_tables.is_empty() { + let cte = &with.cte_tables[0]; + + // Create a subquery from the CTE + if let Some(new_body) = match &query.body.as_ref() { + SetExpr::Select(select) => { + let mut new_select = select.clone(); + new_select.from = vec![TableWithJoins { + relation: TableFactor::Derived { + lateral: false, + subquery: Box::new(*(cte.query).clone()), + alias: None, + }, + joins: vec![], + }]; + Some(SetExpr::Select(Box::new(*new_select))) + } + _ => None, + } { + query.body = Box::new(new_body); + query.with = None; + } + } + } + + query + } + + fn parse_select(&self, select: &Select) -> Option { + let (metric, has_subquery) = self.get_metric(select)?; + + let aggregation = self.get_aggregation(select)?; + + let group_bys = self.get_groupbys(select)?; + + if !has_subquery { + let time_info = self.get_time_info(select, &metric)?; + + // Check for unexpected fields + if select.distinct.is_some() + || select.top.is_some() + || select.into.is_some() + || !select.lateral_views.is_empty() + || select.prewhere.is_some() + || !select.cluster_by.is_empty() + || !select.distribute_by.is_empty() + || !select.sort_by.is_empty() + || select.having.is_some() + || !select.named_window.is_empty() + || select.window_before_qualify + { + println!("Unexpected SELECT fields present"); + return None; + } + + Some(SQLQueryData { + aggregation_info: aggregation, + metric, + labels: group_bys, + time_info, + subquery: None, + }) + } else { + // Parse subquery + let subquery = match &select.from[0].relation { + TableFactor::Derived { subquery, .. 
} => match subquery.body.as_ref() {
                SetExpr::Select(inner_select) => {
                    let inner_aggregation = self.get_aggregation(inner_select)?;
                    let inner_group_bys = self.get_groupbys(inner_select)?;
                    let time_info = self.get_time_info(inner_select, &metric)?;

                    Some(Box::new(SQLQueryData {
                        aggregation_info: inner_aggregation,
                        metric: metric.clone(),
                        labels: inner_group_bys,
                        time_info,
                        subquery: None,
                    }))
                }
                _ => None,
            },
            _ => None,
        }?;

        Some(SQLQueryData {
            aggregation_info: aggregation,
            metric,
            labels: group_bys,
            // Sentinel: the outer level of a nested query has no time window of
            // its own; the matcher skips time checks when it sees "UNUSED".
            time_info: TimeInfo::new("UNUSED".to_string(), -1.0, -1_f64),
            subquery: Some(subquery),
        })
    }

    /// Extracts the quantile argument from QUANTILE(q, col) or
    /// PERCENTILE(col, p), normalising percentiles (0-100) to quantiles (0-1).
    /// Returns an empty Vec when no argument can be extracted.
    fn get_quantile_args(&self, func: &Function) -> Vec<String> {
        let name = func.name.to_string().to_uppercase();

        match (&func.args, name.as_str()) {
            (FunctionArguments::List(args), "QUANTILE") => {
                let mut quantile_arg = Vec::new();

                // Checked access: `args.args[0]` panicked on `QUANTILE()`.
                match args.args.first() {
                    Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value)))) => {
                        quantile_arg.push(value.value.to_string());
                        quantile_arg
                    }
                    _ => quantile_arg,
                }
            }
            (FunctionArguments::List(args), "PERCENTILE") => {
                let mut quantile_arg = Vec::new();

                // Convert PERCENTILE to QUANTILE format.
                // Checked access: `args.args[1]` panicked on short arg lists.
                match args.args.get(1) {
                    Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value)))) => {
                        let val_str = value.value.to_string();
                        // Generic restored: extraction dropped `<f64>`.
                        if let Ok(percentile) = val_str.parse::<f64>() {
                            // Values > 1 are percentiles (0-100); scale to 0-1.
                            let quantile = if percentile > 1.0 {
                                percentile / 100.0
                            } else {
                                percentile
                            };
                            quantile_arg.push(quantile.to_string());
                        }
                        quantile_arg
                    }
                    _ => quantile_arg,
                }
            }
            _ => Vec::new(),
        }
    }

    /// Extracts the single aggregation in the projection, normalising
    /// PERCENTILE to QUANTILE; `None` when the projection is unsupported.
    fn get_aggregation(&self, select: &Select) -> Option<AggregationInfo> {
        if select.projection.len() != 1 {
            return None;
        }

        match &select.projection[0] {
            SelectItem::UnnamedExpr(Expr::Function(func))
            | SelectItem::ExprWithAlias {
                expr: Expr::Function(func),
                ..
+ } => { + let name = func.name.to_string().to_uppercase(); + + let args = self.get_quantile_args(func); + + // Get the column being aggregated + let col = match &func.args { + FunctionArguments::None => return None, + FunctionArguments::Subquery(_) => return None, + FunctionArguments::List(func_args) => { + if name == "QUANTILE" { + // QUANTILE(0.95, value) - column is second argument + if func_args.args.len() < 2 { + return None; + } + match &func_args.args[1] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } else if name == "PERCENTILE" { + // PERCENTILE(value, 95) - column is first argument + if func_args.args.is_empty() { + return None; + } + match &func_args.args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } else { + // For other aggregations - column is first argument + if func_args.args.is_empty() { + return None; + } + match &func_args.args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + ident, + ))) => ident.value.clone(), + _ => return None, + } + } + } + }; + + // Always store PERCENTILE as QUANTILE internally + let normalized_name = if name == "PERCENTILE" { + "QUANTILE".to_string() + } else { + name + }; + + Some(AggregationInfo::new(normalized_name, col, args)) + } + _ => None, + } + } + + fn get_metric(&self, select: &Select) -> Option<(String, bool)> { + if select.from.is_empty() { + return None; + } + + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + let metric = name.0.first()?.to_string(); + Some((metric, false)) + } + TableFactor::Derived { subquery, .. } => match subquery.body.as_ref() { + SetExpr::Select(inner_select) => { + if inner_select.from.is_empty() { + return None; + } + match &inner_select.from[0].relation { + TableFactor::Table { name, .. 
} => { + let metric = name.0.first()?.to_string(); + Some((metric, true)) + } + _ => None, + } + } + _ => None, + }, + _ => None, + } + } + + fn get_timestamp_from_datetime_str(datetime_str: &str) -> Option { + let parsed_datetime = parse_datetime(datetime_str).ok()?; + Some(parsed_datetime.timestamp().as_second() as f64) + } + + fn get_timestamp_from_between_highlow(&self, highlow: &Expr) -> Option { + match highlow { + Expr::Function(func) if func.name.to_string().to_uppercase() == "NOW" => { + Some(self.query_evaluation_time) + } + Expr::Value(ValueWithSpan { + value: SingleQuotedString(datetime_str), + span: _, + }) => Self::get_timestamp_from_datetime_str(datetime_str), + Expr::Function(func) if func.name.to_string().to_uppercase() == "DATEADD" => { + self.parse_dateadd(func) + } + _ => { + panic!("invalid time syntax {:?}", highlow); + } + } + } + + fn get_time_info(&self, select: &Select, table_name: &str) -> Option { + let selection = select.selection.as_ref()?; + + match selection { + Expr::Between { + expr, + negated, + low, + high, + } => { + if *negated { + return None; + } + + // Extract time column name + let col_name = match expr.as_ref() { + Expr::Identifier(ident) => ident.value.clone(), + _ => return None, + }; + + let time_col_name = self.schema.get_time_column(table_name)?; + + if col_name != *time_col_name { + println!( + "Found selection statement with column name {} but time column name is {}", + col_name, time_col_name + ); + return None; + } + + let start = self.get_timestamp_from_between_highlow(low.as_ref())?; + let end = self.get_timestamp_from_between_highlow(high.as_ref())?; + + let duration = end - start; + + Some(TimeInfo::new(col_name, start, duration)) + } + _ => None, + } + } + + fn parse_dateadd(&self, func: &Function) -> Option { + let args = match &func.args { + FunctionArguments::List(args) => &args.args, + _ => return None, + }; + + if args.len() != 3 { + return None; + } + + // First arg is time unit + let time_unit = match 
&args[0] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => { + ident.value.to_lowercase() + } + _ => return None, + }; + + // Second arg is the value + let duration_to_add = match &args[1] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr, + })) => { + println!("CORRECT MATCH EXPR!: {:?}", args[1]); + match expr.as_ref() { + Expr::Value(ValueWithSpan { + value: Value::Number(n, _), + span: _, + }) => -n.parse::().ok()?, + _ => return None, + } + } + FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) => match expr { + Expr::Value(ValueWithSpan { + value: Value::Number(n, _), + span: _, + }) => n.parse::().ok()?, + _ => return None, + }, + _ => { + println!("DID NOT MATCH EXPR!: {:?}", args[1]); + return None; + } + }; + + let base_timestamp = match &args[2] { + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Function(func))) + if func.name.to_string().to_uppercase() == "NOW" => + { + self.query_evaluation_time + } + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(ValueWithSpan { + value: SingleQuotedString(datetime_str), + span: _, + }))) => parse_datetime(datetime_str).ok()?.timestamp().as_second() as f64, + _ => { + println!("time upper bound not calculating from present"); + return None; + } + }; + + // Convert to seconds + let multiplier = match time_unit.as_str() { + "s" | "second" | "seconds" => 1.0, + "m" | "minute" | "minutes" => 60.0, + "h" | "hour" | "hours" => 3600.0, + "d" | "day" | "days" => 86400.0, + _ => return None, + }; + + Some(base_timestamp + (duration_to_add as f64) * multiplier) + } + + // fn parse_dateadd_duration(&self, func: &Function, start: f64) -> Option { + // let args = match &func.args { + // FunctionArguments::List(args) => &args.args, + // _ => return None, + // }; + + // if args.len() != 3 { + // return None; + // } + + // // First arg is time unit + // let time_unit = match &args[0] { + // 
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => { + // ident.value.to_lowercase() + // } + // _ => return None, + // }; + + // // Second arg is the value + // let time_value = match &args[1] { + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { + // op: UnaryOperator::Minus, + // expr, + // })) => { + // println!("CORRECT MATCH EXPR!: {:?}", args[1]); + // match expr.as_ref() { + // Expr::Value(ValueWithSpan { + // value: Value::Number(n, _), + // span: _, + // }) => n.parse::().ok()?, + // _ => return None, + // } + // } + // FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) => match expr { + // Expr::Value(ValueWithSpan { + // value: Value::Number(n, _), + // span: _, + // }) => n.parse::().ok()?, + // _ => return None, + // }, + // _ => { + // println!("DID NOT MATCH EXPR!: {:?}", args[1]); + // return None; + // } + // }; + + // // Third arg should be NOW() or start + // // let printargs = &args[2]; + // // println!("DATEADD ARGS: {printargs:?}"); + // match &args[2] { + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Function(func))) + // if func.name.to_string().to_uppercase() == "NOW" => {} + // FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(ValueWithSpan { + // value: SingleQuotedString(datetime_str), + // span: _, + // }))) if start + // == (parse_datetime(datetime_str).ok()?.timestamp().as_second() as f64) => {} + + // _ => { + // println!("time upper bound not calculating from present"); + // return None; + // } + // } + + // // Convert to seconds + // let multiplier = match time_unit.as_str() { + // "s" | "second" | "seconds" => 1.0, + // "m" | "minute" | "minutes" => 60.0, + // "h" | "hour" | "hours" => 3600.0, + // "d" | "day" | "days" => 86400.0, + // _ => return None, + // }; + + // Some(time_value as f64 * multiplier) + // } + + fn get_groupbys(&self, select: &Select) -> Option> { + match &select.group_by { + GroupByExpr::Expressions(exprs, mods) => { + if !mods.is_empty() { + return None; + } + + let 
mut group_bys = HashSet::new(); + + for expr in exprs { + match expr { + Expr::Identifier(ident) => { + group_bys.insert(ident.value.clone()); + } + _ => return None, + } + } + + if group_bys.is_empty() { + None + } else { + Some(group_bys) + } + } + _ => None, + } + } +} diff --git a/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs b/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs new file mode 100644 index 0000000..02c154d --- /dev/null +++ b/CommonDependencies/dependencies/rs/sql_utilities/src/lib.rs @@ -0,0 +1,3 @@ +pub mod ast_matching; + +pub use ast_matching::*; diff --git a/CommonDependencies/installation/Dockerfile b/CommonDependencies/installation/Dockerfile new file mode 100644 index 0000000..272bb15 --- /dev/null +++ b/CommonDependencies/installation/Dockerfile @@ -0,0 +1,39 @@ +# CommonDependencies/Dockerfile +# Shared base image for SketchDB services containing common dependencies and internal packages + +FROM python:3.10-slim AS sketchdb-base + +LABEL maintainer="SketchDB Team" +LABEL description="Shared base image with common dependencies for SketchDB services" + +WORKDIR /app + +# Install system dependencies needed across all services +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install promql_utilities (shared internal package) +COPY dependencies/py/promql_utilities /tmp/promql_utilities +RUN pip install --no-cache-dir /tmp/promql_utilities && rm -rf /tmp/promql_utilities + +# Install common Python dependencies used across multiple services +COPY installation/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt + +# Create common directories +RUN mkdir -p /app/logs /app/config /app/outputs + +# Set Python path to include app directory +#ENV PYTHONPATH=/app${PYTHONPATH:+:${PYTHONPATH}} +ENV PYTHONPATH=/app + +# Default working directory for services +WORKDIR /app + +# Default 
#!/bin/bash
#
# Builds the shared SketchDB base image that contains the common
# dependencies. The build context is the parent of the directory
# holding this script, so sibling dependency folders are reachable.

set -e

# Resolve this script's directory and the build-context directory.
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly BASE_DIR="$(dirname "$SCRIPT_DIR")"

# Image coordinates.
readonly IMAGE_NAME="sketchdb-base"
readonly IMAGE_TAG="latest"
readonly FULL_IMAGE_NAME="${IMAGE_NAME}:${IMAGE_TAG}"

echo "Building SketchDB base image: $FULL_IMAGE_NAME"
echo "Build context: $BASE_DIR"

# Build the base image from the Dockerfile that sits next to this script.
docker build -t "$FULL_IMAGE_NAME" -f "$SCRIPT_DIR/Dockerfile" "$BASE_DIR"

echo "Successfully built base image: $FULL_IMAGE_NAME"

echo "Base image build complete!"
echo "Services can now use: FROM $FULL_IMAGE_NAME"
+ +## Directory Structure + +``` +tests/cross_language_comparison/ +├── test_data/ +│ └── promql_queries.json # Test cases and expected results +├── python_tests/ +│ ├── test_data.py # Python test data structures +│ ├── pattern_tests.py # Python pattern testing logic +│ └── test_runner.py # Python test runner +├── rust_tests/ +│ ├── src/ +│ │ ├── main.rs # Rust test runner entry point +│ │ ├── test_data.rs # Rust test data structures +│ │ └── pattern_tests.rs # Rust pattern testing logic +│ └── Cargo.toml # Rust project configuration +├── comparison_tests/ +│ └── result_comparator.py # Cross-language result comparison +├── utilities/ +│ └── master_test_runner.py # Orchestrates all tests +└── README.md # This file +``` + +## Quick Start + +### Prerequisites + +1. **Python**: Ensure Python 3.8+ is installed with access to the `promql_utilities` package +2. **Rust**: Ensure Rust 1.70+ is installed with Cargo +3. **Dependencies**: The promql_utilities packages for both Python and Rust must be available + +### Running All Tests + +```bash +# From the project root directory +cd tests/cross_language_comparison +python utilities/master_test_runner.py +``` + +This will: +1. Run Python pattern tests +2. Run Rust pattern tests +3. Compare results between both implementations +4. 
Generate comprehensive reports + +### Running Individual Test Suites + +#### Python Tests Only +```bash +cd tests/cross_language_comparison/python_tests +python test_runner.py ../test_data/promql_queries.json +``` + +#### Rust Tests Only +```bash +cd tests/cross_language_comparison/rust_tests +cargo run --release -- ../test_data/promql_queries.json +``` + +#### Comparison Only +```bash +cd tests/cross_language_comparison/comparison_tests +python result_comparator.py ../python_tests/python_test_results.json ../rust_tests/rust_test_results.json +``` + +## Test Data Format + +The test data is defined in `test_data/promql_queries.json`: + +```json +{ + "test_cases": [ + { + "id": "unique_test_id", + "description": "Human readable description", + "query": "actual_promql_query", + "expected_pattern_type": "ONLY_TEMPORAL|ONLY_SPATIAL|ONE_TEMPORAL_ONE_SPATIAL", + "expected_tokens": { + "metric": {"name": "...", "labels": {...}}, + "function": {"name": "..."}, + "aggregation": {"op": "..."} + } + } + ], + "pattern_builder_tests": [ + // Tests for PromQLPatternBuilder functionality + ] +} +``` + +## Adding New Test Cases + +1. **Add test case to JSON**: Edit `test_data/promql_queries.json` to include new queries +2. **Update patterns if needed**: Modify pattern definitions in both Python and Rust implementations +3. 
**Run tests**: Execute the master test runner to validate new cases + +### Example Test Case + +```json +{ + "id": "custom_aggregation", + "description": "Custom aggregation test", + "query": "avg(cpu_usage{instance=\"server1\"})", + "expected_pattern_type": "ONLY_SPATIAL", + "expected_tokens": { + "metric": { + "name": "cpu_usage", + "labels": {"instance": "server1"}, + "at_modifier": null + }, + "aggregation": { + "op": "avg", + "modifier": null + } + } +} +``` + +## Output Files + +After running tests, several output files are generated: + +- `python_tests/python_test_results.json` - Python test results +- `rust_tests/rust_test_results.json` - Rust test results +- `comparison_tests/comparison_report.json` - Detailed comparison report +- `test_summary.json` - High-level test execution summary + +## Understanding Results + +### Success Metrics +- **Both Passed**: Both implementations correctly handled the test case +- **Pattern Type Match**: Both implementations identified the same pattern type +- **Token Similarity**: Measure of how similar the extracted tokens are (0.0-1.0) + +### Common Issues +- **Pattern Type Mismatch**: Implementations categorize queries differently +- **Token Extraction Differences**: Different token data extracted from the same query +- **Success Rate Differences**: One implementation handles a query that the other doesn't + +### Performance Comparison +The framework also compares execution times between implementations to identify performance characteristics. + +## Extending the Framework + +### Adding New Pattern Types +1. Update both Python and Rust `QueryPatternType` enums +2. Add corresponding patterns to both test implementations +3. Update test data with examples of the new pattern type + +### Adding New Token Types +1. Define token structures in both `test_data.py` and `test_data.rs` +2. Update token extraction logic in both pattern testers +3. 
Update comparison logic in `result_comparator.py` + +## Troubleshooting + +### Common Issues + +**"Module not found" errors**: Ensure the promql_utilities packages are properly installed and accessible + +**Rust build failures**: Check that all Rust dependencies are available and versions are compatible + +**Path issues**: Run commands from the correct directories as shown in the examples + +**Missing test files**: Ensure all required files are present and have correct permissions + +### Debug Mode + +For more detailed output, you can run individual components with verbose logging or add debug prints to the test implementations. + +## Contributing + +When contributing new tests or improvements: + +1. Follow the existing code patterns +2. Add appropriate documentation +3. Test both happy path and edge cases +4. Ensure cross-platform compatibility +5. Update this README with any new features diff --git a/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md b/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md new file mode 100644 index 0000000..a144c7d --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/ast_matching_comparison.md @@ -0,0 +1,237 @@ +## AST Matching: Python vs Rust — detailed comparison + +Date: 2025-08-26 + +Purpose: dump a full, function-level and class-level comparison of the `ast_matching` modules in +Python (dependencies/py/promql_utilities/promql_utilities/ast_matching) and Rust +(dependencies/rs/promql_utilities/src/ast_matching). Each discrepancy or change is tagged as +"MUST HAVE" (correctness-related) or "GOOD TO HAVE" (portability/ergonomics/perf). + +--- + +Files compared +- Python + - PromQLPattern.py + - PromQLPatternBuilder.py +- Rust + - promql_pattern.rs + - promql_pattern_builder.rs + - promql_pattern_factory.rs + +Note: This file assumes the versions present in the repo as of the timestamp above. 
The Rust +`promql_pattern.rs` file already includes `SubqueryExpr` handling (line ranges present in the +attachment). + +### High-level summary +- Both sides implement: pattern builder -> pattern object -> matcher that walks a parsed PromQL AST + and optionally collects tokens. +- Major conceptual parity but concrete representation, naming, and normalization differences exist. + +--- + +## Class/struct level mapping + +- Python: `PromQLPattern` (class) + - Holds pattern dict, exposes `matches(node)` -> `MatchResult(matches: bool, tokens: Dict)`. + - Key internals: `_node_to_dict`, `_matches_recursive`. + +- Python: `PromQLPatternBuilder` (static-method-only dataclass) + - Produces Python-native pattern dicts (or `None` for `any()` wildcard). + +- Rust: `PromQLPattern` (struct) + - Holds `ast_pattern: HashMap`, typed token model, `expected_pattern_type`. + - Exposes `matches(&Expr)` -> `PromQLMatchResult` (typed tokens). + - Internals: `matches_recursive`, typed `match_*` helpers. + +- Rust: `PromQLPatternBuilder` (impl) + - Produces `HashMap` patterns. + +- Rust: typed token structs (`TokenData`, `MetricToken`, `FunctionToken`, ...). + +Discrepancy tag: class/struct correspondence — GOOD TO HAVE. It's fine for Rust to use typed tokens, but if cross-language token portability is desired, aligning JSON shapes is recommended. + +--- + +## Function-by-function comparison (Python -> Rust) + +Legend: MUST HAVE = correctness/security-related; GOOD TO HAVE = portability/ergonomics/perf. + +1) Builder: any() + - Python: `PromQLPatternBuilder.any()` returns `None`. Python matcher treats `pattern is None` as wildcard -> matches anything. + - Rust: `PromQLPatternBuilder::any()` returns an empty `HashMap` (i.e., `{}`). `matches_recursive` requires a `type` string and returns false if missing; an empty map does NOT act as wildcard. + - Discrepancy: semantics differ and lead to non-matching behavior in Rust when user expects wildcard. 
+ - Tag: MUST HAVE (pattern wildcard semantics affect correctness of many patterns). + - Suggested fixes (MUST HAVE): make Rust `matches_recursive` treat empty pattern as wildcard (e.g., `if pattern.is_empty() { return true; }`) or change `any()` to return a sentinel `Value::Null` and handle it. + +2) Builder: binary_op / BinaryExpr naming + - Python builder returns `type: "BinaryOpExpr"` (PromQLPatternBuilder.binary_op). + - Python `_node_to_dict` for actual AST Binary returns `type: "BinaryExpr"`. + - Therefore patterns built by Python builder will not match binary AST nodes; token collection for binary ops (which checks "BinaryOpExpr") will also never trigger. + - Rust builder and matcher consistently use `"BinaryExpr"`. + - Discrepancy: naming typo/inconsistency in Python. + - Tag: MUST HAVE (causes incorrect matching of binary expressions). + - Suggested fix (MUST HAVE): change Python builder to produce `"BinaryExpr"` (or change `_node_to_dict` to produce `"BinaryOpExpr"`, but updating builder is minimal). + +3) Builder: function (`function` / `Call` / `func` field shape) + - Python builder sets `func` to `{"type":"Function","name": [ ... ]}` (dict with `name` list). + - Rust builder sets `func` to `Value::Array([ func_object ])` (an array containing the func object). Rust matcher expects this array-wrapped shape. + - Both matchers work with their own builders but cross-language serialized patterns will differ. + - Discrepancy: pattern JSON shape mismatch; porting patterns across languages will fail unless normalized. + - Tag: GOOD TO HAVE (affects portability, not correctness inside a single language runtime). + - Suggested fix (GOOD TO HAVE): normalize representations to a single shape (prefer object rather than array) or make matchers accept both shapes. 
+ +4) Function args matching and collection + - Python `_matches_recursive` checks `args` as list; requires same length and recurses per-item; `_collect_args_as` stores `tokens[collect_args_as] = node_dict['args']` (raw arg ASTs) and `_collect_as` stores args raw. + - Rust `match_function_call` checks arg count and recurses. For `_collect_as` Rust stores args as `format!("{:?}", arg)` (stringified) and for `_collect_args_as` does the same. Earlier Rust code used placeholders for args in some versions; current code stringifies args (improvement). + - Discrepancy: token shape differs (Python raw AST vs Rust stringified args). + - Tag: GOOD TO HAVE (token shape matters for portability and downstream consumers). + +5) Aggregate / AggregateExpr + - Python builder stores `op` possibly as list or string (builder converts to list), `modifier` field as `by`/`without` stored under `modifier` key. + - Rust builder stores `op` as array, stores `by` and `without` separately in the pattern JSON. Rust `match_aggregation` checks membership and recurses into `expr`. Rust sets `param` to `agg.param.as_ref().map(|p| format!("{:?}", p))` while Python earlier stored `param` more directly. + - Discrepancy: minor shape/field naming differences for modifiers (`modifier` vs `by`/`without`) and param normalization. + - Tag: GOOD TO HAVE (affects portability; correctness preserved if each side consumes its own builder). + - Suggested fix (GOOD TO HAVE): agree on `by`/`without` keys or accept both forms in matchers. + +6) MatrixSelector / range vector + - Python `_node_to_dict` exposes `range` verbatim from parser; builder stores `range` string. + - Rust `match_matrix_selector` converts `ms.range` (std::time::Duration) to `chrono::Duration` in tokens and stores `offset` from `ms.vs.offset`. Rust token normalizes duration; Python currently leaves raw parser value. + - Discrepancy: duration representation difference and `offset` location naming. 
+ - Tag: GOOD TO HAVE (normalization difference — important for portability but not strictly correctness inside runtime). + +7) NumberLiteral numeric comparison + - Python compares pattern value vs node value using equality (exact) in general code; there is no explicit epsilon handling unless the pattern_value is TokenType then handled specially. (Note: Python code uses TokenType branch for token comparisons; numeric equality uses Python's `==` semantics on floats.) + - Rust compares floats using `if (num.val - expected_f64).abs() > f64::EPSILON { return false; }` i.e., epsilon-based equivalence. + - Discrepancy: Python exact vs Rust EPSILON tolerance. + - Tag: MUST HAVE (numeric equality semantics can cause correctness surprises across languages). + - Suggested fix (MUST HAVE): pick one policy (recommended: epsilon compare) and apply to Python; or clearly document language-specific rule. + +8) SubqueryExpr + - Python: builder + `_node_to_dict` include `SubqueryExpr` support (range, step, offset) and `_matches_recursive` handles nested dicts for subquery patterns. + - Rust: the current `promql_pattern.rs` includes `match_subquery` and `SubqueryToken` — so Rust supports subquery matching now. + - Discrepancy: earlier there was a gap; currently parity exists in repo (good). + - Tag: GOOD TO HAVE (presence is correctness-related only if you rely on subquery patterns; treat as MUST HAVE if you need subquery correctness). For correctness: mark MUST HAVE if you plan to support subquery-based pattern matching; otherwise GOOD TO HAVE. + +9) AtModifier (`@` modifier) handling + - Python: stores `at` raw in `node_dict` and in tokens (no conversion) — flexible. + - Rust: converts `AtModifier::At(t)` to seconds since UNIX_EPOCH and panics on `AtModifier::Start` or `AtModifier::End` (explicit panics). That means Rust can panic on certain AST values. + - Discrepancy: Rust panics on `Start/End`, Python will simply put the value in token. 
+ - Tag: MUST HAVE (panic on parser output is correctness/robustness issue). + - Suggested fix (MUST HAVE): make Rust handle `Start`/`End` gracefully (either encode them as sentinel strings or treat as non-matching rather than panic). Convert time to a normalized representation but don't panic. + +10) Pattern strictness & missing-type handling + - Python: `if pattern is None: return True` (wildcard) and when a key exists with value `None` the matcher treats that as wildcard for that field. Python is permissive. + - Rust: `matches_recursive` requires `pattern.get("type")` to be a `Value::String` and returns false otherwise. Nested checks require `Value::Object` for nested patterns. Rust is strict about pattern shape. + - Discrepancy: permissiveness vs strictness causes different failure modes and different ways of expressing wildcards in nested positions. + - Tag: MUST HAVE (expressing patterns consistently across languages is essential for correctness of pattern design). + - Suggested fix (MUST HAVE): either document the strict JSON contract for Rust builders or make Rust accept `Value::Null` or empty maps as wildcards; conversely, validate Python patterns to guarantee shape if you prefer Rust's strictness. + +11) Token shapes and type normalization + - Python tokens: lightweight dicts; include `ast` fields that carry parser nodes. Values are not normalized (e.g., `at` raw). + - Rust tokens: typed structs, normalized fields (`at_modifier: Option`, `RangeToken.range: chrono::Duration`) and some stringification via `format!("{:?}", ...)` for parameters/args when necessary. + - Discrepancy: serialization and field names differ; cross-language consumers will need mapping. + - Tag: GOOD TO HAVE (portability/contract-related). If consumers rely on specific token fields for correctness, escalate to MUST HAVE. + +12) Utility / Factory functions + - Rust includes `PromQLPatternFactory` with prebuilt patterns for OnlyTemporal / OnlySpatial patterns. 
+ - Python lacks the same factory file (you can emulate using `PromQLPatternBuilder`). + - Discrepancy: convenience API mismatch. + - Tag: GOOD TO HAVE. + +--- + +## Per-function diffs (concise) — where to look + +- PromQLPattern.__init__ (py) vs PromQLPattern::new (rs) + - Both store the pattern. Python stores pattern as an arbitrary dict possibly `None`; Rust requires `HashMap` and an explicit `expected_pattern_type`. + - Tag: GOOD TO HAVE. + +- PromQLPattern.matches(node) (py) vs PromQLPattern::matches(&Expr) (rs) + - Both call recursive matching and return a pair of (matches, tokens). Python returns `MatchResult(matches, tokens)` where tokens are a plain dict; Rust returns typed `PromQLMatchResult`. + - Tag: GOOD TO HAVE. + +- _node_to_dict (py) vs explicit typed match arms (rs) + - Python converts parser nodes to dict forms used by recursive matcher. + - Rust uses pattern_type & node enum and calls typed `match_*` helpers directly. Rust does not use a transient dict representation. + - Tag: GOOD TO HAVE (architectural difference; both valid). + +- _matches_recursive (py) vs matches_recursive (rs) + - Python: flexible dict-driven matching with list/dict/TokenType handlers and `_collect_as` logic. + - Rust: strict: pattern must include `type` string; then match arms call typed helpers. + - Key correctness mismatch: Python supports `pattern is None` wildcard; Rust requires `type` key. + - Tag: MUST HAVE for wildcard semantics. + +- match_metric_selector (rs) vs VectorSelector handling in Python + - Both check `name` membership; both can collect labels. Rust extracts equality-match labels only (`MatchOp::Equal`) and builds typed `MetricToken` with `at_modifier` normalized to seconds or panics on Start/End. + - Python exposes `labels` as `matchers` and leaves `at` raw. + - Tag: MUST HAVE for panic behavior on `@` variants; GOOD TO HAVE for normalization parity. 
+ +- match_function_call (rs) vs Call handling in Python + - Similar high-level behavior (name membership, arg count, recursive matching). Differences in tokenization and `func` pattern shape. + - Tag: GOOD TO HAVE. + +- match_aggregation (rs) vs AggregateExpr handling in Python + - Both check `op` membership and recurse into `expr`. Rust builds typed `AggregationToken` and stringifies `param`; Python stores param in token dict. + - Tag: GOOD TO HAVE. + +- match_matrix_selector (rs) vs MatrixSelector handling in Python + - Both support vector_selector nested matching and token collection. Rust normalizes durations into chrono::Duration and extracts `offset`; Python leaves raw range and step/offset fields in node dict. + - Tag: GOOD TO HAVE. + +- match_binary_operation (rs) vs BinaryExpr handling in Python + - Rust expects pattern type `BinaryExpr` and checks `op`, left/right recursion, collects token. + - Python builder mismatch (BinaryOpExpr vs BinaryExpr) is a MUST HAVE fix. + +- match_number_literal (rs) vs NumberLiteral handling in Python + - Rust uses epsilon comparison; Python uses direct equality (unless pattern is None). Make numeric equality policy consistent (MUST HAVE). + +- match_subquery (rs) vs Subquery handling in Python + - Current repo: Rust includes `match_subquery` and `SubqueryToken` (parity achieved). If you rely on subquery correctness, tests must validate behavior. + - Tag: GOOD TO HAVE / MUST HAVE depending on usage. + +--- + +## Concrete list of discrepancies & tags (compact) + +1. any() wildcard semantics — MUST HAVE +2. Python binary builder `type` naming (`BinaryOpExpr` vs `BinaryExpr`) — MUST HAVE +3. Numeric equality epsilon (Py exact vs Rust eps) — MUST HAVE +4. Rust panics on `AtModifier::Start` / `End` — MUST HAVE +5. `func` shape (object vs array-wrapped object) — GOOD TO HAVE +6. Token shapes and normalization (raw AST vs typed/normalized representation) — GOOD TO HAVE +7. 
Aggregation modifier naming (`modifier` vs `by`/`without`) — GOOD TO HAVE +8. Matrix range and offset normalization differences — GOOD TO HAVE +9. Subquery support parity (now present in Rust) — GOOD TO HAVE (escalate to MUST HAVE if subqueries are required) +10. Presence of `PromQLPatternFactory` in Rust but not Python — GOOD TO HAVE + +--- + +## Minimal recommended fixes (priority order) +1. Fix Python builder `binary_op` to set `type: "BinaryExpr"` (MUST HAVE) +2. Make Rust `matches_recursive` treat empty `pattern` (or `Value::Null`) as wildcard, or change `PromQLPatternBuilder::any()` to return `Value::Null` and recognize it (MUST HAVE) +3. Unify numeric equality policy (use epsilon both sides) (MUST HAVE) +4. Prevent Rust panics on `AtModifier::Start`/`End`: encode them as sentinel strings (e.g., "start"/"end") or treat as non-match (MUST HAVE) +5. Add optional tolerant parsing for `func` pattern shapes (accept both array-wrapped and object forms) (GOOD TO HAVE) +6. Add small JSON-token serializer in Python matching Rust token schema, or vice versa, for portability (GOOD TO HAVE) + +--- + +## Must-have tests to add (short list) +- `test_any_wildcard_matches_any_node` (Py + Rust) +- `test_binary_expr_matching` (detect Python builder bug) +- `test_numeric_equality_policy` (float epsilon consistency) +- `test_at_modifier_no_panic` (Rust must not panic for `Start`/`End`) +- `test_token_contracts` (verify presence and basic types of token fields) + +## Good-to-have tests +- cross-language serialized pattern roundtrip tests +- token schema parity tests (JSON serialize Rust tokens, compare to Python tokens) +- factory pattern equivalence (Rust `PromQLPatternFactory` vs composed Python builder) + +--- + +If you'd like, I can now: +- apply the MUST HAVE code fixes (small, targeted edits) and run the unit tests; or +- add the MUST HAVE tests first to surface current failures. + +Tell me which action to run next and I'll edit files + run tests. 
diff --git a/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py b/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py new file mode 100755 index 0000000..cc6f189 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/comparison_tests/result_comparator.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 + +import json +import sys +from typing import Dict, List, Any, Optional, Tuple +from dataclasses import dataclass +from datetime import datetime + +@dataclass +class ComparisonResult: + test_id: str + python_success: bool + rust_success: bool + both_passed: bool + pattern_type_match: bool + token_similarity: float + execution_time_diff_ms: float + issues: List[str] + +@dataclass +class ComparisonSummary: + total_tests: int + both_passed: int + python_only_passed: int + rust_only_passed: int + both_failed: int + pattern_type_matches: int + avg_token_similarity: float + avg_execution_time_python: float + avg_execution_time_rust: float + results: List[ComparisonResult] + +class ResultComparator: + def __init__(self): + pass + + def compare_results(self, python_results_file: str, rust_results_file: str) -> ComparisonSummary: + """Compare Python and Rust test results""" + + with open(python_results_file, 'r') as f: + python_data = json.load(f) + + with open(rust_results_file, 'r') as f: + rust_data = json.load(f) + + # Create lookup maps + python_results = {r['test_id']: r for r in python_data['results']} + rust_results = {r['test_id']: r for r in rust_data['results']} + + comparison_results = [] + both_passed = 0 + python_only_passed = 0 + rust_only_passed = 0 + both_failed = 0 + pattern_type_matches = 0 + total_token_similarity = 0.0 + total_python_time = 0.0 + total_rust_time = 0.0 + + all_test_ids = set(python_results.keys()) | set(rust_results.keys()) + + for test_id in all_test_ids: + python_result = python_results.get(test_id) + rust_result = rust_results.get(test_id) + + if not 
python_result: + print(f"Warning: Test {test_id} missing from Python results") + continue + if not rust_result: + print(f"Warning: Test {test_id} missing from Rust results") + continue + + python_success = python_result['success'] + rust_success = rust_result['success'] + + # Count success patterns + if python_success and rust_success: + both_passed += 1 + elif python_success and not rust_success: + python_only_passed += 1 + elif not python_success and rust_success: + rust_only_passed += 1 + else: + both_failed += 1 + + # Check pattern type match + pattern_type_match = ( + python_result.get('actual_pattern_type') == + rust_result.get('actual_pattern_type') + ) + if pattern_type_match: + pattern_type_matches += 1 + + # Calculate token similarity + token_similarity = self._calculate_token_similarity( + python_result.get('actual_tokens', {}), + rust_result.get('actual_tokens', {}) + ) + total_token_similarity += token_similarity + + # Calculate execution time difference + python_time = python_result.get('execution_time_ms', 0.0) + rust_time = rust_result.get('execution_time_ms', 0.0) + total_python_time += python_time + total_rust_time += rust_time + execution_time_diff = abs(python_time - rust_time) + + # Identify issues + issues = [] + if not pattern_type_match: + issues.append(f"Pattern type mismatch: Python={python_result.get('actual_pattern_type')}, Rust={rust_result.get('actual_pattern_type')}") + if token_similarity < 0.8: + issues.append(f"Low token similarity: {token_similarity:.2f}") + if python_success != rust_success: + issues.append(f"Success mismatch: Python={python_success}, Rust={rust_success}") + if execution_time_diff > 100: # More than 100ms difference + issues.append(f"Large execution time difference: {execution_time_diff:.2f}ms") + + comparison_result = ComparisonResult( + test_id=test_id, + python_success=python_success, + rust_success=rust_success, + both_passed=python_success and rust_success, + pattern_type_match=pattern_type_match, + 
token_similarity=token_similarity, + execution_time_diff_ms=execution_time_diff, + issues=issues + ) + comparison_results.append(comparison_result) + + total_tests = len(comparison_results) + avg_token_similarity = total_token_similarity / max(total_tests, 1) + avg_python_time = total_python_time / max(total_tests, 1) + avg_rust_time = total_rust_time / max(total_tests, 1) + + return ComparisonSummary( + total_tests=total_tests, + both_passed=both_passed, + python_only_passed=python_only_passed, + rust_only_passed=rust_only_passed, + both_failed=both_failed, + pattern_type_matches=pattern_type_matches, + avg_token_similarity=avg_token_similarity, + avg_execution_time_python=avg_python_time, + avg_execution_time_rust=avg_rust_time, + results=comparison_results + ) + + def _calculate_token_similarity(self, python_tokens: Dict[str, Any], rust_tokens: Dict[str, Any]) -> float: + """Calculate similarity between token dictionaries (0.0 to 1.0)""" + if not python_tokens and not rust_tokens: + return 1.0 + if not python_tokens or not rust_tokens: + return 0.0 + + # Compare keys + python_keys = set(python_tokens.keys()) + rust_keys = set(rust_tokens.keys()) + common_keys = python_keys & rust_keys + total_keys = python_keys | rust_keys + + if not total_keys: + return 1.0 + + key_similarity = len(common_keys) / len(total_keys) + + # Compare values for common keys + value_matches = 0 + for key in common_keys: + if self._tokens_match(python_tokens[key], rust_tokens[key]): + value_matches += 1 + + value_similarity = value_matches / max(len(common_keys), 1) + + # Weight: 50% key similarity, 50% value similarity + return (key_similarity + value_similarity) / 2 + + def _tokens_match(self, python_token: Any, rust_token: Any) -> bool: + """Check if individual tokens match""" + # Handle different token representations + if isinstance(python_token, dict) and isinstance(rust_token, dict): + # Compare key token fields + if 'name' in python_token and 'name' in rust_token: + return 
python_token['name'] == rust_token['name'] + if 'op' in python_token and 'op' in rust_token: + return python_token['op'] == rust_token['op'] + if 'range' in python_token and 'range' in rust_token: + return python_token['range'] == rust_token['range'] + + # Fallback to direct comparison + return python_token == rust_token + + def generate_report(self, summary: ComparisonSummary, output_file: str): + """Generate a detailed comparison report""" + report = { + 'timestamp': datetime.utcnow().isoformat(), + 'summary': { + 'total_tests': summary.total_tests, + 'both_passed': summary.both_passed, + 'python_only_passed': summary.python_only_passed, + 'rust_only_passed': summary.rust_only_passed, + 'both_failed': summary.both_failed, + 'pattern_type_matches': summary.pattern_type_matches, + 'pattern_type_match_rate': summary.pattern_type_matches / max(summary.total_tests, 1), + 'avg_token_similarity': summary.avg_token_similarity, + 'avg_execution_time_python_ms': summary.avg_execution_time_python, + 'avg_execution_time_rust_ms': summary.avg_execution_time_rust, + 'performance_ratio': summary.avg_execution_time_rust / max(summary.avg_execution_time_python, 0.001) + }, + 'detailed_results': [ + { + 'test_id': r.test_id, + 'python_success': r.python_success, + 'rust_success': r.rust_success, + 'both_passed': r.both_passed, + 'pattern_type_match': r.pattern_type_match, + 'token_similarity': r.token_similarity, + 'execution_time_diff_ms': r.execution_time_diff_ms, + 'issues': r.issues + } + for r in summary.results + ] + } + + with open(output_file, 'w') as f: + json.dump(report, f, indent=2) + +def main(): + if len(sys.argv) < 3: + print("Usage: python result_comparator.py ") + sys.exit(1) + + python_file = sys.argv[1] + rust_file = sys.argv[2] + + comparator = ResultComparator() + + print("Comparing Python and Rust test results...") + print("==========================================") + + try: + summary = comparator.compare_results(python_file, rust_file) + + 
def main():
    """CLI entry point: compare a Python and a Rust test-result file.

    Expects two positional arguments — the Python results JSON and the
    Rust results JSON. Prints a summary to stdout, writes a detailed
    report to comparison_report.json, and exits with status 1 on usage
    errors or comparison failures.
    """
    if len(sys.argv) < 3:
        # BUG FIX: the usage line had lost its two argument placeholders
        # (trailing space and nothing after the script name); restore them
        # so the message actually tells the user what to pass.
        print("Usage: python result_comparator.py <python_results.json> <rust_results.json>")
        sys.exit(1)

    python_file = sys.argv[1]
    rust_file = sys.argv[2]

    comparator = ResultComparator()

    print("Comparing Python and Rust test results...")
    print("==========================================")

    try:
        summary = comparator.compare_results(python_file, rust_file)

        print(f"\nComparison Summary:")
        print(f"Total tests: {summary.total_tests}")
        print(f"Both passed: {summary.both_passed}")
        print(f"Python only passed: {summary.python_only_passed}")
        print(f"Rust only passed: {summary.rust_only_passed}")
        print(f"Both failed: {summary.both_failed}")
        print(f"Pattern type matches: {summary.pattern_type_matches}/{summary.total_tests} ({summary.pattern_type_matches/max(summary.total_tests,1)*100:.1f}%)")
        print(f"Average token similarity: {summary.avg_token_similarity:.2f}")
        print(f"Avg execution time - Python: {summary.avg_execution_time_python:.2f}ms")
        print(f"Avg execution time - Rust: {summary.avg_execution_time_rust:.2f}ms")

        # Show tests with issues
        issues_found = [r for r in summary.results if r.issues]
        if issues_found:
            print(f"\nTests with issues ({len(issues_found)}):")
            for result in issues_found:
                print(f"  {result.test_id}: {', '.join(result.issues)}")

        # Generate detailed report
        output_file = "comparison_report.json"
        comparator.generate_report(summary, output_file)
        print(f"\nDetailed report written to: {output_file}")

    except Exception as e:
        print(f"Error during comparison: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
class PatternTester:
    """Matches PromQL query ASTs against a fixed pattern library and
    classifies each query into one of four string-keyed categories:
    "ONLY_TEMPORAL", "ONLY_SPATIAL", "ONLY_VECTOR", or
    "ONE_TEMPORAL_ONE_SPATIAL".

    Categories are plain strings rather than a shared enum so this test
    harness stays decoupled from the Rust side's type definitions.
    """

    def __init__(self):
        # Build the pattern library once up front; test_query only reads it.
        self.patterns = self._build_patterns()

    def _build_patterns(self) -> Dict[str, List[PromQLPattern]]:
        """Return the category-name -> list-of-PromQLPattern lookup table.

        NOTE(review): PromQLPatternBuilder.function / .aggregation /
        .matrix_selector / .metric / .number are project helpers; the
        collect_as / collect_args_as kwargs appear to name the token
        buckets surfaced in MatchResult.tokens on a successful match —
        confirm against the promql_utilities documentation.
        """
        patterns = {}

        # ONLY_TEMPORAL patterns
        temporal_patterns = [
            # Rate/increase pattern
            PromQLPattern(
                PromQLPatternBuilder.function(
                    ["rate", "increase"],
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                )
            ),
            # Quantile over time pattern (takes an extra numeric quantile argument)
            PromQLPattern(
                PromQLPatternBuilder.function(
                    "quantile_over_time",
                    PromQLPatternBuilder.number(),
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                    collect_args_as="function_args",
                )
            ),
            # Other over_time functions
            PromQLPattern(
                PromQLPatternBuilder.function(
                    [
                        "sum_over_time",
                        "count_over_time",
                        "avg_over_time",
                        "min_over_time",
                        "max_over_time",
                    ],
                    PromQLPatternBuilder.matrix_selector(
                        PromQLPatternBuilder.metric(collect_as="metric"),
                        collect_as="range_vector",
                    ),
                    collect_as="function",
                )
            ),
        ]

        # ONLY_SPATIAL patterns
        spatial_patterns = [
            # Aggregation pattern
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.metric(collect_as="metric"),
                    collect_as="aggregation",
                )
            ),
            # Simple metric pattern (for standalone metrics)
            PromQLPattern(PromQLPatternBuilder.metric(collect_as="metric")),
        ]

        # ONE_TEMPORAL_ONE_SPATIAL patterns
        combined_patterns = [
            # Aggregation of quantile_over_time
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.function(
                        "quantile_over_time",
                        PromQLPatternBuilder.number(),
                        PromQLPatternBuilder.matrix_selector(
                            PromQLPatternBuilder.metric(collect_as="metric"),
                            collect_as="range_vector",
                        ),
                        collect_as="function",
                        collect_args_as="function_args",
                    ),
                    collect_as="aggregation",
                )
            ),
            # Aggregation of other temporal functions
            PromQLPattern(
                PromQLPatternBuilder.aggregation(
                    ["sum", "count", "avg", "quantile", "min", "max"],
                    PromQLPatternBuilder.function(
                        [
                            "sum_over_time",
                            "count_over_time",
                            "avg_over_time",
                            "min_over_time",
                            "max_over_time",
                            "rate",
                            "increase",
                        ],
                        PromQLPatternBuilder.matrix_selector(
                            PromQLPatternBuilder.metric(collect_as="metric"),
                            collect_as="range_vector",
                        ),
                        collect_as="function",
                    ),
                    collect_as="aggregation",
                )
            ),
        ]

        # ONLY_VECTOR mirrors ONLY_SPATIAL but represents plain instant vector selectors.
        # Note the SAME list object is registered under both keys; test_query
        # disambiguates the two categories after a match (see below).
        patterns["ONLY_TEMPORAL"] = temporal_patterns
        patterns["ONLY_SPATIAL"] = spatial_patterns
        patterns["ONLY_VECTOR"] = spatial_patterns
        patterns["ONE_TEMPORAL_ONE_SPATIAL"] = combined_patterns

        return patterns

    def test_query(self, test_case: TestCase) -> TestResult:
        """Parse and classify one query; return a TestResult.

        success is True iff the classified category equals
        test_case.expected_pattern_type. Parse failures short-circuit with
        success=False and an error message. execution_time_ms covers the
        full parse + match wall time for this call.
        """
        start_time = time.time()
        test_id = test_case.id

        try:
            # Parse the query into an AST (promql_parser is an external
            # package; parse is assumed to raise on invalid PromQL).
            ast = promql_parser.parse(test_case.query)
        except Exception as e:
            return TestResult(
                test_id=test_id,
                success=False,
                error_message=f"Failed to parse query: {str(e)}",
                execution_time_ms=(time.time() - start_time) * 1000,
            )

        # Try to match against all patterns
        matched_pattern_type = None
        matched_tokens = None
        matched_raw = None

        # First match wins, scanning categories in dict insertion order
        # (ONLY_TEMPORAL, ONLY_SPATIAL, ONLY_VECTOR, ONE_TEMPORAL_ONE_SPATIAL).
        for pattern_type, pattern_list in self.patterns.items():
            for pattern in pattern_list:
                match_result: MatchResult = pattern.matches(ast)
                if match_result.matches:
                    matched_raw = (pattern_type, match_result)
                    break
            if matched_raw:
                break

        if matched_raw:
            pattern_type, match_result = matched_raw
            # If a plain vector selector matched under the spatial patterns, classify as ONLY_VECTOR.
            # ONLY_SPATIAL and ONLY_VECTOR share the same pattern list and
            # ONLY_SPATIAL is scanned first, so a bare metric always matches
            # there; the absence of an "aggregation" token tells them apart.
            if pattern_type == "ONLY_SPATIAL":
                if "metric" in match_result.tokens and "aggregation" not in match_result.tokens:
                    matched_pattern_type = "ONLY_VECTOR"
                else:
                    matched_pattern_type = "ONLY_SPATIAL"
            else:
                matched_pattern_type = self._pattern_type_to_string(pattern_type)

            matched_tokens = self._serialize_tokens(match_result.tokens)

        execution_time = (time.time() - start_time) * 1000

        # Check if results match expectations
        expected_type = test_case.expected_pattern_type
        success = matched_pattern_type == expected_type

        return TestResult(
            test_id=test_id,
            success=success,
            error_message=(
                None
                if success
                else f"Pattern type mismatch. Expected: {expected_type}, Got: {matched_pattern_type}"
            ),
            actual_pattern_type=matched_pattern_type,
            actual_tokens=matched_tokens,
            execution_time_ms=execution_time,
        )

    def _pattern_type_to_string(self, pattern_type: Any) -> str:
        """Normalize a category key to str (identity for the string keys used here)."""
        # pattern_type is already a string in this decoupled design
        return pattern_type if isinstance(pattern_type, str) else str(pattern_type)

    def _serialize_tokens(self, tokens: Dict) -> Dict:
        """Convert tokens to JSON-serializable format"""
        # Objects carrying a __dict__ (e.g. token objects) are flattened to
        # their attribute dicts; plain values pass through unchanged.
        serialized = {}
        for key, value in tokens.items():
            if hasattr(value, "__dict__"):
                serialized[key] = value.__dict__
            else:
                serialized[key] = value
        return serialized
@dataclass
class TestCase:
    """One PromQL query to classify, together with the expected outcome."""
    id: str
    description: str
    query: str
    expected_pattern_type: str
    expected_tokens: Dict[str, Any]

@dataclass
class PatternBuilderTest:
    """Declarative description of a single pattern-builder invocation."""
    id: str
    description: str
    builder_call: str
    parameters: Dict[str, Any]
    expected_pattern: Dict[str, Any]

@dataclass
class TestResult:
    """Outcome of running one TestCase."""
    test_id: str
    success: bool
    error_message: Optional[str] = None
    actual_pattern_type: Optional[str] = None
    actual_tokens: Optional[Dict[str, Any]] = None
    execution_time_ms: float = 0.0

@dataclass
class TestSuiteResult:
    """Aggregate outcome of a whole run for one language implementation."""
    language: str
    timestamp: str
    total_tests: int
    passed_tests: int
    failed_tests: int
    results: List[TestResult]

class TestData:
    """Loads shared test definitions from JSON and persists run results."""

    def __init__(self, test_cases: List[TestCase], pattern_builder_tests: List[PatternBuilderTest]):
        self.test_cases = test_cases
        self.pattern_builder_tests = pattern_builder_tests

    @classmethod
    def load_from_file(cls, file_path: str) -> 'TestData':
        """Build a TestData from a JSON file containing 'test_cases' and
        'pattern_builder_tests' arrays. Missing keys raise KeyError."""
        with open(file_path, 'r') as f:
            raw = json.load(f)

        cases = []
        for entry in raw['test_cases']:
            cases.append(
                TestCase(
                    id=entry['id'],
                    description=entry['description'],
                    query=entry['query'],
                    expected_pattern_type=entry['expected_pattern_type'],
                    expected_tokens=entry['expected_tokens'],
                )
            )

        builder_tests = []
        for entry in raw['pattern_builder_tests']:
            builder_tests.append(
                PatternBuilderTest(
                    id=entry['id'],
                    description=entry['description'],
                    builder_call=entry['builder_call'],
                    parameters=entry['parameters'],
                    expected_pattern=entry['expected_pattern'],
                )
            )

        return cls(cases, builder_tests)

    def save_results(self, results: List[TestResult], output_file: str):
        """Wrap `results` in a TestSuiteResult and write it to
        `output_file` as indented JSON."""
        passed = sum(1 for r in results if r.success)
        total = len(results)

        suite = TestSuiteResult(
            language="python",
            timestamp=datetime.utcnow().isoformat(),
            total_tests=total,
            passed_tests=passed,
            failed_tests=total - passed,
            results=results,
        )

        with open(output_file, 'w') as f:
            # default= handles the nested TestResult objects inside `results`.
            json.dump(suite.__dict__, f, indent=2, default=self._serialize_result)

    def _serialize_result(self, obj):
        """json.dump fallback: expose __dict__ for dataclass instances,
        stringify anything else."""
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return str(obj)
def main():
    """Run every TestCase from the given JSON file through PatternTester.

    Usage: python test_runner.py <test_data_file>

    Prints a per-test pass/fail line, a final summary, and writes the full
    results to python_test_results.json. Exits with status 1 when the
    argument is missing or the test data cannot be loaded.
    """
    if len(sys.argv) < 2:
        # BUG FIX: the usage message had lost its argument placeholder
        # (trailing space and nothing after the script name); restore it.
        print("Usage: python test_runner.py <test_data_file>")
        sys.exit(1)

    test_data_file = sys.argv[1]

    try:
        test_data = TestData.load_from_file(test_data_file)
    except Exception as e:
        print(f"Failed to load test data: {e}")
        sys.exit(1)

    tester = PatternTester()
    results = []

    print("Running Python PromQL Pattern Tests...")
    print("======================================")

    for test_case in test_data.test_cases:
        print(f"Running test: {test_case.id} - {test_case.description}")
        result = tester.test_query(test_case)

        if result.success:
            print(f"✅ PASSED ({result.execution_time_ms:.2f}ms)")
        else:
            print(f"❌ FAILED ({result.execution_time_ms:.2f}ms): {result.error_message}")

        results.append(result)

    passed = sum(1 for r in results if r.success)
    total = len(results)

    print(f"\nTest Summary:")
    print(f"Total: {total}, Passed: {passed}, Failed: {total - passed}")

    # Save results
    output_file = "python_test_results.json"
    test_data.save_results(results, output_file)
    print(f"Results written to: {output_file}")

if __name__ == "__main__":
    main()
b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.lock @@ -0,0 +1,1187 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + 
"rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = 
[ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_cross_lang_tests" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "promql_utilities", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + 
"chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = 
"socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + 
"powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name 
= "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" 
+version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + 
"windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + 
+[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml new file mode 100644 index 0000000..b472386 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_cross_lang_tests" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "test_runner" +path = "src/main.rs" + +[dependencies] +promql-parser = "0.5.0" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1.0", features = ["full"] } +chrono = "0.4.41" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[dependencies.promql_utilities] +path = "../../../../CommonDependencies/dependencies/rs/promql_utilities" diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs new file mode 100644 index 0000000..fe20d3c --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/main.rs @@ -0,0 +1,69 @@ +mod test_data; +mod pattern_tests; + +use pattern_tests::PatternTester; +use test_data::*; +use std::env; +use tracing_subscriber::filter::LevelFilter; + +fn main() -> Result<(), Box> { + // Initialize tracing with debug level + tracing_subscriber::fmt() + .with_max_level(LevelFilter::DEBUG) + .init(); + + let args: Vec = env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + let test_data_file = &args[1]; + let test_data = TestData::load_from_file(test_data_file)?; + + let tester = PatternTester::new(); + let mut results = Vec::new(); + + println!("Running Rust PromQL Pattern Tests..."); + println!("====================================="); + + for test_case in 
&test_data.test_cases { + println!("Running test: {} - {}", test_case.id, test_case.description); + let result = tester.test_query(test_case); + + if result.success { + println!("✅ PASSED ({}ms)", result.execution_time_ms); + } else { + println!("❌ FAILED ({}ms): {}", + result.execution_time_ms, + result.error_message.as_deref().unwrap_or("Unknown error")); + } + + results.push(result); + } + + let passed = results.iter().filter(|r| r.success).count(); + let total = results.len(); + + println!("\nTest Summary:"); + println!("Total: {}, Passed: {}, Failed: {}", total, passed, total - passed); + + // Create test suite result + let suite_result = TestSuiteResult { + language: "rust".to_string(), + timestamp: chrono::Utc::now().to_rfc3339(), + total_tests: total, + passed_tests: passed, + failed_tests: total - passed, + results, + }; + + // Write results to file + let output_file = "rust_test_results.json"; + let json_output = serde_json::to_string_pretty(&suite_result)?; + std::fs::write(output_file, json_output)?; + + println!("Results written to: {}", output_file); + + Ok(()) +} diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs new file mode 100644 index 0000000..fcc210f --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs @@ -0,0 +1,385 @@ +use crate::test_data::*; +use promql_parser::parser as promql; +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +// Decoupled from QueryPatternType: use string category keys +use serde_json::Value; +use std::collections::HashMap; +use std::time::Instant; + +pub struct PatternTester { + patterns: HashMap>, +} + +impl PatternTester { + pub fn new() -> Self { + let mut patterns = HashMap::new(); + + // ONLY_TEMPORAL patterns + let temporal_patterns = vec![ + // Rate pattern + PromQLPattern::new( + Self::build_rate_pattern(), + vec![ + 
"metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + ], + // Some("ONLY_TEMPORAL".to_string()), + ), + // Quantile over time pattern + PromQLPattern::new( + Self::build_quantile_over_time_pattern(), + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + "function_args".to_string(), + ], + // Some("ONLY_TEMPORAL".to_string()), + ), + ]; + + // ONLY_SPATIAL patterns + let spatial_patterns = vec![ + // Sum aggregation pattern + PromQLPattern::new( + Self::build_sum_pattern(), + vec!["metric".to_string(), "aggregation".to_string()], + // Some("ONLY_SPATIAL".to_string()), + ), + // Simple metric pattern + PromQLPattern::new( + Self::build_metric_pattern(), + vec!["metric".to_string()], + // Some("ONLY_SPATIAL".to_string()), + ), + ]; + + // ONE_TEMPORAL_ONE_SPATIAL patterns + let combined_patterns = vec![ + // Sum of rate pattern + PromQLPattern::new( + Self::build_one_temporal_one_spatial_pattern(), + vec![ + "metric".to_string(), + "function".to_string(), + "aggregation".to_string(), + "range_vector".to_string(), + ], + // Some("ONE_TEMPORAL_ONE_SPATIAL".to_string()), + ), + ]; + + // Insert in order from simple to complex to avoid panics + patterns.insert("ONLY_VECTOR".to_string(), spatial_patterns.clone()); + patterns.insert("ONLY_SPATIAL".to_string(), spatial_patterns); + patterns.insert("ONLY_TEMPORAL".to_string(), temporal_patterns); + patterns.insert("ONE_TEMPORAL_ONE_SPATIAL".to_string(), combined_patterns); + + Self { patterns } + } + + pub fn test_query(&self, test_case: &TestCase) -> TestResult { + let start_time = Instant::now(); + let test_id = test_case.id.clone(); + + // Parse the query + let ast = match promql::parse(&test_case.query) { + Ok(ast) => ast, + Err(e) => { + return TestResult { + test_id, + success: false, + error_message: Some(format!("Failed to parse query: {}", e)), + actual_pattern_type: None, + actual_tokens: None, + execution_time_ms: start_time.elapsed().as_secs_f64() * 
1000.0, + }; + } + }; + + // Try to match against all patterns + let mut matched_pattern_type = None; + let mut matched_tokens = None; + + for (pattern_type, pattern_list) in &self.patterns { + for pattern in pattern_list { + let match_result = pattern.matches(&ast); + if match_result.matches { + // If a plain vector selector matched under the spatial patterns, classify as ONLY_VECTOR + let final_type = if pattern_type == "ONLY_SPATIAL" { + if match_result.tokens.contains_key("aggregation") { + pattern_type.clone() + } else if match_result.tokens.contains_key("metric") { + "ONLY_VECTOR".to_string() + } else { + pattern_type.clone() + } + } else { + pattern_type.clone() + }; + + // Debug: show pattern_type and token keys for failing test + // debug removed + matched_pattern_type = Some(final_type); + // Extract only relevant token data to match Python format + let flattened_tokens = Self::flatten_token_data(&match_result.tokens); + matched_tokens = + Some(serde_json::to_value(&flattened_tokens).unwrap_or_default()); + break; + } + } + if matched_pattern_type.is_some() { + break; + } + } + + let execution_time = start_time.elapsed().as_secs_f64() * 1000.0; + + // Check if results match expectations + let expected_type = &test_case.expected_pattern_type; + let success = matched_pattern_type.as_ref() == Some(expected_type); + + TestResult { + test_id, + success, + error_message: if success { + None + } else { + Some(format!( + "Pattern type mismatch. 
Expected: {}, Got: {:?}", + expected_type, matched_pattern_type + )) + }, + actual_pattern_type: matched_pattern_type, + actual_tokens: matched_tokens, + execution_time_ms: execution_time, + } + } + + // No conversion needed anymore; keys are already strings + + fn flatten_token_data( + tokens: &HashMap, + ) -> HashMap { + let mut result = HashMap::new(); + + for (token_name, token_data) in tokens { + // Extract only the relevant data from the token based on what's populated + if let Some(metric) = &token_data.metric { + let mut metric_data = serde_json::Map::new(); + metric_data.insert("name".to_string(), Value::String(metric.name.clone())); + metric_data.insert( + "labels".to_string(), + serde_json::to_value(&metric.labels).unwrap_or(Value::Null), + ); + metric_data.insert( + "at".to_string(), + if let Some(at) = metric.at_modifier { + Value::Number(serde_json::Number::from(at)) + } else { + Value::Null + }, + ); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(metric_data)); + } else if let Some(function) = &token_data.function { + let mut function_data = serde_json::Map::new(); + function_data.insert("name".to_string(), Value::String(function.name.clone())); + let args_values: Vec = function + .args + .iter() + .map(|arg| Value::String(arg.clone())) + .collect(); + function_data.insert("args".to_string(), Value::Array(args_values)); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(function_data)); + } else if let Some(aggregation) = &token_data.aggregation { + let mut aggregation_data = serde_json::Map::new(); + aggregation_data.insert("op".to_string(), Value::String(aggregation.op.clone())); + aggregation_data.insert( + "modifier".to_string(), + if let Some(modifier) = &aggregation.modifier { + serde_json::to_value(modifier).unwrap_or(Value::Null) + } else { + Value::Null + }, + ); + aggregation_data.insert( + "param".to_string(), + if let 
Some(param) = &aggregation.param { + Value::String(param.clone()) + } else { + Value::Null + }, + ); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(aggregation_data)); + } else if let Some(range_vector) = &token_data.range_vector { + let mut range_data = serde_json::Map::new(); + // Convert chrono Duration to human-readable format like Python's "0:05:00" + let total_seconds = range_vector.range.num_seconds() as u64; + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + let range_str = format!("{}:{:02}:{:02}", hours, minutes, seconds); + range_data.insert("range".to_string(), Value::String(range_str)); + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(range_data)); + } else if let Some(subquery) = &token_data.subquery { + let mut subquery_data = serde_json::Map::new(); + // Convert chrono Duration to human-readable format like Python's "0:05:00" + let total_seconds = subquery.range.num_seconds() as u64; + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + let range_str = format!("{}:{:02}:{:02}", hours, minutes, seconds); + subquery_data.insert("range".to_string(), Value::String(range_str)); + if let Some(offset) = &subquery.offset { + subquery_data.insert("offset".to_string(), Value::String(offset.clone())); + } + if let Some(step) = &subquery.step { + subquery_data.insert("step".to_string(), Value::String(step.clone())); + } + // Note: Skipping AST for now since it's not serializable + result.insert(token_name.clone(), Value::Object(subquery_data)); + } else if let Some(number) = &token_data.number { + let mut number_data = serde_json::Map::new(); + number_data.insert( + "value".to_string(), + Value::Number( + serde_json::Number::from_f64(number.value) + .unwrap_or(serde_json::Number::from(0)), + ), + ); + 
result.insert(token_name.clone(), Value::Object(number_data)); + } + + // Handle special case for function_args (like Python does) + if token_name == "function_args" { + if let Some(function) = &token_data.function { + let args_values: Vec = function + .args + .iter() + .map(|arg| Value::String(arg.clone())) + .collect(); + result.insert(token_name.clone(), Value::Array(args_values)); + } + } + } + + result + } + + fn build_rate_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let args: Vec>> = vec![ms]; + + PromQLPatternBuilder::function(vec!["rate", "increase"], args, Some("function"), None) + } + + fn build_quantile_over_time_pattern() -> Option> { + let num = PromQLPatternBuilder::number(None, None); + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let args: Vec>> = vec![num, ms]; + + PromQLPatternBuilder::function( + vec!["quantile_over_time"], + args, + Some("function"), + Some("function_args"), + ) + } + + fn build_sum_pattern() -> Option> { + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + None, + None, + Some("aggregation"), + ) + } + + fn build_metric_pattern() -> Option> { + PromQLPatternBuilder::metric(None, None, None, Some("metric")) + } + + fn build_one_temporal_one_spatial_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let func_args: Vec>> = vec![ms]; + + let func = PromQLPatternBuilder::function( + vec![ + "quantile_over_time", + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "rate", + "increase", + ], + func_args, + 
Some("function"), + None, + ); + + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + func, + None, + None, + None, + Some("aggregation"), + ) + } + + fn build_sum_rate_pattern() -> Option> { + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + + let func_args: Vec>> = vec![ms]; + + let func = PromQLPatternBuilder::function( + vec!["rate", "increase"], + func_args, + Some("function"), + None, + ); + + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "min", "max"], + func, + None, + None, + None, + Some("aggregation"), + ) + } +} + +impl Default for PatternTester { + fn default() -> Self { + Self::new() + } +} diff --git a/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs new file mode 100644 index 0000000..dc86bf0 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/rust_tests/src/test_data.rs @@ -0,0 +1,87 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestData { + pub test_cases: Vec, + pub pattern_builder_tests: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestCase { + pub id: String, + pub description: String, + pub query: String, + pub expected_pattern_type: String, + pub expected_tokens: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ExpectedToken { + Metric(MetricToken), + Function(FunctionToken), + Aggregation(AggregationToken), + RangeVector(RangeToken), + FunctionArgs(Vec), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetricToken { + pub name: String, + pub labels: HashMap, + pub at_modifier: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FunctionToken { + pub name: String, 
+} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationToken { + pub op: String, + pub modifier: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RangeToken { + pub range: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PatternBuilderTest { + pub id: String, + pub description: String, + pub builder_call: String, + pub parameters: serde_json::Value, + pub expected_pattern: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TestResult { + pub test_id: String, + pub success: bool, + pub error_message: Option, + pub actual_pattern_type: Option, + pub actual_tokens: Option, + pub execution_time_ms: f64, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TestSuiteResult { + pub language: String, + pub timestamp: String, + pub total_tests: usize, + pub passed_tests: usize, + pub failed_tests: usize, + pub results: Vec, +} + +impl TestData { + pub fn load_from_file(path: &str) -> Result> { + let content = std::fs::read_to_string(path)?; + let test_data: TestData = serde_json::from_str(&content)?; + Ok(test_data) + } +} diff --git a/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py b/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py new file mode 100755 index 0000000..43f5df2 --- /dev/null +++ b/CommonDependencies/tests/compare_matched_tokens/utilities/master_test_runner.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 + +import os +import sys +import subprocess +import json +from pathlib import Path +from datetime import datetime + + +class MasterTestRunner: + def __init__(self, base_dir: str): + self.base_dir = Path(base_dir).resolve() + self.test_data_file = self.base_dir / "test_data" / "promql_queries.json" + self.python_dir = self.base_dir / "python_tests" + self.rust_dir = self.base_dir / "rust_tests" + self.comparison_dir = self.base_dir / "comparison_tests" + + def run_all_tests(self): + """Run the 
complete test suite: Python, Rust, and comparison""" + + print("🚀 Starting Cross-Language PromQL Pattern Testing") + print("=" * 60) + + if not self.test_data_file.exists(): + print(f"❌ Test data file not found: {self.test_data_file}") + return False + + # Run Python tests + print("\n📍 Step 1: Running Python tests...") + python_success = self._run_python_tests() + + # Run Rust tests + print("\n📍 Step 2: Running Rust tests...") + rust_success = self._run_rust_tests() + + # Compare results + if python_success and rust_success: + print("\n📍 Step 3: Comparing results...") + self._compare_results() + else: + print("\n⚠️ Skipping comparison due to test failures") + + print(f"\n✅ Test suite completed at {datetime.now()}") + return python_success and rust_success + + def _run_python_tests(self) -> bool: + """Run Python test suite""" + try: + os.chdir(self.python_dir) + + cmd = [sys.executable, "test_runner.py", str(self.test_data_file)] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Python test output:") + print(result.stdout) + if result.stderr: + print("Python test errors:") + print(result.stderr) + + return result.returncode == 0 + + except Exception as e: + print(f"❌ Error running Python tests: {e}") + return False + finally: + os.chdir(self.base_dir) + + def _run_rust_tests(self) -> bool: + """Run Rust test suite""" + try: + os.chdir(self.rust_dir) + + # Build the Rust project first + print("Building Rust test runner...") + build_result = subprocess.run( + ["cargo", "build", "--release"], capture_output=True, text=True + ) + + if build_result.returncode != 0: + print("❌ Rust build failed:") + print(build_result.stderr) + return False + + # Run the tests + cmd = ["cargo", "run", "--release", "--", str(self.test_data_file)] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Rust test output:") + print(result.stdout) + if result.stderr: + 
print("Rust test errors:") + print(result.stderr) + + return result.returncode == 0 + + except Exception as e: + print(f"❌ Error running Rust tests: {e}") + return False + finally: + os.chdir(self.base_dir) + + def _compare_results(self): + """Compare Python and Rust test results""" + try: + python_results = self.python_dir / "python_test_results.json" + rust_results = self.rust_dir / "rust_test_results.json" + + if not python_results.exists(): + print("❌ Python results file not found") + return + + if not rust_results.exists(): + print("❌ Rust results file not found") + return + + os.chdir(self.comparison_dir) + + cmd = [ + sys.executable, + "result_comparator.py", + str(python_results), + str(rust_results), + ] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + print("Comparison output:") + print(result.stdout) + if result.stderr: + print("Comparison errors:") + print(result.stderr) + + except Exception as e: + print(f"❌ Error comparing results: {e}") + finally: + os.chdir(self.base_dir) + + def generate_test_summary(self): + """Generate a comprehensive test summary""" + summary = { + "timestamp": datetime.utcnow().isoformat(), + "test_data_file": str(self.test_data_file), + "files_generated": [], + } + + # Collect generated files + for results_file in [ + self.python_dir / "python_test_results.json", + self.rust_dir / "rust_test_results.json", + self.comparison_dir / "comparison_report.json", + ]: + if results_file.exists(): + summary["files_generated"].append(str(results_file)) + + summary_file = self.base_dir / "test_summary.json" + with open(summary_file, "w") as f: + json.dump(summary, f, indent=2) + + print(f"📊 Test summary written to: {summary_file}") + + +def main(): + script_dir = Path(__file__).parent.parent + runner = MasterTestRunner(str(script_dir)) + + success = runner.run_all_tests() + runner.generate_test_summary() + + if success: + print("\n🎉 All tests completed successfully!") + sys.exit(0) 
+ else: + print("\n💥 Some tests failed. Check the output above.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/Cargo.lock b/CommonDependencies/tests/compare_patterns/Cargo.lock new file mode 100644 index 0000000..920e610 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/Cargo.lock @@ -0,0 +1,795 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" + 
+[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "compare_patterns_runner" +version = "0.1.0" +dependencies = [ + "promql_utilities", + "serde", + "serde_json", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" 
+dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +dependencies = [ + "equivalent", + "hashbrown", +] + 
+[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + 
"regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" 
+version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = 
"windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/compare_patterns/Cargo.toml b/CommonDependencies/tests/compare_patterns/Cargo.toml new file mode 100644 index 0000000..8d04a6f --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "compare_patterns_runner" +version = "0.1.0" +edition = "2021" + +[dependencies] +promql_utilities = { path = 
"../../dependencies/rs/promql_utilities" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/CommonDependencies/tests/compare_patterns/README.md b/CommonDependencies/tests/compare_patterns/README.md new file mode 100644 index 0000000..2b16ff1 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/README.md @@ -0,0 +1,18 @@ +Compare patterns generated by Python and Rust implementations. + +Steps +1. Generate Python patterns: + + python3 tests/compare_patterns/python_generate_patterns.py + +2. Build and run the Rust generator. From repository root: + + cargo run --manifest-path dependencies/rs/promql_utilities/Cargo.toml --bin tests/compare_patterns/rust_generate_patterns.rs + + (Alternatively, compile the small program with `rustc`.) + +3. Compare: + + python3 tests/compare_patterns/compare_serialized_patterns.py + +The comparator returns exit code 0 when patterns are equivalent. diff --git a/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py b/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py new file mode 100644 index 0000000..761962f --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/compare_serialized_patterns.py @@ -0,0 +1,56 @@ +"""Compare serialized pattern JSON files from Python and Rust generators. + +Exits with code 0 if equivalent, 1 otherwise. 
+""" + +import json +import os +import sys + + +def load(path): + with open(path, "r") as f: + return json.load(f) + + +def normalize(value): + """Normalize pattern structures for comparison: sort keys in dicts and recursively apply.""" + if isinstance(value, dict): + return {k: normalize(value[k]) for k in sorted(value.keys())} + if isinstance(value, list): + return [normalize(v) for v in value] + return value + + +def main(): + base = os.path.dirname(__file__) + out_dir = os.path.join(base, "out") + py_path = os.path.join(out_dir, "python_patterns.json") + rs_path = os.path.join(out_dir, "rust_patterns.json") + + if not os.path.exists(py_path) or not os.path.exists(rs_path): + print( + "Missing generated pattern files. Run python_generate_patterns.py and rust generator." + ) + sys.exit(2) + + py = load(py_path) + rs = load(rs_path) + + py_n = normalize(py) + rs_n = normalize(rs) + + if py_n == rs_n: + print("Patterns match") + sys.exit(0) + else: + print("Patterns differ") + print("--- Python patterns ---") + print(json.dumps(py_n, indent=2)) + print("--- Rust patterns ---") + print(json.dumps(rs_n, indent=2)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/python_generate_patterns.py b/CommonDependencies/tests/compare_patterns/python_generate_patterns.py new file mode 100644 index 0000000..3ca7bea --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/python_generate_patterns.py @@ -0,0 +1,110 @@ +"""Generate JSON-serialized patterns from Python builder. 
+ +Writes to tests/compare_patterns/out/python_patterns.json +""" + +import json +import os +import sys + +root = os.path.dirname(__file__) +sys.path.append( + os.path.abspath( + os.path.join(root, "../../CommonDependencies/dependencies/py/promql_utilities") + ) +) + +from promql_utilities.ast_matching.PromQLPatternBuilder import PromQLPatternBuilder + + +def build_all(): + patterns = {} + + temporal = [ + PromQLPatternBuilder.function( + ["rate", "increase"], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + ), + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + ] + + spatial = [ + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="aggregation", + ), + PromQLPatternBuilder.metric(collect_as="metric"), + ] + + combined = [ + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ), + PromQLPatternBuilder.aggregation( + ["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "rate", + "increase", + ], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + ), + 
collect_as="aggregation", + ), + ] + + patterns["ONLY_TEMPORAL"] = temporal + patterns["ONLY_SPATIAL"] = spatial + patterns["ONE_TEMPORAL_ONE_SPATIAL"] = combined + + return patterns + + +def main(): + out_dir = os.path.join(os.path.dirname(__file__), "out") + os.makedirs(out_dir, exist_ok=True) + patterns = build_all() + out_path = os.path.join(out_dir, "python_patterns.json") + with open(out_path, "w") as f: + # sort by keys + sorted_patterns = {k: patterns[k] for k in sorted(patterns.keys())} + json.dump(sorted_patterns, f, indent=2) + print("Wrote", out_path) + + +if __name__ == "__main__": + main() diff --git a/CommonDependencies/tests/compare_patterns/src/main.rs b/CommonDependencies/tests/compare_patterns/src/main.rs new file mode 100644 index 0000000..6c429f1 --- /dev/null +++ b/CommonDependencies/tests/compare_patterns/src/main.rs @@ -0,0 +1,156 @@ +use promql_utilities::ast_matching::promql_pattern::TokenData; +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +use serde_json::json; +use std::collections::HashMap; +use std::fs; + +fn tokendata_to_json(_t: &TokenData) -> serde_json::Value { + // We only need the pattern ASTs themselves; tokens are runtime and can be skipped. 
+ json!(null) +} + +fn main() { + let mut out: HashMap> = HashMap::new(); + + // ONLY_TEMPORAL patterns + let mut only_temporal_patterns = Vec::new(); + + // Pattern 1: rate/increase functions + let ms1 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let func_args1: Vec>> = vec![ms1]; + let pattern_1 = PromQLPatternBuilder::function(vec!["rate", "increase"], func_args1, Some("function"), None); + let pattern1 = PromQLPattern::new( + pattern_1, + vec!["metric".to_string(), "function".to_string(), "range_vector".to_string()], + ); + if let Some(ast) = pattern1.ast_pattern { + only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: quantile_over_time function + let ms2 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let func_args2: Vec>> = vec![ + PromQLPatternBuilder::number(None, None), + ms2, + ]; + let pattern_2 = PromQLPatternBuilder::function(vec!["quantile_over_time"], func_args2, Some("function"), Some("function_args")); + let pattern2 = PromQLPattern::new( + pattern_2, + vec!["metric".to_string(), "function".to_string(), "range_vector".to_string(), "function_args".to_string()], + ); + if let Some(ast) = pattern2.ast_pattern { + only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONLY_TEMPORAL".to_string(), only_temporal_patterns); + + // ONLY_SPATIAL patterns + let mut only_spatial_patterns = Vec::new(); + + // Pattern 1: aggregation functions + let pattern_3 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + None, + None, + Some("aggregation") + ); + let pattern3 = PromQLPattern::new( + pattern_3, + vec!["metric".to_string(), 
"aggregation".to_string()], + ); + if let Some(ast) = pattern3.ast_pattern { + only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: basic metric + let pattern_4 = PromQLPatternBuilder::metric(None, None, None, Some("metric")); + let pattern4 = PromQLPattern::new( + pattern_4, + vec!["metric".to_string()], + ); + if let Some(ast) = pattern4.ast_pattern { + only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONLY_SPATIAL".to_string(), only_spatial_patterns); + + // ONE_TEMPORAL_ONE_SPATIAL patterns + let mut one_temporal_one_spatial_patterns = Vec::new(); + + // Pattern 1: aggregation of quantile_over_time + let ms3 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let quantile_func_args: Vec>> = vec![ + PromQLPatternBuilder::number(None, None), + ms3, + ]; + let quantile_func = PromQLPatternBuilder::function(vec!["quantile_over_time"], quantile_func_args, Some("function"), Some("function_args")); + let pattern_5 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + quantile_func, + None, + None, + None, + Some("aggregation") + ); + let pattern5 = PromQLPattern::new( + pattern_5, + vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "function_args".to_string(), "aggregation".to_string()], + ); + if let Some(ast) = pattern5.ast_pattern { + one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + // Pattern 2: aggregation of various temporal functions + let ms4 = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let temporal_func_args: Vec>> = vec![ms4]; + let temporal_func = PromQLPatternBuilder::function( + vec!["sum_over_time", "count_over_time", "avg_over_time", 
"min_over_time", "max_over_time", "rate", "increase"], + temporal_func_args, + Some("function"), + None + ); + let pattern_6 = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + temporal_func, + None, + None, + None, + Some("aggregation") + ); + let pattern6 = PromQLPattern::new( + pattern_6, + vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "aggregation".to_string()], + ); + if let Some(ast) = pattern6.ast_pattern { + one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); + } + + out.insert("ONE_TEMPORAL_ONE_SPATIAL".to_string(), one_temporal_one_spatial_patterns); + + let out_dir = std::path::Path::new("./out"); + std::fs::create_dir_all(out_dir).unwrap(); + let out_path = out_dir.join("rust_patterns.json"); + // sort by keys + let sorted: HashMap<_, _> = out.into_iter().collect(); + let s = serde_json::to_string_pretty(&sorted).unwrap(); + fs::write(&out_path, s).unwrap(); + println!("Wrote {}", out_path.display()); +} diff --git a/CommonDependencies/tests/rust_pattern_matching/Cargo.lock b/CommonDependencies/tests/rust_pattern_matching/Cargo.lock new file mode 100644 index 0000000..a8c86c6 --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/Cargo.lock @@ -0,0 +1,1204 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + +[[package]] +name = "cc" +version = "1.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "cfgrammar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.2.0", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "deranged" +version = "0.5.3" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + 
"js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92119844f513ffa41556430369ab02c295a3578af21cf945caa3e9e0c2481ac3" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "lrlex" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71364e868116ee891b0f93559eb9eca5675bec28b22d33c58481e66c3951d7e" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b265a81193d94c92d1c9c715498d6fa505bce3f789ceecb24ab5d6fa2dbc71" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = "lrtable" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc36d15214ca997a5097845be1f932b7ee6125c36f5c5e55f6c49e027ddeb6de" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "packedvec" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69e0a534dd2e6aefce319af62a0aa0066a76bdfcec0201dfe02df226bc9ec70" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "parking_lot" 
+version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "promql-parser" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d851f6523a8215e2fbf86b6cef4548433f8b76092e9ffb607105de52ae63fd" +dependencies = [ + "cfgrammar", + "chrono", + "lazy_static", + "lrlex", + "lrpar", + "regex", +] + +[[package]] +name = "promql_cross_lang_tests" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "promql_utilities", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "promql_utilities" +version = "0.1.0" +dependencies = [ + "chrono", + "promql-parser", + "serde", + "serde_json", + "thiserror", + "tracing", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.224" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.224" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.224" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "sparsevec" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4a8ce3045f0fe173fb5ae3c6b7dcfbec02bfa650bb8618b2301f52af0134d" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031" 
+dependencies = [ + "deranged", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" + +[[package]] +name = "time-macros" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + +[[package]] +name = "vob" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.0", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" 
+version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link 0.1.3", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/CommonDependencies/tests/rust_pattern_matching/Cargo.toml b/CommonDependencies/tests/rust_pattern_matching/Cargo.toml new file mode 100644 index 0000000..6712b6b --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "promql_cross_lang_tests" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "test_runner" +path = "src/main.rs" + +[dependencies] +promql-parser = "0.5.0" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1.0", features = ["full"] } +chrono = "0.4.41" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[dependencies.promql_utilities] +path = "../../../CommonDependencies/dependencies/rs/promql_utilities" diff --git a/CommonDependencies/tests/rust_pattern_matching/src/main.rs b/CommonDependencies/tests/rust_pattern_matching/src/main.rs new file mode 100644 index 0000000..3a540af --- /dev/null +++ b/CommonDependencies/tests/rust_pattern_matching/src/main.rs @@ -0,0 +1,168 @@ +use promql_utilities::ast_matching::{PromQLPattern, PromQLPatternBuilder}; +use promql_utilities::query_logics::enums::QueryPatternType; +use serde_json::Value; +use std::collections::HashMap; + +// Helper functions (these would be closures or separate methods) +fn temporal_pattern( + pattern_type: &str, + blocks: &HashMap>>, +) -> PromQLPattern { + 
PromQLPattern::new( + blocks[pattern_type].clone(), + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + ], + ) +} + +fn spatial_pattern( + pattern_type: &str, + blocks: &HashMap>>, +) -> PromQLPattern { + PromQLPattern::new( + blocks[pattern_type].clone(), + vec!["metric".to_string(), "aggregation".to_string()], + ) +} + +fn spatial_of_temporal_pattern(temporal_block: &Option>) -> PromQLPattern { + let pattern = PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + temporal_block.clone(), + None, + None, + None, + Some("aggregation"), + ); + PromQLPattern::new( + pattern, + vec![ + "metric".to_string(), + "function".to_string(), + "range_vector".to_string(), + "aggregation".to_string(), + ], + ) +} + +fn main() { + let mut temporal_pattern_blocks = HashMap::new(); + temporal_pattern_blocks.insert( + "quantile".to_string(), + PromQLPatternBuilder::function( + vec!["quantile_over_time"], + vec![ + PromQLPatternBuilder::number(None, Some("quantile_param")), + PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ), + ], + Some("function"), + Some("function_args"), + ), + ); + + temporal_pattern_blocks.insert( + "generic".to_string(), + PromQLPatternBuilder::function( + vec![ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + "increase", + "rate", + ], + vec![PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + )], + Some("function"), + Some("function_args"), + ), + ); + + // Create spatial pattern blocks + let mut spatial_pattern_blocks = HashMap::new(); + spatial_pattern_blocks.insert( + "generic".to_string(), + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, 
+ None, + None, + Some("aggregation"), + ), + ); + + // Create controller patterns + let mut controller_patterns = HashMap::new(); + controller_patterns.insert( + QueryPatternType::OnlyTemporal, + vec![ + temporal_pattern("quantile", &temporal_pattern_blocks), + temporal_pattern("generic", &temporal_pattern_blocks), + ], + ); + controller_patterns.insert( + QueryPatternType::OnlySpatial, + vec![spatial_pattern("generic", &spatial_pattern_blocks)], + ); + controller_patterns.insert( + QueryPatternType::OneTemporalOneSpatial, + vec![ + spatial_of_temporal_pattern(&temporal_pattern_blocks["quantile"]), + spatial_of_temporal_pattern(&temporal_pattern_blocks["generic"]), + ], + ); + + let queries = vec![ + // "sum_over_time(fake_metric_total[1m])", + // "count_over_time(fake_metric_total[1m])", + // "quantile_over_time(0.95, fake_metric_total[1m])", + // "sum by (instance, job) (fake_metric_total)", + // "count without (instance) (fake_metric_total)", + // "quantile by (instance) (0.95, fake_metric_total)", + // "sum by (instance, job) (rate(fake_metric_total[1m]))", + "sum by (instance, job) (sum_over_time(fake_metric_total[1m]))", + "sum by (instance, job) (count_over_time(fake_metric_total[1m]))", + ]; + + for query in queries { + let ast = match promql_parser::parser::parse(&query) { + Ok(parsed) => parsed, + Err(e) => { + eprintln!("Failed to parse query '{}': {}", query, e); + continue; + } + }; + + let mut found_match = None; + for (pattern_type, patterns) in &controller_patterns { + for pattern in patterns { + // println!( + // "Trying pattern type: {:?} for query: {}", + // pattern_type, query + // ); + let match_result = pattern.matches(&ast); + if match_result.matches { + println!("Query: {}; Pattern: {:?}", query, pattern_type); + println!("Match result: {:?}", match_result); + found_match = Some((*pattern_type, match_result)); + break; + } + } + if found_match.is_some() { + break; + } + } + } +} diff --git a/Controller/.gitignore b/Controller/.gitignore new 
file mode 100644 index 0000000..35758b0 --- /dev/null +++ b/Controller/.gitignore @@ -0,0 +1,3 @@ +**/*.pyc +**/__pycache__ +.DS_Store diff --git a/Controller/Dockerfile new file mode 100644 index 0000000..7b66ff0 --- /dev/null +++ b/Controller/Dockerfile @@ -0,0 +1,24 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="Main Controller for SketchDB" + +# Set working directory +WORKDIR /app + +# Copy requirements first for better layer caching +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY classes/ ./classes/ +COPY utils/ ./utils/ +COPY main_controller.py . + +# Create directories for input/output +RUN mkdir -p /app/input /app/output + +# Set the entry point +ENTRYPOINT ["python", "main_controller.py"] \ No newline at end of file diff --git a/Controller/LICENSE b/Controller/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/Controller/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Controller/README.md b/Controller/README.md new file mode 100644 index 0000000..8be2f8b --- /dev/null +++ b/Controller/README.md @@ -0,0 +1,124 @@ +# Controller + +The Controller is ASAP's auto-configuration service that determines optimal sketch parameters based on query workload and SLAs. + +## Purpose + +Given a workload of PromQL queries, the Controller: +1. Analyzes each query to determine which sketch algorithm to use +2. Computes sketch parameters (size, accuracy) based on SLAs +3. Generates `streaming_config.yaml` for ArroyoSketch +4. Generates `inference_config.yaml` for QueryEngine + +This automation eliminates manual configuration and ensures sketches meet performance targets. + +## How It Works + +### Input: controller-config.yaml + +The user provides a configuration file describing: +- **Queries** to accelerate +- **Metrics** metadata (labels, cardinality estimates) +- **SLAs** (accuracy, latency targets) (**CURRENTLY IGNORED**) + +**Example:** +```yaml +query_groups: + - id: 1 + queries: + - "quantile by (job) (0.99, http_request_duration_seconds)" + - "sum by (job) (rate(http_requests_total[5m]))" + client_options: + repetitions: 10 + starting_delay: 60 + controller_options: + accuracy_sla: 0.99 # 99% accuracy + latency_sla: 1.0 # 1 second max latency + +metrics: + - metric: "http_request_duration_seconds" + labels: ["job", "instance", "method", "status"] + cardinality: + job: 10 + instance: 100 + method: 5 + status: 4 + - metric: "http_requests_total" + labels: ["job", "instance", "method", "status"] +``` + +### Process: Analyze and Configure + +1. **Parse queries** (`utils/parse_query.py`) + - Extract query type (quantile, sum, avg, etc.) 
+ - Identify aggregation labels + - Determine time range + +2. **Select sketch algorithm** (`utils/logics.py::decide_sketch_type()`) + - Quantile queries → DDSketch or KLL + - Sum/count queries → Simple aggregation + - Consider query patterns and SLAs + +3. **Compute sketch parameters** (`utils/logics.py`) + - Calculate sketch size based on accuracy SLA + - Determine merge strategy for aggregations + - Set up windowing parameters + +4. **Generate configs** + - `streaming_config.yaml` → Describes which sketches to build + - `inference_config.yaml` → Describes how to query sketches + +### Output Files + +**streaming_config.yaml** (for ArroyoSketch): +```yaml +sketches: + - metric: "http_request_duration_seconds" + sketch_type: "ddsketch" + parameters: + alpha: 0.01 # 1% relative error + max_num_bins: 2048 + aggregation: + - "job" + window: "1h" +``` + +**inference_config.yaml** (for QueryEngine): +```yaml +sketches: + - metric: "http_request_duration_seconds" + sketch_type: "ddsketch" + labels: ["job"] + kafka_topic: "sketches" +``` + +## Key Files + +**TODO** + +## Configuration Schema + +### controller-config.yaml + +```yaml +query_groups: + - id: # Unique group ID + queries: # List of PromQL queries + - "" + client_options: # Query execution options + repetitions: # How many times to run + starting_delay: # Delay before first run (seconds) + repetition_delay: # Delay between runs (seconds) + query_time_offset: # Time offset for queries (seconds) + controller_options: + accuracy_sla: # 0.0-1.0 (default: 0.99) + latency_sla: # Seconds (default: 1.0) + sketch_type: # Optional: force specific sketch + custom_sketch_params: # Optional: override params + +metrics: + - metric: "" # Prometheus metric name + labels: [] # List of label names + cardinality: # Optional: estimated cardinalities + : +``` diff --git a/Controller/classes/SingleQueryConfig.py b/Controller/classes/SingleQueryConfig.py new file mode 100644 index 0000000..5638f10 --- /dev/null +++ 
b/Controller/classes/SingleQueryConfig.py @@ -0,0 +1,544 @@ +import copy +from loguru import logger + +import promql_parser +from typing import Optional, Tuple, List + +from promql_utilities.ast_matching.PromQLPattern import PromQLPattern, MatchResult +from promql_utilities.ast_matching.PromQLPatternBuilder import PromQLPatternBuilder +from promql_utilities.query_logics.enums import ( + QueryPatternType, + QueryTreatmentType, + CleanupPolicy, +) +from promql_utilities.query_logics.logics import ( + get_is_collapsable, + map_statistic_to_precompute_operator, +) +from promql_utilities.query_logics.parsing import ( + get_metric_and_spatial_filter, + get_statistics_to_compute, +) +from promql_utilities.query_logics.parsing import get_spatial_aggregation_output_labels +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames + +from promql_utilities.streaming_config.StreamingAggregationConfig import ( + StreamingAggregationConfig, +) +from utils import logics + +# import utils.promql + +from promql_utilities.streaming_config.MetricConfig import MetricConfig + + +class SingleQueryConfig: + def __init__( + self, + config: dict, + metric_config: MetricConfig, + prometheus_scrape_interval: int, + streaming_engine: str, + sketch_parameters: dict, + ): + self.config = config + self.query = config["query"] + self.query_ast = promql_parser.parse(self.query) + self.t_repeat = int(config["t_repeat"]) + self.prometheus_scrape_interval = prometheus_scrape_interval + self.__dict__.update(config["options"]) + # self.accuracy_sla = float(config["accuracy_sla"]) + # self.latency_sla = float(config["latency_sla"]) + self.metric_config = metric_config + self.streaming_engine = streaming_engine + self.sketch_parameters = sketch_parameters + self.range_duration = config["range_duration"] + self.step = config["step"] + + self.patterns = { + QueryPatternType.ONLY_TEMPORAL: [ + PromQLPattern( + PromQLPatternBuilder.function( + "quantile_over_time", + 
PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ) + ), + PromQLPattern( + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + # "stddev_over_time", + # "stdvar_over_time", + "increase", + "rate", + ], + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ) + ), + ], + # TODO: add topk/bottomk + QueryPatternType.ONLY_SPATIAL: [ + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + "topk", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="aggregation", + ) + ) + ], + # TODO: need some way of specifying pattern using an existing pattern + QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: [ + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.function( + "quantile_over_time", + PromQLPatternBuilder.number(), + PromQLPatternBuilder.matrix_selector( + PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ) + ), + PromQLPattern( + PromQLPatternBuilder.aggregation( + [ + "sum", + "count", + "avg", + "quantile", + "min", + "max", + # "stddev", + # "stdvar", + ], + PromQLPatternBuilder.function( + [ + "sum_over_time", + "count_over_time", + "avg_over_time", + "min_over_time", + "max_over_time", + # "stddev_over_time", + # "stdvar_over_time", + "increase", + "rate", + ], + PromQLPatternBuilder.matrix_selector( + 
PromQLPatternBuilder.metric(collect_as="metric"), + collect_as="range_vector", + ), + collect_as="function", + collect_args_as="function_args", + ), + collect_as="aggregation", + ) + ), + ], + } + + self.query_pattern_type = None + self.query_pattern_match = None + self.query_treatment_type = None + + self.process_query() + + def process_query(self): + query_pattern_type, match = self.match_query_pattern() + + if query_pattern_type and match: + self.query_pattern_type = query_pattern_type + self.query_pattern_match = match + self.query_treatment_type = self.get_query_treatment_type() + logger.debug("Query treatment type: {}", self.query_treatment_type) + else: + # self.logger.warning("Query pattern not supported: %s", self.query) + logger.warning("Query pattern not supported: {}", self.query) + + def should_be_performant(self) -> bool: + if self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + # Check quantile_over_time, rate, increase + # Calculate number of data points per key + function_name = self.query_pattern_match.tokens["function"]["name"] + if function_name in ["rate", "increase", "quantile_over_time"]: + num_data_points_per_tumbling_window = ( + self.t_repeat / self.prometheus_scrape_interval + ) + range_duration = int( + self.query_pattern_match.tokens["range_vector"][ + "range" + ].total_seconds() + ) + if num_data_points_per_tumbling_window < 60: + logger.info( + "[Performance Check Failed] num_data_points_per_tumbling_window {} < 60", + num_data_points_per_tumbling_window, + ) + return False + # bound time for merging for quantile_over_time + if function_name == "quantile_over_time": + if range_duration / self.t_repeat > 15: + logger.info( + "[Performance Check Failed] range_duration / t_repeat {} > 15", + range_duration / self.t_repeat, + ) + return False + return True + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + return True + elif self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: + # TODO: might need 
to add checks here + return True + else: + return True + + def is_supported(self) -> bool: + return ( + self.query_pattern_type is not None and self.query_pattern_match is not None + ) + + def match_query_pattern( + self, + ) -> Tuple[Optional[QueryPatternType], Optional[MatchResult]]: + for pattern_type, patterns in self.patterns.items(): + for pattern in patterns: + match = pattern.matches(self.query_ast, debug=False) + if match: + logger.debug("Matched pattern: {}", pattern_type) + return pattern_type, match + return None, None + + def get_query_treatment_type(self): + assert self.query_pattern_type and self.query_pattern_match + + if ( + self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + if self.query_pattern_match.tokens["function"]["name"] in [ + "quantile_over_time", + "sum_over_time", + "count_over_time", + "avg_over_time", + ]: + return QueryTreatmentType.APPROXIMATE + else: + return QueryTreatmentType.EXACT + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + if self.query_pattern_match.tokens["aggregation"]["op"] in [ + "quantile", + "sum", + "count", + "avg", + "topk", + ]: + return QueryTreatmentType.APPROXIMATE + else: + return QueryTreatmentType.EXACT + else: + raise ValueError("Invalid query pattern type") + + def get_streaming_aggregation_configs( + self, + ) -> Tuple[List[StreamingAggregationConfig], int]: + assert ( + self.query_pattern_type + and self.query_pattern_match + and self.query_treatment_type + ) + + template_config = StreamingAggregationConfig() + template_config.aggregationId = -1 + # template_config.metric = self.query_pattern_match.tokens["metric"]["name"] + + # setting spatial filter + # if self.query_pattern_match.tokens["metric"]["labels"].matchers: + # template_config.spatialFilter = ( + # self.query_pattern_match.tokens["metric"]["ast"] + # .prettify() + # .split("{")[1] + # .split("}")[0] + # ) + # template_config.metric = 
template_config.metric.split("{")[0] + # else: + # template_config.spatialFilter = "" + + template_config.metric, template_config.spatialFilter = ( + get_metric_and_spatial_filter(self.query_pattern_match) + ) + + statistics_to_compute = get_statistics_to_compute( + self.query_pattern_type, self.query_pattern_match + ) + + # if ( + # self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL + # or self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + # ): + # statistic_to_compute = self.query_pattern_match.tokens["function"][ + # "name" + # ].split("_")[0] + # template_config.tumblingWindowSize = self.t_repeat + # elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + # statistic_to_compute = self.query_pattern_match.tokens["aggregation"]["op"] + # template_config.tumblingWindowSize = self.prometheus_scrape_interval + # else: + # raise ValueError("Invalid query pattern type") + + configs = [] + + for statistic_to_compute in statistics_to_compute: + + aggregation_type, aggregation_sub_type = ( + map_statistic_to_precompute_operator( + statistic_to_compute, self.query_treatment_type + ) + ) + + # NEW: Set window parameters (auto-decides sliding vs tumbling based on query type) + # Issue #236: Sliding windows for ONLY_TEMPORAL queries (except DeltaSetAggregator) + # Issue #329: For range queries, use min(t_repeat, step) as effective repeat interval + logics.set_window_parameters( + self.query_pattern_type, + self.query_pattern_match, + self.t_repeat, + self.prometheus_scrape_interval, + aggregation_type, + template_config, + self.step, + ) + + # for aggregation_type, aggregation_sub_type in list_of_precompute_operators: + + all_labels = self.metric_config.config[template_config.metric] + + if self.query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + template_config.labels["rollup"] = KeyByLabelNames([]) + + logics.set_subpopulation_labels( + statistic_to_compute, aggregation_type, all_labels, template_config + ) + + # if 
logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["grouping"] = KeyByLabelNames([]) + # template_config.labels["aggregated"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # else: + # template_config.labels["grouping"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # template_config.labels["aggregated"] = KeyByLabelNames([]) + + elif self.query_pattern_type == QueryPatternType.ONLY_SPATIAL: + # aggregation_modifier = self.query_pattern_match.tokens["aggregation"][ + # "modifier" + # ] + # aggregation_modifier_labels = None + # if aggregation_modifier.type == aggregation_modifier.type.By: + # aggregation_modifier_labels = KeyByLabelNames( + # aggregation_modifier.labels + # ) + # elif aggregation_modifier.type == aggregation_modifier.type.Without: + # aggregation_modifier_labels = self.metric_config.config[ + # template_config.metric + # ] - KeyByLabelNames(aggregation_modifier.labels) + # else: + # raise ValueError("Invalid aggregation modifier") + + spatial_aggregation_output_labels = ( + get_spatial_aggregation_output_labels( + self.query_pattern_match, all_labels + ) + ) + + template_config.labels["rollup"] = ( + all_labels - spatial_aggregation_output_labels + ) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + spatial_aggregation_output_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["aggregated"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + # template_config.labels["grouping"] = KeyByLabelNames([]) + # else: + # template_config.labels["aggregated"] = KeyByLabelNames([]) + # template_config.labels["grouping"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + + elif self.query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL: + 
collapsable = get_is_collapsable( + self.query_pattern_match.tokens["function"]["name"], + self.query_pattern_match.tokens["aggregation"]["op"], + ) + + if not collapsable: + template_config.labels["rollup"] = KeyByLabelNames([]) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + all_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["grouping"] = KeyByLabelNames([]) + # template_config.labels["aggregated"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # else: + # template_config.labels["grouping"] = copy.deepcopy( + # self.metric_config.config[template_config.metric] + # ) + # template_config.labels["aggregated"] = KeyByLabelNames([]) + else: + # aggregation_modifier = self.query_pattern_match.tokens[ + # "aggregation" + # ]["modifier"] + # aggregation_modifier_labels = None + # if aggregation_modifier.type == aggregation_modifier.type.By: + # aggregation_modifier_labels = KeyByLabelNames( + # aggregation_modifier.labels + # ) + # elif aggregation_modifier.type == aggregation_modifier.type.Without: + # aggregation_modifier_labels = self.metric_config.config[ + # template_config.metric + # ] - KeyByLabelNames(aggregation_modifier.labels) + # else: + # raise ValueError("Invalid aggregation modifier") + + spatial_aggregation_output_labels = ( + get_spatial_aggregation_output_labels( + self.query_pattern_match, all_labels + ) + ) + + template_config.labels["rollup"] = ( + all_labels - spatial_aggregation_output_labels + ) + + logics.set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + spatial_aggregation_output_labels, + template_config, + ) + + # if logics.does_precompute_operator_support_subpopulations( + # statistic_to_compute, aggregation_type + # ): + # template_config.labels["aggregated"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + # 
template_config.labels["grouping"] = KeyByLabelNames([]) + # else: + # template_config.labels["aggregated"] = KeyByLabelNames([]) + # template_config.labels["grouping"] = copy.deepcopy( + # aggregation_modifier_labels + # ) + + config = copy.deepcopy(template_config) + config.aggregationType = aggregation_type + config.aggregationSubType = aggregation_sub_type + config.parameters = logics.get_precompute_operator_parameters( + aggregation_type, + aggregation_sub_type, + self.query_pattern_match, + self.sketch_parameters, + ) + + # TODO: remove this hardcoding once promql_utilities.query_logics has updated logic + # https://github.com/SketchDB/Utilities/issues/44 + if aggregation_type in ["CountMinSketch", "HydraKLL"]: + # add another precompute operator for DeltaSetAggregator + delta_set_config = copy.deepcopy(template_config) + if ( + self.streaming_engine == "flink" + or self.streaming_engine == "arroyo" + ): + delta_set_config.aggregationType = "DeltaSetAggregator" + else: + raise ValueError( + f"Unsupported streaming engine: {self.streaming_engine}" + ) + delta_set_config.aggregationSubType = "" + delta_set_config.parameters = logics.get_precompute_operator_parameters( + delta_set_config.aggregationType, + delta_set_config.aggregationSubType, + self.query_pattern_match, + self.sketch_parameters, + ) + configs.append(delta_set_config) + configs.append(config) + + # Calculate cleanup parameter based on cleanup policy and window type + # This must be done AFTER set_window_parameters() has been called + cleanup_policy = self.config["cleanup_policy"] + if cleanup_policy == CleanupPolicy.NO_CLEANUP: + logger.info("Cleanup policy is NO_CLEANUP - cleanup_param will be None") + cleanup_param = None + else: + cleanup_param = logics.get_cleanup_param( + cleanup_policy=cleanup_policy, + query_pattern_type=self.query_pattern_type, + query_pattern_match=self.query_pattern_match, + t_repeat=self.t_repeat, + window_type=template_config.windowType, + 
range_duration=self.range_duration, + step=self.step, + ) + + return configs, cleanup_param diff --git a/Controller/classes/WorkloadConfig.py b/Controller/classes/WorkloadConfig.py new file mode 100644 index 0000000..c77b3fe --- /dev/null +++ b/Controller/classes/WorkloadConfig.py @@ -0,0 +1,17 @@ +from typing import List + +from classes.SingleQueryConfig import SingleQueryConfig + + +class WorkloadConfig: + def __init__(self, singe_query_configs: List[SingleQueryConfig]): + pass + + def remove_common_subexpressions(self): + pass + + def get_streaming_config(self): + pass + + def get_estimation_config(self): + pass diff --git a/Controller/controller-cli-compose.yml.j2 b/Controller/controller-cli-compose.yml.j2 new file mode 100644 index 0000000..87d0a18 --- /dev/null +++ b/Controller/controller-cli-compose.yml.j2 @@ -0,0 +1,19 @@ +# Docker compose Jinja2 template to be rendered and used by asap-cli + +services: + controller: + image: sketchdb-controller:latest # Change to 'asap' prefix + container_name: asap-controller + networks: + - asap-network + volumes: + - {{ input_config_path }}:/app/input/config.yaml:ro + - {{ output_dir }}:/app/outputs + command: [ + "--input_config", "/app/input/config.yaml", + "--output_dir", "/app/outputs", + "--prometheus_scrape_interval", "{{ prometheus_scrape_interval }}", + "--streaming_engine", "{{ streaming_engine }}"{% if punting %}, + "--enable-punting"{% endif %} + ] + restart: "no" diff --git a/Controller/docker-compose.yml.j2 b/Controller/docker-compose.yml.j2 new file mode 100644 index 0000000..d36368f --- /dev/null +++ b/Controller/docker-compose.yml.j2 @@ -0,0 +1,15 @@ +services: + controller: + image: sketchdb-controller:latest + container_name: {{ container_name }} + volumes: + - {{ input_config_path }}:/app/input/config.yaml:ro + - {{ output_dir }}:/app/output + command: [ + "--input_config", "/app/input/config.yaml", + "--output_dir", "/app/output", + "--prometheus_scrape_interval", "{{ prometheus_scrape_interval }}", + 
"--streaming_engine", "{{ streaming_engine }}"{% if punting %}, + "--enable-punting"{% endif %} + ] + restart: "no" diff --git a/Controller/installation/install.sh b/Controller/installation/install.sh new file mode 100755 index 0000000..7fe93cd --- /dev/null +++ b/Controller/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Controller Docker image..." +cd "$PARENT_DIR" +docker build . -f Dockerfile -t sketchdb-controller:latest + +echo "Controller Docker image built successfully: sketchdb-controller:latest" diff --git a/Controller/main_controller.py b/Controller/main_controller.py new file mode 100644 index 0000000..471697b --- /dev/null +++ b/Controller/main_controller.py @@ -0,0 +1,173 @@ +import os +import yaml +import argparse +from loguru import logger + +from classes.SingleQueryConfig import SingleQueryConfig +from promql_utilities.streaming_config.MetricConfig import MetricConfig +from promql_utilities.query_logics.enums import CleanupPolicy + + +def read_config(config_path) -> dict: + config_yaml = None + with open(config_path, "r") as f: + config_yaml = yaml.safe_load(f) + return config_yaml + + +def validate_config(config_yaml): + # NOTE: only allow unique query strings for now + query_strings = set() + for query_group_yaml in config_yaml["query_groups"]: + for query_string in query_group_yaml["queries"]: + if query_string in query_strings: + raise ValueError(f"Duplicate query string: {query_string}") + query_strings.add(query_string) + + +def main(args): + input_config_yaml = read_config(args.input_config) + + validate_config(input_config_yaml) + + metric_config = MetricConfig.from_list(input_config_yaml["metrics"]) + + # Read cleanup policy configuration (default to READ_BASED if not specified) + cleanup_policy_str = input_config_yaml.get("aggregate_cleanup", {}).get( + "policy", "read_based" + ) + try: + cleanup_policy = 
CleanupPolicy(cleanup_policy_str) + except ValueError: + valid_policies = [p.value for p in CleanupPolicy] + raise ValueError( + f"Invalid cleanup policy: '{cleanup_policy_str}'. " + f"Valid options: {valid_policies}" + ) + logger.info("Cleanup policy: {}", cleanup_policy.value) + + # Read sketch parameters configuration (use None to apply defaults in logics.py) + sketch_parameters = input_config_yaml.get("sketch_parameters", None) + if sketch_parameters: + logger.info("Using custom sketch parameters: {}", sketch_parameters) + else: + logger.info("Using default sketch parameters") + + streaming_aggregation_configs_map = {} + query_aggregation_config_keys_map = {} + + for query_group_yaml in input_config_yaml["query_groups"]: + for query_string in query_group_yaml["queries"]: + single_query_config_yaml = { + "query": query_string, + "t_repeat": query_group_yaml["repetition_delay"], + "options": query_group_yaml["controller_options"], + "cleanup_policy": cleanup_policy, + "range_duration": args.range_duration, + "step": args.step, + } + + logger.debug("Processing query {}", query_string) + + single_query_config = SingleQueryConfig( + single_query_config_yaml, + metric_config, + args.prometheus_scrape_interval, + args.streaming_engine, + sketch_parameters, + ) + + should_process_query = single_query_config.is_supported() + if args.enable_punting: + should_process_query = ( + should_process_query and single_query_config.should_be_performant() + ) + + if should_process_query: + query_aggregation_config_keys_map[single_query_config.query] = [] + current_configs, num_aggregates_to_retain = ( + single_query_config.get_streaming_aggregation_configs() + ) + + for current_config in current_configs: + key = current_config.get_identifying_key() + query_aggregation_config_keys_map[single_query_config.query].append( + (key, num_aggregates_to_retain) + ) + if key not in streaming_aggregation_configs_map: + streaming_aggregation_configs_map[key] = current_config + else: + 
logger.warning("Unsupported query") + + for idx, k in enumerate(streaming_aggregation_configs_map.keys()): + streaming_aggregation_configs_map[k].aggregationId = idx + 1 + + streaming_config = { + "aggregations": [ + config.to_dict(metric_config, "promql") + for config in streaming_aggregation_configs_map.values() + ], + "metrics": metric_config.config, + } + inference_config = { + "cleanup_policy": {"name": cleanup_policy.value}, + "queries": [], + "metrics": metric_config.config, + } + for query, streaming_config_keys in query_aggregation_config_keys_map.items(): + inference_config["queries"].append({"query": query, "aggregations": []}) + for streaming_config_key in streaming_config_keys: + aggregation_entry = { + "aggregation_id": streaming_aggregation_configs_map[ + streaming_config_key[0] + ].aggregationId, + } + # Add the appropriate parameter based on cleanup policy + cleanup_value = streaming_config_key[1] + if ( + cleanup_policy == CleanupPolicy.CIRCULAR_BUFFER + and cleanup_value is not None + ): + aggregation_entry["num_aggregates_to_retain"] = cleanup_value + elif ( + cleanup_policy == CleanupPolicy.READ_BASED and cleanup_value is not None + ): + aggregation_entry["read_count_threshold"] = cleanup_value + # For NO_CLEANUP, we don't add any parameter + inference_config["queries"][-1]["aggregations"].append(aggregation_entry) + + os.makedirs(args.output_dir, exist_ok=True) + with open(f"{args.output_dir}/streaming_config.yaml", "w") as f: + f.write(yaml.dump(streaming_config)) + + with open(f"{args.output_dir}/inference_config.yaml", "w") as f: + f.write(yaml.dump(inference_config)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_config", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--prometheus_scrape_interval", type=int, required=True) + parser.add_argument( + "--streaming_engine", type=str, choices=["flink", "arroyo"], required=True + ) 
+ parser.add_argument( + "--enable-punting", + action="store_true", + help="Enable query punting based on performance heuristics", + ) + parser.add_argument( + "--range-duration", + type=int, + default=0, + help="Range query duration (end - start) in seconds. 0 for instant queries.", + ) + parser.add_argument( + "--step", + type=int, + default=0, + help="Range query step in seconds. Required if range-duration > 0.", + ) + args = parser.parse_args() + main(args) diff --git a/Controller/requirements.txt b/Controller/requirements.txt new file mode 100644 index 0000000..ee973aa --- /dev/null +++ b/Controller/requirements.txt @@ -0,0 +1,3 @@ +loguru==0.7.3 +promql_parser==0.5.0 +PyYAML==6.0.2 diff --git a/Controller/utils/logics.py b/Controller/utils/logics.py new file mode 100644 index 0000000..31bb761 --- /dev/null +++ b/Controller/utils/logics.py @@ -0,0 +1,396 @@ +import copy +import math +from loguru import logger + +from promql_utilities.data_model.KeyByLabelNames import KeyByLabelNames +from promql_utilities.query_logics.enums import QueryPatternType, CleanupPolicy +from promql_utilities.ast_matching.PromQLPattern import MatchResult +from promql_utilities.query_logics.logics import ( + does_precompute_operator_support_subpopulations, +) + +CMS_WITH_HEAP_MULT = 4 + +# Default sketch parameters for backward compatibility +DEFAULT_SKETCH_PARAMETERS = { + "CountMinSketch": {"depth": 3, "width": 1024}, + "CountMinSketchWithHeap": {"depth": 3, "width": 1024, "heap_multiplier": 4}, + "DatasketchesKLL": {"K": 20}, + "HydraKLL": {"row_num": 3, "col_num": 1024, "k": 20}, +} + + +def get_effective_repeat(t_repeat: int, step: int) -> int: + """ + Calculate effective repeat interval for range queries. + + For range queries (step > 0), use the smaller of t_repeat and step to ensure + we produce aggregates frequently enough to support the query step size. + For instant queries (step = 0), use t_repeat. 
+ """ + return min(t_repeat, step) if step > 0 else t_repeat + + +# TODO: +# We only show the logic of `get_precompute_operator_parameters` here. +# Semantics for topk query will be added in later PRs. +def get_precompute_operator_parameters( + aggregation_type: str, + aggregation_sub_type: str, + query_pattern_match: MatchResult, + sketch_parameters: dict, +) -> dict: + # Allow partial overrides: use provided parameters, fall back to defaults per sketch type + if sketch_parameters is None: + sketch_parameters = {} + + if aggregation_type in [ + "Increase", + "MinMax", + "Sum", + "MultipleIncrease", + "MultipleMinMax", + "MultipleSum", + "DeltaSetAggregator", + "SetAggregator", + ]: + return {} + elif aggregation_type == "CountMinSketch": + params = sketch_parameters.get( + "CountMinSketch", DEFAULT_SKETCH_PARAMETERS["CountMinSketch"] + ) + return {"depth": params["depth"], "width": params["width"]} + elif aggregation_type == "CountMinSketchWithHeap": + if aggregation_sub_type == "topk": + if "aggregation" not in query_pattern_match.tokens: + raise ValueError( + f"{aggregation_sub_type} query missing aggregator in the match tokens" + ) + if "param" not in query_pattern_match.tokens["aggregation"]: + raise ValueError( + f"{aggregation_sub_type} query missing required 'k' parameter" + ) + k = int(query_pattern_match.tokens["aggregation"]["param"].val) + params = sketch_parameters.get( + "CountMinSketchWithHeap", + DEFAULT_SKETCH_PARAMETERS["CountMinSketchWithHeap"], + ) + heap_mult = params.get("heap_multiplier", CMS_WITH_HEAP_MULT) + return { + "depth": params["depth"], + "width": params["width"], + "heapsize": k * heap_mult, + } + else: + raise ValueError( + f"Aggregation sub-type {aggregation_sub_type} for CountMinSketchWithHeap not supported" + ) + elif aggregation_type == "DatasketchesKLL": + params = sketch_parameters.get( + "DatasketchesKLL", DEFAULT_SKETCH_PARAMETERS["DatasketchesKLL"] + ) + return {"K": params["K"]} + elif aggregation_type == "HydraKLL": + 
params = sketch_parameters.get( + "HydraKLL", DEFAULT_SKETCH_PARAMETERS["HydraKLL"] + ) + return { + "row_num": params["row_num"], + "col_num": params["col_num"], + "k": params["k"], + } + # elif aggregation_type == "UnivMon": + # return {"depth": 3, "width": 2048, "levels": 16} + else: + raise NotImplementedError(f"Aggregation type {aggregation_type} not supported") + + +def get_cleanup_param( + cleanup_policy: CleanupPolicy, + query_pattern_type, + query_pattern_match, + t_repeat: int, + window_type: str, + range_duration: int, + step: int, +) -> int: + """ + Calculate cleanup parameter based on cleanup policy and range query params. + + Sliding windows (both policies): range_duration / step + 1 + Tumbling circular_buffer: (T_lookback + range_duration) / min(T_repeat, step) + Tumbling read_based: (T_lookback / min(T_repeat, step)) * (range_duration / step + 1) + + For ONLY_SPATIAL queries, T_lookback = T_repeat. + For instant queries, range_duration = 0 and effective_repeat = T_repeat. + + Args: + cleanup_policy: CleanupPolicy.CIRCULAR_BUFFER or CleanupPolicy.READ_BASED + query_pattern_type: QueryPatternType enum + query_pattern_match: MatchResult with query tokens + t_repeat: Query repeat interval in seconds + window_type: "sliding" or "tumbling" + range_duration: end - start in seconds (0 for instant queries) + step: Range query step in seconds (required if range_duration > 0) + + Raises: + ValueError: If exactly one of range_duration or step is zero + """ + # Validation: range_duration and step must both be zero (instant) or both non-zero (range) + if (range_duration == 0) != (step == 0): + raise ValueError( + f"range_duration and step must both be 0 (instant query) or both > 0 (range query). 
" + f"Got range_duration={range_duration}, step={step}" + ) + + is_range_query = step > 0 + + # For ONLY_SPATIAL, T_lookback = T_repeat + if query_pattern_type == QueryPatternType.ONLY_SPATIAL: + t_lookback = t_repeat + else: + t_lookback = int( + query_pattern_match.tokens["range_vector"]["range"].total_seconds() + ) + + # For sliding windows: range_duration / step + 1 (same for both policies) + if window_type == "sliding": + if is_range_query: + result = range_duration // step + 1 + else: + result = 1 # instant query + logger.debug( + f"Sliding window mode: cleanup_param = {result} " + f"(range_duration={range_duration}s, step={step}s)" + ) + return result + + # Tumbling window calculations + effective_repeat = get_effective_repeat(t_repeat, step) + + # We use ceiling division because even if the time span doesn't fully fill + # a bucket, we still need that bucket to cover the partial data. + # E.g., if T_lookback=10s and effective_repeat=100s, we still need 1 bucket. + if cleanup_policy == CleanupPolicy.CIRCULAR_BUFFER: + # ceil((T_lookback + range_duration) / effective_repeat) + result = math.ceil((t_lookback + range_duration) / effective_repeat) + elif cleanup_policy == CleanupPolicy.READ_BASED: + # ceil(T_lookback / effective_repeat) * (range_duration / step + 1) + lookback_buckets = math.ceil(t_lookback / effective_repeat) + if is_range_query: + num_steps = range_duration // step + 1 + else: + num_steps = 1 # instant query + result = lookback_buckets * num_steps + else: + raise ValueError(f"Invalid cleanup policy: {cleanup_policy}") + + logger.debug( + f"Tumbling window mode ({cleanup_policy.value}): cleanup_param = {result} " + f"(t_lookback={t_lookback}s, t_repeat={t_repeat}s, " + f"range_duration={range_duration}s, step={step}s)" + ) + return result + + +def should_use_sliding_window(query_pattern_type, aggregation_type): + """ + Decide if sliding windows should be used based on query type and aggregation type. 
+ + For Issue #236: Use sliding windows for ALL ONLY_TEMPORAL queries except DeltaSetAggregator. + This eliminates merging overhead in QueryEngine at the cost of more computation in Arroyo. + + Args: + query_pattern_type: ONLY_TEMPORAL, ONLY_SPATIAL, or ONE_TEMPORAL_ONE_SPATIAL + aggregation_type: Type of aggregation (e.g., 'DatasketchesKLL', 'Sum', etc.) + + Returns: + bool: True if sliding windows should be used (currently always False: sliding windows are hard-disabled below until the Arroyo crash is fixed) + """ + # NOTE: returning False since sliding window pipelines are causing arroyo to crash + return False + # Only use sliding for ONLY_TEMPORAL queries (not ONE_TEMPORAL_ONE_SPATIAL or ONLY_SPATIAL) + if query_pattern_type != QueryPatternType.ONLY_TEMPORAL: + logger.debug( + f"Query pattern {query_pattern_type} not eligible for sliding windows " + f"(only ONLY_TEMPORAL supported)" + ) + return False + + # Explicitly exclude DeltaSetAggregator (paired with CMS but needs tumbling) + if aggregation_type == "DeltaSetAggregator": + logger.debug("DeltaSetAggregator excluded from sliding windows") + return False + + # All other ONLY_TEMPORAL aggregations use sliding windows + logger.info( + f"Aggregation type '{aggregation_type}' with {query_pattern_type} -> SLIDING windows" + ) + return True + + +def set_window_parameters( + query_pattern_type, + query_pattern_match, + t_repeat, + prometheus_scrape_interval, + aggregation_type, + template_config, + step: int, +): + """ + Set window parameters for streaming aggregation config. + Auto-decides between sliding and tumbling windows based on query type and aggregation cost. + + For ONLY_TEMPORAL queries with expensive aggregations (KLL, CMS): + - Uses SLIDING windows: windowSize = range duration, slideInterval = effective_repeat + - This reduces QueryEngine latency by avoiding merges (Arroyo does more work upfront) + + For other queries: + - Uses TUMBLING windows: windowSize = slideInterval = effective_repeat + - This is the original behavior + + For range queries (step > 0), effective_repeat = min(t_repeat, step).
+ For instant queries (step = 0), effective_repeat = t_repeat. + + Args: + query_pattern_type: Pattern type (ONLY_TEMPORAL, ONLY_SPATIAL, ONE_TEMPORAL_ONE_SPATIAL) + query_pattern_match: Matched PromQL pattern containing query metadata + t_repeat: Query repeat interval in seconds + prometheus_scrape_interval: Scrape interval in seconds + aggregation_type: Type of aggregation operator + template_config: StreamingAggregationConfig to update + step: Range query step in seconds (0 for instant queries) + """ + # For range queries, use min(t_repeat, step) as the effective repeat interval + effective_repeat = get_effective_repeat(t_repeat, step) + + # Decide if we should use sliding windows + use_sliding_window = should_use_sliding_window(query_pattern_type, aggregation_type) + + if use_sliding_window: + # SLIDING WINDOW for ONLY_TEMPORAL queries with expensive aggregations + logger.info( + f"Configuring SLIDING WINDOW for {query_pattern_type} " + f"with {aggregation_type}" + ) + + if query_pattern_type == QueryPatternType.ONLY_TEMPORAL: + # Window size = range duration (e.g., 15m = 900s) + range_seconds = int( + query_pattern_match.tokens["range_vector"]["range"].total_seconds() + ) + + # Check if this is actually a tumbling window (windowSize == slideInterval) + if range_seconds == effective_repeat: + logger.info( + f"Detected windowSize == slideInterval ({range_seconds}s). " + f"Using tumbling window instead of sliding for efficiency." 
+ ) + template_config.windowSize = effective_repeat + template_config.slideInterval = effective_repeat + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = effective_repeat + else: + # True sliding window + template_config.windowSize = range_seconds + template_config.slideInterval = effective_repeat + template_config.windowType = "sliding" + + logger.info( + f"Sliding window params: windowSize={range_seconds}s, " + f"slideInterval={effective_repeat}s " + f"(each window has {range_seconds} seconds of data, slides every {effective_repeat}s)" + ) + + # Set deprecated field for backward compatibility + template_config.tumblingWindowSize = effective_repeat + else: + # This should never be reached due to should_use_sliding_window() check + assert False, ( + f"should_use_sliding_window returned True for {query_pattern_type}, " + f"but sliding windows only supported for ONLY_TEMPORAL" + ) + else: + # TUMBLING WINDOW (existing logic) + logger.info( + f"Configuring TUMBLING WINDOW for {query_pattern_type} " + f"with {aggregation_type}" + ) + _set_tumbling_window_parameters( + query_pattern_type, + effective_repeat, + prometheus_scrape_interval, + template_config, + ) + + +def _set_tumbling_window_parameters( + query_pattern_type, effective_repeat, prometheus_scrape_interval, template_config +): + """ + Set tumbling window parameters. 
+ + Args: + query_pattern_type: Pattern type (ONLY_TEMPORAL, ONLY_SPATIAL, ONE_TEMPORAL_ONE_SPATIAL) + effective_repeat: Effective repeat interval (min(t_repeat, step) for range queries) + prometheus_scrape_interval: Scrape interval in seconds + template_config: StreamingAggregationConfig to update + """ + if ( + query_pattern_type == QueryPatternType.ONLY_TEMPORAL + or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL + ): + template_config.windowSize = effective_repeat + template_config.slideInterval = effective_repeat + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = effective_repeat + + logger.debug( + f"Tumbling window params: windowSize={effective_repeat}s, slideInterval={effective_repeat}s" + ) + elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: + template_config.windowSize = prometheus_scrape_interval + template_config.slideInterval = prometheus_scrape_interval + template_config.windowType = "tumbling" + template_config.tumblingWindowSize = prometheus_scrape_interval + + logger.debug( + f"Tumbling window params: windowSize={prometheus_scrape_interval}s, " + f"slideInterval={prometheus_scrape_interval}s" + ) + else: + raise ValueError("Invalid query pattern type") + + +# COMMENTED OUT - Original function kept for rollback +# Issue #236: Replaced with set_window_parameters() to support sliding windows +# +# def set_tumbling_window_size( +# query_pattern_type, t_repeat, prometheus_scrape_interval, template_config +# ): +# if ( +# query_pattern_type == QueryPatternType.ONLY_TEMPORAL +# or query_pattern_type == QueryPatternType.ONE_TEMPORAL_ONE_SPATIAL +# ): +# template_config.tumblingWindowSize = t_repeat +# elif query_pattern_type == QueryPatternType.ONLY_SPATIAL: +# template_config.tumblingWindowSize = prometheus_scrape_interval +# else: +# raise ValueError("Invalid query pattern type") + + +def set_subpopulation_labels( + statistic_to_compute, + aggregation_type, + subpopulation_labels: KeyByLabelNames, + 
template_config, +): + if does_precompute_operator_support_subpopulations( + statistic_to_compute, aggregation_type + ): + template_config.labels["grouping"] = KeyByLabelNames([]) + template_config.labels["aggregated"] = copy.deepcopy(subpopulation_labels) + else: + template_config.labels["grouping"] = copy.deepcopy(subpopulation_labels) + template_config.labels["aggregated"] = KeyByLabelNames([]) diff --git a/Controller/utils/test_logics.py b/Controller/utils/test_logics.py new file mode 100644 index 0000000..b44c889 --- /dev/null +++ b/Controller/utils/test_logics.py @@ -0,0 +1,268 @@ +"""Unit tests for logics.py cleanup parameter calculations.""" + +import pytest +from datetime import timedelta +from unittest.mock import MagicMock + +from promql_utilities.query_logics.enums import QueryPatternType, CleanupPolicy +from logics import get_cleanup_param + + +def create_mock_match(range_seconds: int) -> MagicMock: + """Create a mock match result with the given range duration.""" + mock = MagicMock() + mock.tokens = {"range_vector": {"range": timedelta(seconds=range_seconds)}} + return mock + + +class TestGetCleanupParamValidation: + """Tests for validation logic in get_cleanup_param.""" + + def test_range_duration_without_step_raises_error(self): + """range_duration > 0 with step = 0 is invalid.""" + mock_match = create_mock_match(900) + with pytest.raises(ValueError, match="must both be 0.*or both > 0"): + get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=0, + ) + + def test_step_without_range_duration_raises_error(self): + """step > 0 with range_duration = 0 is invalid.""" + mock_match = create_mock_match(900) + with pytest.raises(ValueError, match="must both be 0.*or both > 0"): + get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + 
query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=60, + ) + + def test_instant_query_both_zero_is_valid(self): + """Instant queries: both range_duration=0 and step=0 is valid.""" + mock_match = create_mock_match(900) + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 # ceil(900 / 30) = 30 + + +class TestSlidingWindowCleanupParam: + """Tests for sliding window cleanup parameter calculations.""" + + def test_sliding_instant_query(self): + """Sliding window instant query returns 1.""" + mock_match = create_mock_match(900) + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=0, + step=0, + ) + assert result == 1 + + def test_sliding_range_query(self): + """Sliding window: range_duration / step + 1.""" + mock_match = create_mock_match(900) + # range_duration=3600, step=60 -> 3600/60 + 1 = 61 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + assert result == 61 + + def test_sliding_same_for_both_policies(self): + """Sliding windows use same formula for both policies.""" + mock_match = create_mock_match(900) + result_cb = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + result_rb = get_cleanup_param( + 
cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="sliding", + range_duration=3600, + step=60, + ) + assert result_cb == result_rb == 61 + + +class TestTumblingCircularBufferCleanupParam: + """Tests for tumbling window + circular_buffer cleanup parameter.""" + + def test_instant_query(self): + """Instant query: T_lookback / T_repeat.""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, T_repeat=30 -> 900/30 = 30 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 + + def test_range_query(self): + """Range query: (T_lookback + range_duration) / min(T_repeat, step).""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, range_duration=3600, T_repeat=30, step=60 + # effective_repeat = min(30, 60) = 30 + # (900 + 3600) / 30 = 150 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 150 + + def test_step_smaller_than_t_repeat(self): + """When step < T_repeat, use step as effective_repeat.""" + mock_match = create_mock_match(900) + # T_lookback=900, range_duration=3600, T_repeat=60, step=30 + # effective_repeat = min(60, 30) = 30 + # (900 + 3600) / 30 = 150 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=60, + window_type="tumbling", + range_duration=3600, + step=30, + ) + assert result == 150 + + +class TestTumblingReadBasedCleanupParam: + """Tests for tumbling window + read_based cleanup 
parameter.""" + + def test_instant_query(self): + """Instant query: (T_lookback / T_repeat) * 1.""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, T_repeat=30 -> (900/30) * 1 = 30 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 30 + + def test_range_query(self): + """Range query: (T_lookback / min(T_repeat, step)) * (range_duration / step + 1).""" + mock_match = create_mock_match(900) # 15 minutes + # T_lookback=900, range_duration=3600, T_repeat=30, step=60 + # effective_repeat = min(30, 60) = 30 + # lookback_buckets = 900 / 30 = 30 + # num_steps = 3600 / 60 + 1 = 61 + # result = 30 * 61 = 1830 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.READ_BASED, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 1830 + + +class TestOnlySpatialQueries: + """Tests for ONLY_SPATIAL queries (T_lookback = T_repeat).""" + + def test_only_spatial_instant_query(self): + """ONLY_SPATIAL uses T_lookback = T_repeat.""" + mock_match = MagicMock() # No range_vector token needed + # T_lookback = T_repeat = 30 + # circular_buffer instant: T_lookback / T_repeat = 30/30 = 1 + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_SPATIAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=0, + step=0, + ) + assert result == 1 + + def test_only_spatial_range_query(self): + """ONLY_SPATIAL range query uses T_lookback = T_repeat.""" + mock_match = MagicMock() + # T_lookback = T_repeat = 30, range_duration=3600, step=60 + # effective_repeat = min(30, 60) = 30 + # circular_buffer: (30 + 3600) / 30 = 121 + result = 
get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_SPATIAL, + query_pattern_match=mock_match, + t_repeat=30, + window_type="tumbling", + range_duration=3600, + step=60, + ) + assert result == 121 + + +class TestMinimumResult: + """Tests that result is always at least 1.""" + + def test_minimum_result_is_one(self): + """Result should never be less than 1.""" + mock_match = create_mock_match(10) # Very small lookback + result = get_cleanup_param( + cleanup_policy=CleanupPolicy.CIRCULAR_BUFFER, + query_pattern_type=QueryPatternType.ONLY_TEMPORAL, + query_pattern_match=mock_match, + t_repeat=100, # Larger than lookback + window_type="tumbling", + range_duration=0, + step=0, + ) + # 10 / 100 = 0, but should be at least 1 + assert result == 1 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/ExecutionUtilities/.gitignore b/ExecutionUtilities/.gitignore new file mode 100644 index 0000000..533dc14 --- /dev/null +++ b/ExecutionUtilities/.gitignore @@ -0,0 +1,9 @@ +**/__pycache__ +**/*.pyc +**/*.swp +.DS_Store + +**/target + +clickhouse-benchmark-pipeline/benchmark_results/ +**/data/ diff --git a/ExecutionUtilities/LICENSE b/ExecutionUtilities/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/ExecutionUtilities/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md b/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md new file mode 100644 index 0000000..1680e91 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/README.md @@ -0,0 +1,82 @@ +# ClickHouse Benchmark Pipeline + +Benchmark ClickHouse with 43 ClickBench queries using Kafka data ingestion. + +## Prerequisites + +Install and run Kafka and ClickHouse natively (no Docker): + +```bash +# Install Kafka (one-time) +cd /path/to/Utilities/installation/kafka +./install.sh /path/to/Utilities/installation/kafka + +# Install ClickHouse (one-time) +cd /path/to/Utilities/installation/clickhouse +./install.sh /path/to/Utilities/installation/clickhouse +``` + +## Usage + +```bash +# 1. Start Kafka (in a terminal) +cd /path/to/Utilities/installation/kafka +./run.sh kafka/ + +# 2. Create Kafka topic +kafka/bin/kafka-topics.sh --create --topic hits --bootstrap-server localhost:9092 + +# 3. Start ClickHouse (in another terminal) +cd /path/to/Utilities/installation/clickhouse +./run.sh /path/to/Utilities/installation/clickhouse + +# 4. Initialize ClickHouse tables +./scripts/init_clickhouse.sh + +# 5. Generate data (choose one mode) +DATA_MODE=clickbench TOTAL_RECORDS=100000 ./scripts/generate_data.sh +DATA_MODE=fake TOTAL_RECORDS=100000 ./scripts/generate_data.sh + +# 6. Check data +./scripts/check_data.sh + +# 7. 
Run benchmark +./scripts/run_benchmark.sh +``` + +## Data Modes + +| Mode | Description | +|------|-------------| +| `fake` | Synthetic data | +| `clickbench` | Real ClickBench dataset (~100M rows) | + +## Configuration + +Edit `config.env` to change defaults. Environment variables override config values. + +# Elasticsearch Benchmark Pipeline + +## Prerequisites + +Follow instructions to install Elasticsearch: +```bash +cd /path/to/Utilities/installation/elastic +``` + +## Configuration + +Edit `config.env` to update the `ES_API_KEY` field after installing Elasticsearch locally. + +## Usage + +```bash +# 1. Load H2O CSV data into Elastic +./scripts/init_elastic.sh + +# 2. Check data +./scripts/check_elastic_data.sh + +# 3. Run benchmark +./scripts/run_benchmark.sh h2o_elastic +``` \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py new file mode 100644 index 0000000..0c774b4 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_clickbench_data.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +ClickBench Data Downloader + +Downloads the official ClickBench dataset (hits.json.gz). + +Usage: + python download_clickbench_data.py --output-dir ./data + + # Specify output file directly: + python download_clickbench_data.py --output-file /path/to/hits.json.gz +""" + +import argparse +import os +import sys +import urllib.request + + +def load_config(): + """Load configuration from config.env file.
All values must be defined there.""" + config = {} + script_dir = os.path.dirname(os.path.abspath(__file__)) + project_root = os.path.dirname(script_dir) + + # Load config (environment variables take precedence) + config_file = os.path.join(project_root, "config.env") + if os.path.exists(config_file): + with open(config_file) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#") and "=" in line: + key, value = line.split("=", 1) + if key not in os.environ: + config[key] = value + + # Store project root for path resolution + config["_PROJECT_ROOT"] = project_root + return config + + +_config = load_config() + + +def get_config(key): + """Get config value from environment or config.env. Raises error if not found.""" + if key in os.environ: + return os.environ[key] + if key in _config: + return _config[key] + raise ValueError(f"Configuration '{key}' not found. Please set it in config.env") + + +# Get values from config (no hardcoded fallbacks) +CLICKBENCH_URL = get_config("CLICKBENCH_URL") +CLICKBENCH_FILENAME = get_config("CLICKBENCH_FILENAME") +CLICKBENCH_DATA_DIR = get_config("CLICKBENCH_DATA_DIR") + + +def download_clickbench_data(output_path: str) -> str: + """Download ClickBench dataset if not already present.""" + if os.path.exists(output_path): + print(f"Using existing file: {output_path}") + return output_path + + print(f"Downloading ClickBench dataset from {CLICKBENCH_URL}") + print("This is a large file (~14GB compressed, ~100M rows). 
Please wait...") + + request = urllib.request.Request( + CLICKBENCH_URL, + headers={"User-Agent": "Mozilla/5.0 (compatible; ClickBench-Importer/1.0)"}, + ) + + try: + with urllib.request.urlopen(request) as response: + total_size = int(response.headers.get("Content-Length", 0)) + downloaded = 0 + last_percent = -1 + block_size = 8192 * 128 # 1MB blocks + + with open(output_path, "wb") as out_file: + while True: + block = response.read(block_size) + if not block: + break + out_file.write(block) + downloaded += len(block) + + if total_size > 0: + percent = downloaded * 100 // total_size + if percent != last_percent: + last_percent = percent + downloaded_mb = downloaded / (1024 * 1024) + total_mb = total_size / (1024 * 1024) + sys.stdout.write( + f"\rProgress: {percent}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)" + ) + sys.stdout.flush() + + print("\nDownload complete!") + return output_path + + except urllib.error.HTTPError as e: + print(f"\nDownload failed: HTTP {e.code} - {e.reason}") + print("You can manually download the file and use --output-file option:") + print(f" wget {CLICKBENCH_URL}") + print(f" curl -L -o {CLICKBENCH_FILENAME} {CLICKBENCH_URL}") + raise + + +def main(): + # Compute default output dir from config + default_output_dir = os.path.join(_config["_PROJECT_ROOT"], CLICKBENCH_DATA_DIR) + + parser = argparse.ArgumentParser( + description="Download ClickBench dataset", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--output-file", + help=f"Path to save {CLICKBENCH_FILENAME} (overrides --output-dir)", + ) + parser.add_argument( + "--output-dir", + default=default_output_dir, + help=f"Directory to download data to (default from config: {CLICKBENCH_DATA_DIR})", + ) + + args = parser.parse_args() + + if args.output_file: + output_path = args.output_file + os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True) + else: + os.makedirs(args.output_dir, exist_ok=True) + output_path = 
os.path.join(args.output_dir, CLICKBENCH_FILENAME) + + download_clickbench_data(output_path) + + +if __name__ == "__main__": + main() diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py new file mode 100644 index 0000000..b438728 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/download_h2o_data.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +import sys +import os +import argparse + +# Check for gdown dependency +try: + import gdown +except ImportError: + # This should be handled by the shell script, but just in case: + print("Error: 'gdown' is missing. Installing it now...") + import subprocess + + subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) + import gdown + +# H2O GroupBy Dataset ID +FILE_ID = "15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" +DEFAULT_FILENAME = "G1_1e7_1e2_0_0.csv" + + +def main(): + parser = argparse.ArgumentParser(description="Download H2O Benchmark Data") + parser.add_argument( + "--output-dir", required=True, help="Directory to save the file" + ) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + output_path = os.path.join(args.output_dir, DEFAULT_FILENAME) + + # Simple check to avoid redownloading if it looks valid (>100MB) + if os.path.exists(output_path) and os.path.getsize(output_path) > 100 * 1024 * 1024: + print(f"File {output_path} already exists. 
Skipping download.") + return + + print(f"Downloading H2O dataset (ID: {FILE_ID}) using gdown...") + + # gdown automatically handles the 'virus scan' warning and cookies + url = f"https://drive.google.com/uc?id={FILE_ID}" + gdown.download(url, output_path, quiet=False) + + +if __name__ == "__main__": + main() diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt new file mode 100644 index 0000000..42edc69 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_importer/requirements.txt @@ -0,0 +1 @@ +confluent-kafka>=2.0.0 diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql new file mode 100644 index 0000000..e482da5 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/benchmark_queries.sql @@ -0,0 +1,132 @@ +-- ClickBench Benchmark Queries +-- Source: https://benchmark.clickhouse.com/ +-- These 43 queries test various aspects of analytical query performance + +-- Q1: Simple count +SELECT COUNT(*) FROM hits; + +-- Q2: Count with filter +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; + +-- Q3: Aggregation functions +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; + +-- Q4: Average of large integers +SELECT AVG(UserID) FROM hits; + +-- Q5: Count distinct users +SELECT COUNT(DISTINCT UserID) FROM hits; + +-- Q6: Count distinct search phrases +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; + +-- Q7: Min/Max dates +SELECT MIN(EventDate), MAX(EventDate) FROM hits; + +-- Q8: Group by with filter and order +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; + +-- Q9: Count distinct with group by +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; + +-- Q10: Multiple aggregations 
with group by +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; + +-- Q11: Filter on string column with group by +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; + +-- Q12: Multiple string columns group by +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; + +-- Q13: Search phrase analysis +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q14: Search phrase with distinct users +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; + +-- Q15: Search engine and phrase analysis +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q16: Top users by activity +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q17: User search behavior +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q18: User search behavior (no order) +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; + +-- Q19: Time-based user analysis +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +-- Q20: Point query on specific user +SELECT UserID FROM hits WHERE UserID = 435090932899640449; + +-- Q21: URL pattern matching +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; + +-- Q22: URL pattern with search phrase +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND 
SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q23: Complex pattern matching +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +-- Q24: Full row retrieval with pattern +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; + +-- Q25: String column ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; + +-- Q26: Alphabetical ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; + +-- Q27: Compound ordering +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; + +-- Q28: URL length analysis +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +-- Q29: Referer domain extraction with regex +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +-- Q30: Wide aggregation (sum of 90 derived columns) +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), 
SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; + +-- Q31: Search with IP grouping +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q32: Watch 
and IP grouping with search filter +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q33: Watch and IP grouping +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +-- Q34: URL popularity +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; + +-- Q35: URL popularity with constant column +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; + +-- Q36: IP arithmetic +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; + +-- Q37: Counter-specific page views +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; + +-- Q38: Counter-specific title analysis +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; + +-- Q39: Link analysis with offset +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +-- Q40: Traffic source analysis +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst 
ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +-- Q41: URL hash analysis +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; + +-- Q42: Window dimensions analysis +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; + +-- Q43: Time series analysis +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql new file mode 100644 index 0000000..cc06844 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql @@ -0,0 +1,344 @@ +-- ClickHouse initialization script for ClickBench pipeline +-- Schema matches actual ClickBench JSON data types + +-- Create the main hits table with MergeTree engine (ClickBench compatible) +CREATE TABLE IF NOT EXISTS hits +( + WatchID Int64, + JavaEnable UInt8, + Title String, + GoodEvent Int16, + EventTime DateTime, + EventDate Date, + CounterID UInt32, + ClientIP Int32, + RegionID UInt32, + UserID Int64, + CounterClass Int8, + OS UInt8, + UserAgent UInt8, + URL String, + Referer String, + IsRefresh UInt8, + RefererCategoryID UInt16, + RefererRegionID UInt32, + URLCategoryID 
UInt16, + URLRegionID UInt32, + ResolutionWidth UInt16, + ResolutionHeight UInt16, + ResolutionDepth UInt8, + FlashMajor UInt8, + FlashMinor UInt8, + FlashMinor2 String, + NetMajor UInt8, + NetMinor UInt8, + UserAgentMajor UInt16, + UserAgentMinor String, + CookieEnable UInt8, + JavascriptEnable UInt8, + IsMobile UInt8, + MobilePhone UInt8, + MobilePhoneModel String, + Params String, + IPNetworkID UInt32, + TraficSourceID Int8, + SearchEngineID UInt16, + SearchPhrase String, + AdvEngineID UInt8, + IsArtifical UInt8, + WindowClientWidth UInt16, + WindowClientHeight UInt16, + ClientTimeZone Int16, + ClientEventTime DateTime, + SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, + SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, + PageCharset String, + CodeVersion UInt32, + IsLink UInt8, + IsDownload UInt8, + IsNotBounce UInt8, + FUniqID Int64, + OriginalURL String, + HID UInt32, + IsOldCounter UInt8, + IsEvent UInt8, + IsParameter UInt8, + DontCountHits UInt8, + WithHash UInt8, + HitColor String, + LocalEventTime DateTime, + Age UInt8, + Sex UInt8, + Income UInt8, + Interests UInt16, + Robotness UInt8, + RemoteIP Int32, + WindowName Int32, + OpenerName Int32, + HistoryLength Int16, + BrowserLanguage String, + BrowserCountry String, + SocialNetwork String, + SocialAction String, + HTTPError UInt16, + SendTiming UInt32, + DNSTiming UInt32, + ConnectTiming UInt32, + ResponseStartTiming UInt32, + ResponseEndTiming UInt32, + FetchTiming UInt32, + SocialSourceNetworkID UInt8, + SocialSourcePage String, + ParamPrice Int64, + ParamOrderID String, + ParamCurrency String, + ParamCurrencyID UInt16, + OpenstatServiceName String, + OpenstatCampaignID String, + OpenstatAdID String, + OpenstatSourceID String, + UTMSource String, + UTMMedium String, + UTMCampaign String, + UTMContent String, + UTMTerm String, + FromTag String, + HasGCLID UInt8, + RefererHash Int64, + URLHash Int64, + CLID UInt32 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY 
(CounterID, EventDate, intHash32(UserID), EventTime, WatchID) +SETTINGS index_granularity = 8192; + +-- Create Kafka engine table for consuming from Kafka +CREATE TABLE IF NOT EXISTS hits_kafka +( + WatchID Int64, + JavaEnable UInt8, + Title String, + GoodEvent Int16, + EventTime DateTime, + EventDate Date, + CounterID UInt32, + ClientIP Int32, + RegionID UInt32, + UserID Int64, + CounterClass Int8, + OS UInt8, + UserAgent UInt8, + URL String, + Referer String, + IsRefresh UInt8, + RefererCategoryID UInt16, + RefererRegionID UInt32, + URLCategoryID UInt16, + URLRegionID UInt32, + ResolutionWidth UInt16, + ResolutionHeight UInt16, + ResolutionDepth UInt8, + FlashMajor UInt8, + FlashMinor UInt8, + FlashMinor2 String, + NetMajor UInt8, + NetMinor UInt8, + UserAgentMajor UInt16, + UserAgentMinor String, + CookieEnable UInt8, + JavascriptEnable UInt8, + IsMobile UInt8, + MobilePhone UInt8, + MobilePhoneModel String, + Params String, + IPNetworkID UInt32, + TraficSourceID Int8, + SearchEngineID UInt16, + SearchPhrase String, + AdvEngineID UInt8, + IsArtifical UInt8, + WindowClientWidth UInt16, + WindowClientHeight UInt16, + ClientTimeZone Int16, + ClientEventTime DateTime, + SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, + SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, + PageCharset String, + CodeVersion UInt32, + IsLink UInt8, + IsDownload UInt8, + IsNotBounce UInt8, + FUniqID Int64, + OriginalURL String, + HID UInt32, + IsOldCounter UInt8, + IsEvent UInt8, + IsParameter UInt8, + DontCountHits UInt8, + WithHash UInt8, + HitColor String, + LocalEventTime DateTime, + Age UInt8, + Sex UInt8, + Income UInt8, + Interests UInt16, + Robotness UInt8, + RemoteIP Int32, + WindowName Int32, + OpenerName Int32, + HistoryLength Int16, + BrowserLanguage String, + BrowserCountry String, + SocialNetwork String, + SocialAction String, + HTTPError UInt16, + SendTiming UInt32, + DNSTiming UInt32, + ConnectTiming UInt32, + ResponseStartTiming UInt32, + ResponseEndTiming 
UInt32, + FetchTiming UInt32, + SocialSourceNetworkID UInt8, + SocialSourcePage String, + ParamPrice Int64, + ParamOrderID String, + ParamCurrency String, + ParamCurrencyID UInt16, + OpenstatServiceName String, + OpenstatCampaignID String, + OpenstatAdID String, + OpenstatSourceID String, + UTMSource String, + UTMMedium String, + UTMCampaign String, + UTMContent String, + UTMTerm String, + FromTag String, + HasGCLID UInt8, + RefererHash Int64, + URLHash Int64, + CLID UInt32 +) +ENGINE = Kafka +SETTINGS + kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'hits', + kafka_group_name = 'clickhouse_hits_consumer', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 1, + kafka_max_block_size = 65536; + +-- Create materialized view to move data from Kafka to MergeTree +CREATE MATERIALIZED VIEW IF NOT EXISTS hits_mv TO hits AS +SELECT + WatchID, + JavaEnable, + Title, + GoodEvent, + EventTime, + EventDate, + CounterID, + ClientIP, + RegionID, + UserID, + CounterClass, + OS, + UserAgent, + URL, + Referer, + IsRefresh, + RefererCategoryID, + RefererRegionID, + URLCategoryID, + URLRegionID, + ResolutionWidth, + ResolutionHeight, + ResolutionDepth, + FlashMajor, + FlashMinor, + FlashMinor2, + NetMajor, + NetMinor, + UserAgentMajor, + UserAgentMinor, + CookieEnable, + JavascriptEnable, + IsMobile, + MobilePhone, + MobilePhoneModel, + Params, + IPNetworkID, + TraficSourceID, + SearchEngineID, + SearchPhrase, + AdvEngineID, + IsArtifical, + WindowClientWidth, + WindowClientHeight, + ClientTimeZone, + ClientEventTime, + SilverlightVersion1, + SilverlightVersion2, + SilverlightVersion3, + SilverlightVersion4, + PageCharset, + CodeVersion, + IsLink, + IsDownload, + IsNotBounce, + FUniqID, + OriginalURL, + HID, + IsOldCounter, + IsEvent, + IsParameter, + DontCountHits, + WithHash, + HitColor, + LocalEventTime, + Age, + Sex, + Income, + Interests, + Robotness, + RemoteIP, + WindowName, + OpenerName, + HistoryLength, + BrowserLanguage, + BrowserCountry, + SocialNetwork, + 
SocialAction, + HTTPError, + SendTiming, + DNSTiming, + ConnectTiming, + ResponseStartTiming, + ResponseEndTiming, + FetchTiming, + SocialSourceNetworkID, + SocialSourcePage, + ParamPrice, + ParamOrderID, + ParamCurrency, + ParamCurrencyID, + OpenstatServiceName, + OpenstatCampaignID, + OpenstatAdID, + OpenstatSourceID, + UTMSource, + UTMMedium, + UTMCampaign, + UTMContent, + UTMTerm, + FromTag, + HasGCLID, + RefererHash, + URLHash, + CLID +FROM hits_kafka; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql new file mode 100644 index 0000000..aa69de4 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/clickhouse/h2o_init.sql @@ -0,0 +1,35 @@ +CREATE TABLE IF NOT EXISTS h2o_groupby_queue +( + timestamp BIGINT, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = Kafka +SETTINGS kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'h2o_groupby', + kafka_group_name = 'clickhouse_h2o', + kafka_format = 'JSONEachRow'; + +CREATE TABLE IF NOT EXISTS h2o_groupby +( + timestamp BIGINT, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = MergeTree +ORDER BY (id1, id2, id3, id4); + +CREATE MATERIALIZED VIEW IF NOT EXISTS h2o_groupby_mv TO h2o_groupby AS +SELECT * FROM h2o_groupby_queue; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env b/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env new file mode 100644 index 0000000..0ca2c41 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/config.env @@ -0,0 +1,40 @@ +# ClickHouse Benchmark Pipeline Configuration +# Override any value by setting the environment variable before running scripts. 
+ +PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Kafka +KAFKA_BROKER=localhost:9092 # Native Kafka +KAFKA_TOPIC=hits +KAFKA_HOME="${PROJECT_DIR}/../../Utilities/installation/kafka/kafka" + +# ClickHouse +CLICKHOUSE_HOST=localhost +CLICKHOUSE_HTTP_PORT=8123 + +# Data Generation +DATA_BATCH_SIZE=50000 +DATA_FREQUENCY_SECONDS=0 +FAKE_NUM_USERS=10000 +FAKE_NUM_COUNTERS=100 + +# ClickBench Data +CLICKBENCH_URL=https://datasets.clickhouse.com/hits_compatible/hits.json.gz +CLICKBENCH_FILENAME=hits.json.gz +CLICKBENCH_DATA_DIR=clickbench_importer/data + +# Benchmark +BENCHMARK_QUERIES_FILE=benchmark_queries.sql +BENCHMARK_RESULTS_DIR=benchmark_results + +# H2O Benchmark +H2O_DATA_URL="https://drive.google.com/uc?export=download&id=15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" +H2O_FILENAME="G1_1e7_1e2_0_0.csv" +H2O_KAFKA_TOPIC="h2o_groupby" + +# Elasticsearch Configuration +ES_HOST=localhost +ES_PORT=9200 +ES_INDEX_NAME=h2o_benchmark +ES_BULK_SIZE=10000 +ES_API_KEY=your-api-key \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock new file mode 100644 index 0000000..f5e3c48 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.lock @@ -0,0 +1,2478 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" + +[[package]] +name = 
"async-compression" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "data_exporter" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "csv", + "elasticsearch", + "flate2", + "futures", + "indicatif", + "rand", + "rand_distr", + "rdkafka", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "elasticsearch" +version = "8.5.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d9bd57d914cc66ce878f098f63ed7b5d5b64c30644a5adb950b008f874a6c6" +dependencies = [ + "base64 0.11.0", + "bytes", + "dyn-clone", + "lazy_static", + "percent-encoding", + "reqwest", + "rustc_version", + "serde", + "serde_json", + "serde_with", + "url", + "void", +] + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + 
"serde_core", +] + +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = 
"libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = 
"native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.10.0", + 
"cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.114", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" 
+dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "async-compression", + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + 
+[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.10.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + 
+[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" 
+version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom 0.4.1", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + 
"once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.114", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.15.5", + "indexmap", + "semver 1.0.27", +] + +[[package]] +name = "web-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] 
+name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + 
"windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = 
"0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.114", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.114", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.10.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver 1.0.27", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + 
+[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + +[[package]] +name = "zmij" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml new file mode 100644 index 0000000..5107663 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "data_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4", features = ["derive", "env"] } +csv = "1.3" +flate2 = "1" +rand = { version = "0.8", features = ["small_rng"] } +rand_distr = "0.4" +rdkafka = { version = "0.36", features = ["cmake-build"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +chrono = "0.4" +futures = "0.3" +indicatif = "0.17" + +# Elasticsearch support +elasticsearch = "8.5.0-alpha.1" + +[profile.release] +opt-level = 3 +lto = true +codegen-units = 1 \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs new file mode 100644 index 0000000..58c63f7 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/data_exporter/src/main.rs @@ -0,0 +1,1075 
//! data_exporter — generates synthetic ClickBench-style data or replays
//! ClickBench/H2O datasets, shipping records to Kafka or Elasticsearch
//! (see the `Mode` enum later in this file).

use clap::{Parser, ValueEnum};
use elasticsearch::{http::transport::Transport, BulkParts, Elasticsearch};
use flate2::read::GzDecoder;
use futures::future::join_all;
use indicatif::{ProgressBar, ProgressStyle};
use rand::rngs::SmallRng;
use rand::SeedableRng;
use rand_distr::{Distribution, Uniform};
use rdkafka::config::ClientConfig;
use rdkafka::producer::{FutureProducer, FutureRecord};
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::time::Duration;
use tokio::time::sleep;

// Fixed seed for the SmallRng used by the generators, so synthetic runs are
// reproducible. NOTE(review): the seeding call site is outside this chunk —
// confirm it actually uses SmallRng::seed_from_u64(RNG_SEED).
const RNG_SEED: u64 = 42;

// Sample data for realistic values.
// Pools below are picked from uniformly by `random_choice`; repeated empty
// strings deliberately skew the distribution toward "no value".
const SEARCH_PHRASES: &[&str] = &[
    "", "", "", "", "", // Most are empty
    "clickhouse benchmark",
    "analytics database",
    "olap performance",
    "data warehouse",
    "columnar storage",
    "real-time analytics",
    "big data processing",
];

const URLS: &[&str] = &[
    "https://example.com/",
    "https://example.com/products",
    "https://example.com/about",
    "https://google.com/search?q=test",
    "https://example.com/blog/analytics",
    "https://news.example.com/article/123",
];

const REFERERS: &[&str] = &[
    "",
    "https://google.com/",
    "https://bing.com/",
    "https://duckduckgo.com/",
    "https://example.com/",
];

const MOBILE_PHONE_MODELS: &[&str] = &[
    "", "", "", // Most are empty (desktop)
    "iPhone",
    "Samsung Galaxy",
    "Pixel",
    "OnePlus",
];

const TITLES: &[&str] = &[
    "Home Page",
    "Product Catalog",
    "About Us",
    "Contact",
    "Blog - Analytics Tips",
    "Google Search Results",
    "Dashboard",
];

const BROWSER_LANGUAGES: &[&str] = &["en", "de", "fr", "es", "ru", "zh", "jp"];
const BROWSER_COUNTRIES: &[&str] = &["US", "DE", "FR", "GB", "RU", "CN", "JP"];

/// ClickBench-compatible hits record
/// Schema from: https://github.com/ClickHouse/ClickBench
///
/// Field names are mapped via `#[serde(rename = ...)]` to the exact ClickBench
/// column names (including the upstream spellings "TraficSourceID" and
/// "IsArtifical" — do not "fix" them, they must match the target table).
#[derive(Serialize)]
struct HitsRecord {
    #[serde(rename = "WatchID")]
    watch_id: u64,
    #[serde(rename = "JavaEnable")]
    java_enable: u8,
    #[serde(rename = "Title")]
    title: String,
    #[serde(rename = "GoodEvent")]
    good_event: i16,
    // Event timestamps are pre-formatted strings ("%Y-%m-%d %H:%M:%S" /
    // "%Y-%m-%d"), not numeric epochs — see generate_hits_record.
    #[serde(rename = "EventTime")]
    event_time: String,
    #[serde(rename = "EventDate")]
    event_date: String,
    #[serde(rename = "CounterID")]
    counter_id: u32,
    #[serde(rename = "ClientIP")]
    client_ip: u32,
    #[serde(rename = "RegionID")]
    region_id: u32,
    #[serde(rename = "UserID")]
    user_id: u64,
    #[serde(rename = "CounterClass")]
    counter_class: i8,
    #[serde(rename = "OS")]
    os: u8,
    #[serde(rename = "UserAgent")]
    user_agent: u8,
    #[serde(rename = "URL")]
    url: String,
    #[serde(rename = "Referer")]
    referer: String,
    #[serde(rename = "IsRefresh")]
    is_refresh: u8,
    #[serde(rename = "RefererCategoryID")]
    referer_category_id: u16,
    #[serde(rename = "RefererRegionID")]
    referer_region_id: u32,
    #[serde(rename = "URLCategoryID")]
    url_category_id: u16,
    #[serde(rename = "URLRegionID")]
    url_region_id: u32,
    // Client display / runtime capabilities.
    #[serde(rename = "ResolutionWidth")]
    resolution_width: u16,
    #[serde(rename = "ResolutionHeight")]
    resolution_height: u16,
    #[serde(rename = "ResolutionDepth")]
    resolution_depth: u8,
    #[serde(rename = "FlashMajor")]
    flash_major: u8,
    #[serde(rename = "FlashMinor")]
    flash_minor: u8,
    #[serde(rename = "FlashMinor2")]
    flash_minor2: String,
    #[serde(rename = "NetMajor")]
    net_major: u8,
    #[serde(rename = "NetMinor")]
    net_minor: u8,
    #[serde(rename = "UserAgentMajor")]
    user_agent_major: u16,
    #[serde(rename = "UserAgentMinor")]
    user_agent_minor: String,
    #[serde(rename = "CookieEnable")]
    cookie_enable: u8,
    #[serde(rename = "JavascriptEnable")]
    javascript_enable: u8,
    #[serde(rename = "IsMobile")]
    is_mobile: u8,
    #[serde(rename = "MobilePhone")]
    mobile_phone: u8,
    #[serde(rename = "MobilePhoneModel")]
    mobile_phone_model: String,
    #[serde(rename = "Params")]
    params: String,
    #[serde(rename = "IPNetworkID")]
    ip_network_id: u32,
    // "Trafic" spelling is intentional (upstream ClickBench column name).
    #[serde(rename = "TraficSourceID")]
    trafic_source_id: i8,
    #[serde(rename = "SearchEngineID")]
    search_engine_id: u16,
    #[serde(rename = "SearchPhrase")]
    search_phrase: String,
    #[serde(rename = "AdvEngineID")]
    adv_engine_id: u8,
    // "Artifical" spelling is intentional (upstream ClickBench column name).
    #[serde(rename = "IsArtifical")]
    is_artifical: u8,
    #[serde(rename = "WindowClientWidth")]
    window_client_width: u16,
    #[serde(rename = "WindowClientHeight")]
    window_client_height: u16,
    #[serde(rename = "ClientTimeZone")]
    client_time_zone: i16,
    #[serde(rename = "ClientEventTime")]
    client_event_time: String,
    #[serde(rename = "SilverlightVersion1")]
    silverlight_version1: u8,
    #[serde(rename = "SilverlightVersion2")]
    silverlight_version2: u8,
    #[serde(rename = "SilverlightVersion3")]
    silverlight_version3: u32,
    #[serde(rename = "SilverlightVersion4")]
    silverlight_version4: u16,
    #[serde(rename = "PageCharset")]
    page_charset: String,
    #[serde(rename = "CodeVersion")]
    code_version: u32,
    #[serde(rename = "IsLink")]
    is_link: u8,
    #[serde(rename = "IsDownload")]
    is_download: u8,
    #[serde(rename = "IsNotBounce")]
    is_not_bounce: u8,
    #[serde(rename = "FUniqID")]
    f_uniq_id: u64,
    #[serde(rename = "OriginalURL")]
    original_url: String,
    #[serde(rename = "HID")]
    hid: u32,
    #[serde(rename = "IsOldCounter")]
    is_old_counter: u8,
    #[serde(rename = "IsEvent")]
    is_event: u8,
    #[serde(rename = "IsParameter")]
    is_parameter: u8,
    #[serde(rename = "DontCountHits")]
    dont_count_hits: u8,
    #[serde(rename = "WithHash")]
    with_hash: u8,
    #[serde(rename = "HitColor")]
    hit_color: String,
    #[serde(rename = "LocalEventTime")]
    local_event_time: String,
    // Demographics.
    #[serde(rename = "Age")]
    age: u8,
    #[serde(rename = "Sex")]
    sex: u8,
    #[serde(rename = "Income")]
    income: u8,
    #[serde(rename = "Interests")]
    interests: u16,
    #[serde(rename = "Robotness")]
    robotness: u8,
    #[serde(rename = "RemoteIP")]
    remote_ip: u32,
    #[serde(rename = "WindowName")]
    window_name: i32,
    #[serde(rename = "OpenerName")]
    opener_name: i32,
    #[serde(rename = "HistoryLength")]
    history_length: i16,
    #[serde(rename = "BrowserLanguage")]
    browser_language: String,
    #[serde(rename = "BrowserCountry")]
    browser_country: String,
    #[serde(rename = "SocialNetwork")]
    social_network: String,
    #[serde(rename = "SocialAction")]
    social_action: String,
    #[serde(rename = "HTTPError")]
    http_error: u16,
    // Page-load timing metrics (milliseconds in ClickBench — TODO confirm
    // downstream consumers agree on the unit; this file only fills them with
    // bounded random values).
    #[serde(rename = "SendTiming")]
    send_timing: u32,
    #[serde(rename = "DNSTiming")]
    dns_timing: u32,
    #[serde(rename = "ConnectTiming")]
    connect_timing: u32,
    #[serde(rename = "ResponseStartTiming")]
    response_start_timing: u32,
    #[serde(rename = "ResponseEndTiming")]
    response_end_timing: u32,
    #[serde(rename = "FetchTiming")]
    fetch_timing: u32,
    #[serde(rename = "SocialSourceNetworkID")]
    social_source_network_id: u8,
    #[serde(rename = "SocialSourcePage")]
    social_source_page: String,
    // E-commerce / campaign attribution fields (left empty/zero by the
    // generator in this file).
    #[serde(rename = "ParamPrice")]
    param_price: i64,
    #[serde(rename = "ParamOrderID")]
    param_order_id: String,
    #[serde(rename = "ParamCurrency")]
    param_currency: String,
    #[serde(rename = "ParamCurrencyID")]
    param_currency_id: u16,
    #[serde(rename = "OpenstatServiceName")]
    openstat_service_name: String,
    #[serde(rename = "OpenstatCampaignID")]
    openstat_campaign_id: String,
    #[serde(rename = "OpenstatAdID")]
    openstat_ad_id: String,
    #[serde(rename = "OpenstatSourceID")]
    openstat_source_id: String,
    #[serde(rename = "UTMSource")]
    utm_source: String,
    #[serde(rename = "UTMMedium")]
    utm_medium: String,
    #[serde(rename = "UTMCampaign")]
    utm_campaign: String,
    #[serde(rename = "UTMContent")]
    utm_content: String,
    #[serde(rename = "UTMTerm")]
    utm_term: String,
    #[serde(rename = "FromTag")]
    from_tag: String,
    #[serde(rename = "HasGCLID")]
    has_gclid: u8,
    #[serde(rename = "RefererHash")]
    referer_hash: u64,
    #[serde(rename = "URLHash")]
    url_hash: u64,
    #[serde(rename = "CLID")]
    clid: u32,
}

// H2O Row Structure
#[derive(Debug, Deserialize, Serialize)]
struct H2oRow {
    // Row shape of the H2O db-benchmark "groupby" dataset: three string ids,
    // three integer ids, and three measure columns.
    id1: String,
    id2: String,
    id3: String,
    id4: i32,
    id5: i32,
    id6: i32,
    v1: i32,
    v2: i32,
    v3: f64,
}

// H2O Elasticsearch Document (with timestamp)
// Same columns as H2oRow plus a `timestamp` field required for time-based
// indexing in Elasticsearch. NOTE(review): the i64 timestamp is presumably an
// epoch value (ms or s); the code that populates it is outside this chunk —
// confirm the unit against the index mapping.
#[derive(Debug, Serialize)]
struct H2oEsDoc {
    timestamp: i64,
    id1: String,
    id2: String,
    id3: String,
    id4: i32,
    id5: i32,
    id6: i32,
    v1: i32,
    v2: i32,
    v3: f64,
}

/// Picks one element of `items` uniformly at random.
///
/// NOTE(review): panics if `items` is empty (`Uniform::new` requires
/// low < high). All call sites in this file pass non-empty const slices or the
/// id pools handed to `generate_hits_record`.
fn random_choice<'a, T>(items: &'a [T], rng: &mut SmallRng) -> &'a T {
    let dist = Uniform::new(0, items.len());
    &items[dist.sample(rng)]
}

/// Builds one synthetic ClickBench `HitsRecord`.
///
/// * `rng` — seeded RNG; the draw order below is fixed, so a given seed yields
///   a reproducible record stream.
/// * `watch_id_counter` — incremented first, so WatchIDs are sequential and
///   unique per generator.
/// * `user_ids` / `counter_ids` — pre-built id pools; UserID/CounterID are
///   sampled from them so values repeat realistically across records.
///
/// EventTime/EventDate are the *current* wall-clock time formatted as strings,
/// not historical ClickBench timestamps.
fn generate_hits_record(
    rng: &mut SmallRng,
    watch_id_counter: &mut u64,
    user_ids: &[u64],
    counter_ids: &[u32],
) -> HitsRecord {
    let now = chrono::Utc::now();
    let event_time = now.format("%Y-%m-%d %H:%M:%S").to_string();
    let event_date = now.format("%Y-%m-%d").to_string();

    // Half-open ranges [low, high) — e.g. dist_bool yields 0 or 1.
    let dist_bool = Uniform::new(0u8, 2);
    let dist_u8 = Uniform::new(0u8, 100);
    let dist_u16 = Uniform::new(0u16, 10000);
    let dist_u32 = Uniform::new(0u32, 100000);
    let dist_u64 = Uniform::new(0u64, 1000000);
    let dist_region = Uniform::new(1u32, 250);
    let dist_resolution_w = Uniform::new(320u16, 2560);
    let dist_resolution_h = Uniform::new(240u16, 1440);
    let dist_age = Uniform::new(0u8, 100);
    let dist_timing = Uniform::new(0u32, 5000);

    // Pre-increment: the first record gets WatchID 1 (for a counter starting
    // at 0), and ids are never reused.
    *watch_id_counter += 1;
    let watch_id = *watch_id_counter;

    let user_id = *random_choice(user_ids, rng);
    let counter_id = *random_choice(counter_ids, rng);
    let url = random_choice(URLS, rng).to_string();
    let referer = random_choice(REFERERS, rng).to_string();
    let search_phrase = random_choice(SEARCH_PHRASES, rng).to_string();
    let mobile_phone_model = random_choice(MOBILE_PHONE_MODELS, rng).to_string();
    // Empty model string means "desktop" in the sample pool above.
    let is_mobile = if mobile_phone_model.is_empty() { 0 } else { 1 };
    let title = random_choice(TITLES, rng).to_string();

    // Simple deterministic Java-style (x*31 + b) string hashes, wrapping on
    // overflow — stand-ins for the real URLHash/RefererHash columns.
    let url_hash = url.bytes().fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));
    let referer_hash = referer.bytes().fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));

    HitsRecord {
        watch_id,
        java_enable: dist_bool.sample(rng),
        title,
        good_event: 1,
        event_time: event_time.clone(),
        event_date,
        counter_id,
        client_ip: dist_u32.sample(rng),
        region_id: dist_region.sample(rng),
        user_id,
        counter_class: (dist_bool.sample(rng) as i8),
        // `% n` narrows the broad distributions to each column's plausible
        // cardinality (e.g. ~20 OS codes, ~50 user agents).
        os: dist_u8.sample(rng) % 20,
        user_agent: dist_u8.sample(rng) % 50,
        url,
        referer,
        is_refresh: dist_bool.sample(rng),
        referer_category_id: dist_u16.sample(rng) % 20,
        referer_region_id: dist_region.sample(rng),
        url_category_id: dist_u16.sample(rng) % 20,
        url_region_id: dist_region.sample(rng),
        resolution_width: dist_resolution_w.sample(rng),
        resolution_height: dist_resolution_h.sample(rng),
        resolution_depth: 24,
        flash_major: dist_u8.sample(rng) % 20,
        flash_minor: dist_u8.sample(rng) % 10,
        flash_minor2: String::new(),
        net_major: dist_u8.sample(rng) % 5,
        net_minor: dist_u8.sample(rng) % 10,
        user_agent_major: dist_u16.sample(rng) % 100,
        user_agent_minor: String::new(),
        cookie_enable: 1,
        javascript_enable: 1,
        is_mobile,
        mobile_phone: if is_mobile == 1 { dist_u8.sample(rng) % 10 } else { 0 },
        mobile_phone_model,
        params: String::new(),
        ip_network_id: dist_u32.sample(rng),
        // Yields values in -1..=8 (-1 is a valid "direct traffic"-style code).
        trafic_source_id: ((dist_u8.sample(rng) % 10) as i8) - 1,
        search_engine_id: dist_u16.sample(rng) % 30,
        search_phrase,
        adv_engine_id: dist_u8.sample(rng) % 30,
        is_artifical: 0,
        window_client_width: dist_resolution_w.sample(rng),
        window_client_height: dist_resolution_h.sample(rng),
        // UTC offset in hours, -12..=11.
        client_time_zone: ((dist_u8.sample(rng) % 24) as i16) - 12,
        client_event_time: event_time.clone(),
        silverlight_version1: 0,
        silverlight_version2: 0,
        silverlight_version3: 0,
        silverlight_version4: 0,
        page_charset: "UTF-8".to_string(),
        code_version: dist_u32.sample(rng) % 1000,
        is_link: dist_bool.sample(rng),
        is_download: dist_bool.sample(rng),
        is_not_bounce: dist_bool.sample(rng),
        f_uniq_id: dist_u64.sample(rng),
        original_url: String::new(),
        hid: dist_u32.sample(rng),
        is_old_counter: 0,
        is_event: dist_bool.sample(rng),
        is_parameter: 0,
        dont_count_hits: dist_bool.sample(rng),
        with_hash: 0,
        hit_color: "E".to_string(),
        local_event_time: event_time,
        age: dist_age.sample(rng),
        sex: dist_bool.sample(rng),
        income: dist_u8.sample(rng) % 5,
        interests: dist_u16.sample(rng),
        robotness: dist_bool.sample(rng),
        remote_ip: dist_u32.sample(rng),
        window_name: 0,
        opener_name: 0,
        history_length: (dist_u8.sample(rng) % 20) as i16,
        browser_language: random_choice(BROWSER_LANGUAGES, rng).to_string(),
        browser_country: random_choice(BROWSER_COUNTRIES, rng).to_string(),
        social_network: String::new(),
        social_action: String::new(),
        http_error: 0,
        send_timing: dist_timing.sample(rng),
        dns_timing: dist_timing.sample(rng) % 100,
        connect_timing: dist_timing.sample(rng) % 200,
        response_start_timing: dist_timing.sample(rng),
        response_end_timing: dist_timing.sample(rng),
        fetch_timing: dist_timing.sample(rng),
        social_source_network_id: 0,
        social_source_page: String::new(),
        param_price: 0,
        param_order_id: String::new(),
        param_currency: String::new(),
        param_currency_id: 0,
        openstat_service_name: String::new(),
        openstat_campaign_id: String::new(),
        openstat_ad_id: String::new(),
        openstat_source_id: String::new(),
        utm_source: String::new(),
        utm_medium: String::new(),
        utm_campaign: String::new(),
        utm_content: String::new(),
        utm_term: String::new(),
        from_tag: String::new(),
        has_gclid: 0,
        referer_hash,
        url_hash,
        clid: 0,
    }
}

// Export mode selected on the command line (clap ValueEnum: variants map to
// kebab-case flags, e.g. `h2o-elasticsearch`).
#[derive(Clone, ValueEnum, Debug)]
enum Mode {
    /// Generate synthetic fake data and send to Kafka
    Fake,
    /// Read ClickBench JSON data from file and send to Kafka
    Clickbench,
    /// Read H2O CSV data from file and send to Kafka
    H2o,
    /// Read H2O CSV data from file and send to Elasticsearch
    H2oElasticsearch,
}

// CLI argument definition (continues past the end of this chunk).
#[derive(Parser)]
#[command(name = "data_exporter")]
#[command(about
= "ClickBench-compatible data exporter to Kafka (fake, clickbench, or h2o data)")] +struct Args { + #[arg(long, value_enum, env = "DATA_MODE", help = "Data source mode (fake, clickbench, or h2o)")] + mode: Mode, + + #[arg(long, env = "KAFKA_BROKER", help = "Kafka broker address")] + kafka_broker: Option, + + #[arg(long, env = "KAFKA_TOPIC", help = "Kafka topic name")] + kafka_topic: Option, + + #[arg(long, env = "DATA_BATCH_SIZE", help = "Number of records per batch")] + batch_size: usize, + + #[arg(long, env = "DATA_FREQUENCY_SECONDS", help = "Seconds between batches (fake mode only)")] + frequency: u64, + + #[arg(long, env = "FAKE_NUM_USERS", help = "Number of unique users (fake mode only)")] + num_users: usize, + + #[arg(long, env = "FAKE_NUM_COUNTERS", help = "Number of unique counters (fake mode only)")] + num_counters: usize, + + #[arg(long, env = "DEBUG_PRINT", default_value = "false", help = "Print records to console")] + debug_print: bool, + + #[arg(long, env = "TOTAL_RECORDS", help = "Total records to generate/send (0 = infinite/all)")] + total_records: Option, + + #[arg(long, env = "CLICKBENCH_FILE", help = "Path to hits.json or hits.json.gz (clickbench mode)")] + input_file: Option, + + #[arg(long, env = "INPUT_FILE", help = "Path to input file (h2o/general usage)")] + general_input_file: Option, + + #[arg( + long, + env = "ELASTIC_HOST", + default_value = "localhost", + help = "Elasticsearch host" + )] + elastic_host: String, + + #[arg( + long, + env = "ELASTIC_PORT", + default_value = "9200", + help = "Elasticsearch port" + )] + elastic_port: u16, + + #[arg( + long, + env = "ELASTIC_INDEX_NAME", + default_value = "h2o_benchmark", + help = "Elasticsearch index name" + )] + elastic_index: String, + + #[arg( + long, + env = "ELASTIC_API_KEY", + help = "Elasticsearch API key (optional)" + )] + elastic_api_key: Option, +} + +async fn run_fake_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + let mut rng = 
SmallRng::seed_from_u64(RNG_SEED); + let user_dist = Uniform::new(1u64, u64::MAX / 2); + let counter_dist = Uniform::new(1u32, 1000); + + let user_ids: Vec = (0..args.num_users) + .map(|_| user_dist.sample(&mut rng)) + .collect(); + + let mut counter_ids: Vec = (0..args.num_counters) + .map(|_| counter_dist.sample(&mut rng)) + .collect(); + counter_ids.push(62); // Required for ClickBench queries 37-43 + + println!( + "Generated {} unique users and {} unique counters", + user_ids.len(), + counter_ids.len() + ); + println!( + "Generating {} records per batch every {} second(s)", + args.batch_size, args.frequency + ); + + let mut watch_id_counter: u64 = 0; + let mut total_sent: u64 = 0; + + loop { + for _ in 0..args.batch_size { + let record = generate_hits_record(&mut rng, &mut watch_id_counter, &user_ids, &counter_ids); + let record_str = serde_json::to_string(&record)?; + + if args.debug_print { + println!("{}", record_str); + } + + let delivery_status = producer + .send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(&record_str) + .key(&watch_id_counter.to_string()), + Duration::from_secs(0), + ) + .await; + + if let Err((err, _)) = delivery_status { + eprintln!("Failed to send message to Kafka: {}", err); + } + + total_sent += 1; + + if let Some(limit) = args.total_records { + if limit > 0 && total_sent >= limit { + println!("Reached target of {} records. Exiting.", limit); + return Ok(()); + } + } + } + + println!("Sent batch. 
Total records: {}", total_sent); + + if args.total_records.map_or(true, |l| l == 0 || total_sent < l) { + sleep(Duration::from_secs(args.frequency)).await; + } + } +} + +async fn run_clickbench_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + let input_file = args.input_file.as_deref() + .ok_or("--input-file is required for clickbench mode")?; + + println!("Reading ClickBench data from: {}", input_file); + + let file = File::open(input_file)?; + let reader: Box = if input_file.ends_with(".gz") { + Box::new(BufReader::new(GzDecoder::new(file))) + } else { + Box::new(BufReader::new(file)) + }; + + let mut total_sent: u64 = 0; + let total_limit = args.total_records.unwrap_or(0); + let mut batch: Vec<(String, String)> = Vec::with_capacity(args.batch_size); // (key, payload) + + // ClickBench dataset has ~100M rows + let total_rows = if total_limit > 0 { total_limit } else { 99_997_497 }; + let pb = ProgressBar::new(total_rows); + pb.set_style(ProgressStyle::default_bar() + .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({percent}%) {per_sec} ETA: {eta}")? 
+ .progress_chars("#>-")); + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + eprintln!("Warning: Error reading line: {}", e); + continue; + } + }; + + let line = line.trim(); + if line.is_empty() { + continue; + } + + let key = (total_sent + batch.len() as u64).to_string(); + batch.push((key, line.to_string())); + + if batch.len() >= args.batch_size { + let futures: Vec<_> = batch + .iter() + .map(|(key, payload)| { + producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(payload) + .key(key), + Duration::from_secs(5), + ) + }) + .collect(); + + let results = join_all(futures).await; + for result in results { + if let Err((err, _)) = result { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + + total_sent += batch.len() as u64; + pb.set_position(total_sent); + batch.clear(); + + if total_limit > 0 && total_sent >= total_limit { + break; + } + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + } + } + + // Send remaining records + if !batch.is_empty() && (total_limit == 0 || total_sent < total_limit) { + let futures: Vec<_> = batch + .iter() + .map(|(key, payload)| { + producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()) + .payload(payload) + .key(key), + Duration::from_secs(5), + ) + }) + .collect(); + + let results = join_all(futures).await; + for result in results { + if let Err((err, _)) = result { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + total_sent += batch.len() as u64; + pb.set_position(total_sent); + } + + pb.finish_with_message(format!("Done! Sent {} records", total_sent)); + Ok(()) +} + +async fn run_h2o_mode(args: &Args, producer: &FutureProducer) -> Result<(), Box> { + // 1. 
Handle Input File Selection safely + let file_path = if let Some(path) = &args.general_input_file { + path.clone() + } else if let Some(path_str) = &args.input_file { + PathBuf::from(path_str) + } else { + panic!("Input file required for H2O mode (use --input-file or --clickbench-file)"); + }; + + println!("Reading H2O data from: {:?}", file_path); + let file = File::open(file_path)?; + let reader = BufReader::new(file); + + let mut batch = Vec::with_capacity(args.batch_size); + let mut total_sent = 0; + + let pb = ProgressBar::new_spinner(); + pb.set_style(ProgressStyle::default_spinner().template("{spinner:.green} [{elapsed_precise}] {msg}")?); + + for line in reader.lines() { + let line = line?; + if line.is_empty() || line.starts_with("id1") { continue; } // Skip header + + let cols: Vec<&str> = line.split(',').collect(); + if cols.len() < 9 { continue; } + + let row = H2oRow { + id1: cols[0].to_string(), + id2: cols[1].to_string(), + id3: cols[2].to_string(), + id4: cols[3].parse().unwrap_or(0), + id5: cols[4].parse().unwrap_or(0), + id6: cols[5].parse().unwrap_or(0), + v1: cols[6].parse().unwrap_or(0), + v2: cols[7].parse().unwrap_or(0), + v3: cols[8].parse().unwrap_or(0.0), + }; + + let payload = serde_json::to_string(&row)?; + batch.push(payload); + + // SEND BATCH + if batch.len() >= args.batch_size { + let mut futures = Vec::with_capacity(batch.len()); + + for payload in batch.iter() { + let future = producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()).payload(payload).key(""), + Duration::from_secs(5), + ); + futures.push(future); + } + + // Wait for all sends to complete + join_all(futures).await; + + // NOW it is safe to clear the batch, as futures are done + batch.clear(); + + total_sent += args.batch_size; + pb.set_message(format!("Sent {} records", total_sent)); + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + } + } + + // SEND REMAINING + if !batch.is_empty() { + let mut futures = 
Vec::with_capacity(batch.len()); + let count = batch.len(); + + for payload in batch.iter() { + let future = producer.send( + FutureRecord::to(&args.kafka_topic.as_ref().unwrap()).payload(payload).key(""), + Duration::from_secs(5), + ); + futures.push(future); + } + + join_all(futures).await; + batch.clear(); + + total_sent += count; + } + + pb.finish_with_message(format!("Done! Sent {} H2O records", total_sent)); + Ok(()) +} + +async fn run_h2o_elasticsearch_mode(args: &Args) -> Result<(), Box> { + let file_path = if let Some(path) = &args.general_input_file { + path.clone() + } else if let Some(path_str) = &args.input_file { + PathBuf::from(path_str) + } else { + return Err("Input file required for H2O Elasticsearch mode".into()); + }; + + println!("Reading H2O data from: {:?}", file_path); + + // Connect to Elasticsearch with optional API key + let elastic_url = format!("http://{}:{}", args.elastic_host, args.elastic_port); + + let transport = if let Some(api_key) = &args.elastic_api_key { + println!("Using API key authentication"); + use elasticsearch::http::headers::HeaderMap; + use elasticsearch::http::headers::HeaderValue; + use elasticsearch::http::transport::TransportBuilder; + + let mut headers = HeaderMap::new(); + headers.insert( + "Authorization", + HeaderValue::from_str(&format!("ApiKey {}", api_key))?, + ); + + TransportBuilder::new( + elasticsearch::http::transport::SingleNodeConnectionPool::new(elastic_url.parse()?), + ) + .headers(headers) + .build()? + } else { + println!("No API key provided, connecting without authentication"); + Transport::single_node(&elastic_url)? + }; + + let client = Elasticsearch::new(transport); + + println!("Connected to Elasticsearch at {}", elastic_url); + + // Check if index exists, create if not + let index_exists = client + .indices() + .exists(elasticsearch::indices::IndicesExistsParts::Index(&[ + &args.elastic_index + ])) + .send() + .await? 
+ .status_code() + .is_success(); + + if !index_exists { + println!("Creating index: {}", args.elastic_index); + let create_response = client + .indices() + .create(elasticsearch::indices::IndicesCreateParts::Index( + &args.elastic_index, + )) + .body(json!({ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "30s" + }, + "mappings": { + "properties": { + "timestamp": {"type": "date", "format": "epoch_millis"}, + "id1": {"type": "keyword"}, + "id2": {"type": "keyword"}, + "id3": {"type": "keyword"}, + "id4": {"type": "long"}, + "id5": {"type": "long"}, + "id6": {"type": "long"}, + "v1": {"type": "long"}, + "v2": {"type": "long"}, + "v3": {"type": "double"} + } + } + })) + .send() + .await?; + + if !create_response.status_code().is_success() { + let error_text = create_response.text().await?; + eprintln!("Failed to create index. Error response: {}", error_text); + return Err("Failed to create index".into()); + } + println!("Index created successfully"); + } else { + println!( + "Index {} already exists, skipping creation", + args.elastic_index + ); + } + + let file = File::open(file_path)?; + let reader = BufReader::new(file); + + let mut batch = Vec::with_capacity(args.batch_size); + let mut total_sent = 0u64; + let mut row_num = 0i64; + + let base_timestamp = + chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00Z")?.timestamp_millis(); + + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::default_spinner().template("{spinner:.green} [{elapsed_precise}] {msg}")?, + ); + + for line in reader.lines() { + let line = line?; + if line.is_empty() || line.starts_with("id1") { + continue; + } + + let cols: Vec<&str> = line.split(',').collect(); + if cols.len() < 9 { + continue; + } + + // Create document with timestamp + let doc = H2oEsDoc { + timestamp: base_timestamp + (row_num * 1000), // Increment by 1 second per row + id1: cols[0].to_string(), + id2: cols[1].to_string(), + id3: cols[2].to_string(), + 
id4: cols[3].parse().unwrap_or(0), + id5: cols[4].parse().unwrap_or(0), + id6: cols[5].parse().unwrap_or(0), + v1: cols[6].parse().unwrap_or(0), + v2: cols[7].parse().unwrap_or(0), + v3: cols[8].parse().unwrap_or(0.0), + }; + + batch.push(serde_json::to_value(&doc)?); + row_num += 1; + + // Send batch when full + if batch.len() >= args.batch_size { + let mut body: Vec = Vec::with_capacity(batch.len() * 2); + + for doc in &batch { + body.push(serde_json::to_string(&json!({"index": {}}))?); + body.push(serde_json::to_string(&doc)?); + } + + let response = client + .bulk(BulkParts::Index(&args.elastic_index)) + .body(body) + .send() + .await?; + + if !response.status_code().is_success() { + eprintln!("Bulk indexing error: {:?}", response.text().await?); + } + + total_sent += batch.len() as u64; + batch.clear(); + + pb.set_message(format!("Indexed {} documents", total_sent)); + + if args.frequency > 0 { + sleep(Duration::from_secs(args.frequency)).await; + } + + // Check limit + if let Some(limit) = args.total_records { + if limit > 0 && total_sent >= limit { + break; + } + } + } + } + + // Send remaining documents + if !batch.is_empty() { + let mut body: Vec = Vec::with_capacity(batch.len() * 2); + + for doc in &batch { + body.push(serde_json::to_string(&json!({"index": {}}))?); + body.push(serde_json::to_string(&doc)?); + } + + let response = client + .bulk(BulkParts::Index(&args.elastic_index)) + .body(body) + .send() + .await?; + + if !response.status_code().is_success() { + eprintln!("Bulk indexing error: {:?}", response.text().await?); + } + + total_sent += batch.len() as u64; + } + + // Refresh index + println!("Refreshing index..."); + client + .indices() + .refresh(elasticsearch::indices::IndicesRefreshParts::Index(&[ + &args.elastic_index + ])) + .send() + .await?; + + pb.finish_with_message(format!( + "Done! 
Indexed {} H2O documents to Elasticsearch", + total_sent + )); + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let args = Args::parse(); + + match args.mode { + Mode::Fake | Mode::Clickbench | Mode::H2o => { + // Kafka modes - require broker and topic + let kafka_broker = args + .kafka_broker + .as_ref() + .ok_or("--kafka-broker required for Kafka modes")?; + let kafka_topic = args + .kafka_topic + .as_ref() + .ok_or("--kafka-topic required for Kafka modes")?; + + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("message.timeout.ms", "30000") + .set("queue.buffering.max.messages", "100000") + .set("batch.num.messages", "1000") + .create() + .expect("Failed to create Kafka producer"); + + println!( + "Connected to Kafka broker: {}, topic: {}", + kafka_broker, kafka_topic + ); + + match args.mode { + Mode::Fake => { + println!("Mode: fake (generating synthetic data)"); + run_fake_mode(&args, &producer).await + } + Mode::Clickbench => { + println!("Mode: clickbench (reading from file)"); + run_clickbench_mode(&args, &producer).await + } + Mode::H2o => { + println!("Mode: h2o (reading from file)"); + run_h2o_mode(&args, &producer).await + } + Mode::H2oElasticsearch => { + panic!("Invalid mode after setting up Kafka broker and topic"); + } + } + } + Mode::H2oElasticsearch => { + println!("Mode: h2o-elasticsearch (direct to Elasticsearch)"); + run_h2o_elasticsearch_mode(&args).await + } + } +} diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql new file mode 100644 index 0000000..90e493a --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_elastic_queries.sql @@ -0,0 +1,29 @@ +-- Q1: Sum v1 by id1 +SELECT id1, sum(v1) AS v1 FROM "h2o_benchmark" GROUP BY id1 ORDER BY id1; + +-- Q2: Sum v1 by id1:id2 +SELECT id1, id2, sum(v1) AS v1 FROM "h2o_benchmark" GROUP BY id1, id2 
ORDER BY id1, id2; + +-- Q3: Sum v1 mean v3 by id3 +SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM "h2o_benchmark" GROUP BY id3 ORDER BY id3; + +-- Q4: Mean v1:v3 by id4 +SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM "h2o_benchmark" GROUP BY id4 ORDER BY id4; + +-- Q5: Sum v1:v3 by id6 +SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM "h2o_benchmark" GROUP BY id6 ORDER BY id6; + +-- Q6: Median v3 sd v3 by id4 id5 +SELECT id4, id5, PERCENTILE(v3, 50) AS median_v3, STDDEV_SAMP(v3) AS sd_v3 FROM "h2o_benchmark" GROUP BY id4, id5 ORDER BY id4, id5; + +-- Q7: Max v1 - min v2 by id3 +SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM "h2o_benchmark" GROUP BY id3 ORDER BY id3; + +-- Q8: Largest two v3 by id6 (Elasticsearch SQL doesn't support LIMIT BY) +SELECT id6, v3 FROM "h2o_benchmark" ORDER BY v3 DESC LIMIT 20; + +-- Q9: Count rows +SELECT id2, id4, COUNT(*) as count FROM "h2o_benchmark" GROUP BY id2, id4 ORDER BY id2, id4; + +-- Q10: Sum v3 count by id1:id6 +SELECT id1, id6, sum(v3) AS v3, count(*) AS count FROM "h2o_benchmark" GROUP BY id1, id6 ORDER BY id1, id6; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql new file mode 100644 index 0000000..1d29987 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/h2o_queries.sql @@ -0,0 +1,29 @@ +-- Q1: Sum v1 by id1 +SELECT id1, sum(v1) AS v1 FROM h2o_groupby GROUP BY id1 ORDER BY id1; + +-- Q2: Sum v1 by id1:id2 +SELECT id1, id2, sum(v1) AS v1 FROM h2o_groupby GROUP BY id1, id2 ORDER BY id1, id2; + +-- Q3: Sum v1 mean v3 by id3 +SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM h2o_groupby GROUP BY id3 ORDER BY id3; + +-- Q4: Mean v1:v3 by id4 +SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM h2o_groupby GROUP BY id4 ORDER BY id4; + +-- Q5: Sum v1:v3 by id6 +SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM h2o_groupby GROUP BY id6 ORDER BY 
id6; + +-- Q6: Median v3 sd v3 by id4 id5 +SELECT id4, id5, median(v3) AS median_v3, stddevSamp(v3) AS sd_v3 FROM h2o_groupby GROUP BY id4, id5 ORDER BY id4, id5; + +-- Q7: Max v1 - min v2 by id3 +SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM h2o_groupby GROUP BY id3 ORDER BY id3; + +-- Q8: Largest two v3 by id6 +SELECT id6, v3 FROM h2o_groupby ORDER BY v3 DESC LIMIT 2 BY id6; + +-- Q9: Regression v1 v2 by id2 id4 (Approximation using corr for benchmark simplicity or skip) +SELECT id2, id4, corr(v1, v2) FROM h2o_groupby GROUP BY id2, id4 ORDER BY id2, id4; + +-- Q10: Sum v3 count by id1:id6 +SELECT id1, id6, sum(v3) AS v3, count(*) AS count FROM h2o_groupby GROUP BY id1, id6 ORDER BY id1, id6; \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh new file mode 100755 index 0000000..c8a2498 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_data.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Check data ingestion status + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." 
+ +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +echo "=== ClickHouse Data Status ===" +echo "" + +# Count total rows +echo "Total rows in hits table:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT count() FROM hits FORMAT Pretty" +echo "" + +# Check recent data +echo "Most recent records:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT EventTime, CounterID, UserID, URL FROM hits ORDER BY EventTime DESC LIMIT 5 FORMAT Pretty" +echo "" + +# Table size +echo "Table size:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT formatReadableSize(sum(bytes)) as size, count() as parts FROM system.parts WHERE table = 'hits' AND active FORMAT Pretty" +echo "" + +# Kafka consumer lag (if available) +echo "Kafka consumer status:" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT * FROM system.kafka_consumers FORMAT Pretty" 2>/dev/null || echo "No Kafka consumer info available" + +echo "=== H2O Data Status ===" +curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "SELECT count() as h2o_rows FROM h2o_groupby FORMAT Pretty" +echo "" diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh new file mode 100755 index 0000000..06a740d --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/check_elastic_data.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Check Elasticsearch data ingestion status for H2O benchmark + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +AUTH=(-H "Authorization: ApiKey ${ES_API_KEY}") +BASE="http://${ES_HOST}:${ES_PORT}" + +echo "=== Elasticsearch Data Status ===" +echo "" + +# Check connection +if ! 
curl -s "${AUTH[@]}" "${BASE}/" > /dev/null; then + echo "Error: Cannot connect to Elasticsearch at ${ES_HOST}:${ES_PORT}" + exit 1 +fi + +# Total document count +echo "Total documents in ${ES_INDEX_NAME} index:" +curl -s "${AUTH[@]}" "${BASE}/${ES_INDEX_NAME}/_count" | \ + python3 -c "import sys, json; print(json.load(sys.stdin)['count'])" +echo "" + +# Index stats +echo "Index statistics:" +curl -s "${AUTH[@]}" "${BASE}/${ES_INDEX_NAME}/_stats" | \ + python3 -c " +import sys, json +stats = json.load(sys.stdin) +idx_stats = stats['indices']['${ES_INDEX_NAME}']['primaries'] +print(f\" Total size: {idx_stats['store']['size_in_bytes'] / (1024*1024*1024):.2f} GB\") +print(f\" Document count: {idx_stats['docs']['count']:,}\") +print(f\" Deleted docs: {idx_stats['docs']['deleted']:,}\") +" +echo "" + +# Sample documents +echo "Sample documents (first 5 by timestamp):" +curl -s "${AUTH[@]}" -X POST "${BASE}/${ES_INDEX_NAME}/_search" \ + -H 'Content-Type: application/json' \ + -d '{ + "size": 5, + "sort": [{"timestamp": "asc"}], + "_source": ["timestamp", "id1", "id2", "id3", "v1", "v2", "v3"] + }' | \ + python3 -c " +import sys, json +from datetime import datetime +results = json.load(sys.stdin) +for hit in results['hits']['hits']: + doc = hit['_source'] + ts = datetime.fromtimestamp(doc['timestamp'] / 1000).strftime('%Y-%m-%d %H:%M:%S') + print(f\" {ts} | id1={doc['id1']} id2={doc['id2']} id3={doc['id3']} | v1={doc['v1']} v2={doc['v2']} v3={doc['v3']}\") +" +echo "" + +# Aggregation test +echo "Sample aggregation (count by id1):" +curl -s "${AUTH[@]}" -X POST "${BASE}/${ES_INDEX_NAME}/_search" \ + -H 'Content-Type: application/json' \ + -d '{ + "size": 0, + "aggs": { + "by_id1": { + "terms": {"field": "id1", "size": 5} + } + } + }' | \ + python3 -c " +import sys, json +results = json.load(sys.stdin) +for bucket in results['aggregations']['by_id1']['buckets']: + print(f\" {bucket['key']}: {bucket['doc_count']:,} documents\") +" +echo "" \ No newline at end of file diff 
--git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh new file mode 100755 index 0000000..df145e2 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/generate_data.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# Data generation script with multiple data source modes +# +# Usage: +# DATA_MODE=fake ./scripts/generate_data.sh +# DATA_MODE=clickbench ./scripts/generate_data.sh +# DATA_MODE=h2o ./scripts/generate_data.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +# Define Directories +IMPORTER_DIR="${PROJECT_DIR}/benchmark_importer" +DATA_DIR="${IMPORTER_DIR}/data" + +# Hardcoded Kafka Path +KAFKA_HOME="${PROJECT_DIR}/../../Utilities/installation/kafka/kafka" +KAFKA_BIN="${KAFKA_HOME}/bin" + +# Check Mode +if [ -z "${DATA_MODE}" ]; then + echo "Error: DATA_MODE is required" + echo "Usage: DATA_MODE=fake|clickbench|h2o $0" + exit 1 +fi + +echo "Data generation mode: ${DATA_MODE}" +echo "Kafka broker: ${KAFKA_BROKER}" + +# 1. Build Rust Binary (Common for all modes) +echo "Building data_exporter..." +cd "${PROJECT_DIR}/data_exporter" +if [ ! -f target/release/data_exporter ]; then + cargo build --release +fi +cd "${PROJECT_DIR}" + +case "${DATA_MODE}" in + fake) + # Synthetic Data Generation + # Note: We don't strictly need to create the topic here as the producer will auto-create it, + # but if you wanted to enforce partitions, you would use ${KAFKA_BIN}/kafka-topics.sh here. 
+ + EXTRA_ARGS=() + if [ -n "${TOTAL_RECORDS}" ] && [ "${TOTAL_RECORDS}" -gt 0 ]; then + EXTRA_ARGS+=(--total-records "${TOTAL_RECORDS}") + fi + + ./data_exporter/target/release/data_exporter \ + --mode fake \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${KAFKA_TOPIC}" \ + "${EXTRA_ARGS[@]}" + ;; + + clickbench) + # Real ClickBench Data + echo "Ensuring ClickBench data exists..." + + # Call the Python downloader + python3 "${IMPORTER_DIR}/download_data.py" \ + --output-dir "${DATA_DIR}" + + FILE_NAME="hits.json" + # Check for .gz if .json doesn't exist + if [ ! -f "${DATA_DIR}/${FILE_NAME}" ] && [ -f "${DATA_DIR}/hits.json.gz" ]; then + FILE_NAME="hits.json.gz" + fi + + echo "Ingesting ${FILE_NAME} to Kafka..." + ./data_exporter/target/release/data_exporter \ + --mode clickbench \ + --clickbench-file "${DATA_DIR}/${FILE_NAME}" \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${KAFKA_TOPIC}" + ;; + + h2o) + # H2O Benchmark Data + H2O_TOPIC=${H2O_KAFKA_TOPIC:-h2o_groupby} + H2O_FILE="G1_1e7_1e2_0_0.csv" + + # 1. Ensure Topic Exists using hardcoded path + echo "Ensuring topic '${H2O_TOPIC}' exists..." + + if [ -x "${KAFKA_BIN}/kafka-topics.sh" ]; then + "${KAFKA_BIN}/kafka-topics.sh" --create --if-not-exists \ + --topic "${H2O_TOPIC}" \ + --bootstrap-server "${KAFKA_BROKER}" \ + --partitions 1 --replication-factor 1 2>/dev/null || true + else + echo "Warning: ${KAFKA_BIN}/kafka-topics.sh not found. Skipping explicit topic creation." + fi + + # 2. Download Data + echo "Ensuring H2O data exists..." + + # Check and install gdown if missing (required for H2O download) + if ! python3 -c "import gdown" 2>/dev/null; then + echo "Installing python dependency: gdown..." + pip install gdown + fi + + python3 "${IMPORTER_DIR}/download_h2o_data.py" \ + --output-dir "${DATA_DIR}" + + # 3. Ingest Data + echo "Ingesting ${H2O_FILE} to Kafka topic ${H2O_TOPIC}..." 
+ ./data_exporter/target/release/data_exporter \ + --mode h2o \ + --input-file "${DATA_DIR}/${H2O_FILE}" \ + --kafka-broker "${KAFKA_BROKER}" \ + --kafka-topic "${H2O_TOPIC}" + ;; + + *) + echo "Error: Unknown DATA_MODE '${DATA_MODE}'" + exit 1 + ;; +esac \ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh new file mode 100755 index 0000000..e6fe485 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_clickhouse.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +# --- CONFIGURATION --- +CH_INSTALL_DIR="${PROJECT_DIR}/../../Utilities/installation/clickhouse/clickhouse" +CLICKHOUSE_BIN="${CH_INSTALL_DIR}/clickhouse" + +# Set Defaults if config variables are missing +CH_HOST="${CLICKHOUSE_HOST:-127.0.0.1}" +CH_PORT="${CLICKHOUSE_PORT:-9000}" + +# Check binary +if [ ! -f "${CLICKHOUSE_BIN}" ]; then + echo "Error: ClickHouse binary not found at ${CLICKHOUSE_BIN}" + exit 1 +fi + +# Select Mode +if [ "${DATA_MODE}" == "fake" ] || [ "${DATA_MODE}" == "clickbench" ]; then + echo "Mode: ClickBench (Real or Fake)" + SQL_FILE="${PROJECT_DIR}/clickhouse/schema.sql" +elif [ "${DATA_MODE}" == "h2o" ]; then + echo "Mode: H2O Benchmark" + SQL_FILE="${PROJECT_DIR}/clickhouse/h2o_init.sql" +else + echo "Error: Unknown DATA_MODE '${DATA_MODE}'" + exit 1 +fi + +echo "Initializing ClickHouse tables using ${SQL_FILE}..." +echo "Connecting to ${CH_HOST}:${CH_PORT}..." + +# Execute SQL (Using --flag=value syntax to prevent parsing errors) +"${CLICKHOUSE_BIN}" client \ + --host="${CH_HOST}" \ + --port="${CH_PORT}" \ + --multiquery < "${SQL_FILE}" + +echo "ClickHouse initialization complete." 
\ No newline at end of file diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh new file mode 100755 index 0000000..5867228 --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/init_elastic.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# Load H2O data directly into Elasticsearch using Rust data_exporter + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +H2O_FILE="${PROJECT_DIR}/${H2O_DATA_DIR}/${H2O_FILENAME}" + +echo "=== H2O → Elasticsearch Direct Import (Rust) ===" +echo "" + +# Check if H2O data file exists +if [ ! -f "${H2O_FILE}" ]; then + echo "H2O data file not found at ${H2O_FILE}" + echo "Downloading H2O dataset..." + cd "${PROJECT_DIR}/benchmark_importer" + python3 download_h2o_data.py --output-dir "${PROJECT_DIR}/${H2O_DATA_DIR}" + cd "${SCRIPT_DIR}" + + if [ ! -f "${H2O_FILE}" ]; then + echo "Error: Failed to download H2O data" + exit 1 + fi +fi + +FILE_SIZE=$(du -h "${H2O_FILE}" | cut -f1) +echo "Found H2O data file: ${H2O_FILE} (${FILE_SIZE})" +echo "" + +# Check Elasticsearch connection +echo "Checking Elasticsearch connection at ${ES_HOST}:${ES_PORT}..." +if ! curl -s "http://${ES_HOST}:${ES_PORT}/" > /dev/null; then + echo "Error: Cannot connect to Elasticsearch at ${ES_HOST}:${ES_PORT}" + echo "Please ensure Elasticsearch is running" + exit 1 +fi + +ES_VERSION=$(curl -s "http://${ES_HOST}:${ES_PORT}/" | grep -o '"number" : "[^"]*"' | head -1 | sed 's/.*: "\(.*\)"/\1/') +echo "Connected to Elasticsearch version: ${ES_VERSION}" +echo "" + +# Build the data_exporter binary if needed +cd "${PROJECT_DIR}/data_exporter" +if [ ! -f target/release/data_exporter ]; then + echo "Building data_exporter..." + cargo build --release +fi + +echo "" +echo "Importing H2O data into Elasticsearch..." 
+echo "This may take several minutes..." +echo "" + +EXTRA_ARGS=() +if [ -n "${TOTAL_RECORDS}" ] && [ "${TOTAL_RECORDS}" -gt 0 ]; then + EXTRA_ARGS+=(--total-records "${TOTAL_RECORDS}") +fi + +./target/release/data_exporter \ + --mode h2o-elasticsearch \ + --input-file "${H2O_FILE}" \ + --elastic-host "${ES_HOST}" \ + --elastic-port "${ES_PORT}" \ + --elastic-index "${ES_INDEX_NAME}" \ + --elastic-api-key "${ES_API_KEY}" \ + --batch-size "${ES_BULK_SIZE}" \ + "${EXTRA_ARGS[@]}" + +if [ $? -eq 0 ]; then + echo "" + echo "✓ Import complete!" + echo "" + echo "Index: ${ES_INDEX_NAME}" +else + echo "" + echo "Error: Failed to import data into Elasticsearch" + exit 1 +fi diff --git a/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh new file mode 100755 index 0000000..f3c431f --- /dev/null +++ b/ExecutionUtilities/clickhouse-benchmark-pipeline/scripts/run_benchmark.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# ClickBench Benchmark Runner +# Reads queries from benchmark_queries.sql and reports timing + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="${SCRIPT_DIR}/.." + +# Load config +set -a +source "${PROJECT_DIR}/config.env" +set +a + +OUTPUT_DIR="${PROJECT_DIR}/${BENCHMARK_RESULTS_DIR}" + +mkdir -p "$OUTPUT_DIR" + +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +if [ "$1" == "h2o" ]; then + QUERIES_FILE="${PROJECT_DIR}/h2o_queries.sql" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_h2o_${TIMESTAMP}.csv" + DATABASE_TYPE="clickhouse" + echo "Running H2O Benchmark on ClickHouse..." +elif [ "$1" == "h2o_elastic" ]; then + QUERIES_FILE="${PROJECT_DIR}/h2o_elastic_queries.sql" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_h2o_elastic_${TIMESTAMP}.csv" + DATABASE_TYPE="elasticsearch" + echo "Running H2O Benchmark on Elasticsearch..." 
+elif [ "$1" == "clickbench" ]; then + QUERIES_FILE="${PROJECT_DIR}/${BENCHMARK_QUERIES_FILE}" + RESULTS_FILE="${OUTPUT_DIR}/benchmark_clickbench_${TIMESTAMP}.csv" + DATABASE_TYPE="clickhouse" + echo "Running ClickBench Benchmark..." +else + echo "Error: invalid benchmark name '$1'" + exit 1 +fi + +echo "query_num,query_time_ms,rows_read,bytes_read" > "$RESULTS_FILE" + +run_clickhouse_query() { + local query="$1" + local query_num="$2" + + echo "Running Q${query_num}..." + + RESULT=$(curl -s "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/" \ + --data-binary "$query FORMAT JSON" 2>/dev/null || echo '{}') + + ELAPSED=$(echo "$RESULT" | grep -o '"elapsed": *[0-9.]*' | head -1 | sed 's/.*: *//') + ROWS_READ=$(echo "$RESULT" | grep -o '"rows_read": *[0-9]*' | head -1 | sed 's/.*: *//') + BYTES_READ=$(echo "$RESULT" | grep -o '"bytes_read": *[0-9]*' | head -1 | sed 's/.*: *//') + + if [ -n "$ELAPSED" ]; then + TIME_MS=$(echo "$ELAPSED * 1000" | bc 2>/dev/null || echo "0") + else + TIME_MS="0" + fi + + echo "${query_num},${TIME_MS:-0},${ROWS_READ:-0},${BYTES_READ:-0}" >> "$RESULTS_FILE" + echo " Q${query_num}: ${TIME_MS:-0}ms (${ROWS_READ:-0} rows, ${BYTES_READ:-0} bytes)" +} + +run_elasticsearch_sql_query() { + local sql_query="$1" + local query_num="$2" + + echo "Running Q${query_num}..." 
+ + START=$(date +%s%N) + + RESULT=$(jq -n --arg q "$sql_query" '{"query": $q}' | \ + curl -s -X POST "http://${ES_HOST}:${ES_PORT}/_sql?format=json" \ + -H "Authorization: ApiKey ${ES_API_KEY}" \ + -H "Content-Type: application/json" \ + -d @-) + + END=$(date +%s%N) + + TIME_MS=$(( (END - START) / 1000000 )) + + ROWS_READ=$(echo "$RESULT" | python3 -c " +import sys, json +try: + d = json.load(sys.stdin) + print(len(d.get('rows', []))) +except: + print(0) +") + + echo "${query_num},${TIME_MS},${ROWS_READ},0" >> "$RESULTS_FILE" + echo " Q${query_num}: ${TIME_MS}ms (${ROWS_READ} rows)" +} + +echo "Reading queries from: $QUERIES_FILE" +echo "Results will be saved to: $RESULTS_FILE" +echo "" + +# Read queries from file (skip comments and empty lines) +QUERY_NUM=0 +while IFS= read -r line; do + # Skip empty lines and comments + [[ -z "$line" || "$line" =~ ^[[:space:]]*-- ]] && continue + + # Remove trailing semicolon and whitespace + query="${line%;}" + query="${query%"${query##*[![:space:]]}"}" + + QUERY_NUM=$((QUERY_NUM + 1)) + + if [ "$DATABASE_TYPE" == "elasticsearch" ]; then + run_elasticsearch_sql_query "$query" "$QUERY_NUM" + else + run_clickhouse_query "$query" "$QUERY_NUM" + fi +done < "$QUERIES_FILE" + +echo "" +echo "Benchmark complete!" 
+echo "Results saved to: $RESULTS_FILE" + +echo "" +echo "=== Summary ===" +TOTAL_TIME=$(awk -F',' 'NR>1 {sum+=$2} END {print sum}' "$RESULTS_FILE") +echo "Total queries: $QUERY_NUM" +echo "Total query time: ${TOTAL_TIME}ms" +if [ "$QUERY_NUM" -gt 0 ]; then + AVG_TIME=$((TOTAL_TIME / QUERY_NUM)) + echo "Average query time: ${AVG_TIME}ms" +fi diff --git a/ExecutionUtilities/csv_to_prometheus/__init__.py b/ExecutionUtilities/csv_to_prometheus/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py b/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py new file mode 100644 index 0000000..4abfb71 --- /dev/null +++ b/ExecutionUtilities/csv_to_prometheus/csv_to_prometheus.py @@ -0,0 +1,394 @@ +import argparse +from http.server import HTTPServer, BaseHTTPRequestHandler +import csv + +import logging +from typing import Dict, Optional, List +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class MetricsHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + + metrics = self.server.get_metrics() # type: ignore + self.wfile.write(metrics.encode("utf-8")) + + +# class CSVMetricsExporter: +# def __init__(self, csv_path, timestamp_column, metric_column): +# self.df = pd.read_csv(csv_path) +# print("CSV loaded") +# self.timestamp_column = timestamp_column +# self.metric_column = metric_column +# self.label_columns = [ +# col +# for col in self.df.columns +# if col not in [timestamp_column, metric_column] +# ] + +# # Convert timestamp strings to Unix timestamps (milliseconds) +# self.df[timestamp_column] = ( +# pd.to_datetime(self.df[timestamp_column], unit="ns").astype(int) // 10**6 +# ) +# self.current_index = 0 + +# # Preprocess data by grouping rows by timestamp +# self.metrics_by_timestamp = {} +# self.lengths_by_timestamp = {} +# # grouped = 
self.df.groupby(timestamp_column) +# # print("Grouping done") +# # # TODO: this is horribly slow, fix +# # for group_idx, (timestamp, group) in enumerate(grouped): +# # self.metrics_by_timestamp[timestamp] = self._format_metrics(group) +# # self.lengths_by_timestamp[timestamp] = len(group) +# # # if group_idx % 500 == 0: +# # print(f"Group {group_idx} done") +# self.metrics_by_timestamp = ( +# self.df.groupby(timestamp_column).apply(self._format_metrics).to_dict() +# ) +# self.lengths_by_timestamp = self.df[timestamp_column].value_counts().to_dict() +# print("Metrics done") + +# def _format_metrics(self, group): +# output = [] +# timestamp = int(group.iloc[0][self.timestamp_column]) # type: ignore +# metric_name = f"csv_{self.metric_column}" +# # Add TYPE header only for first occurrence +# output.append(f"# TYPE {metric_name} gauge") +# for _, row in group.iterrows(): +# value = row[self.metric_column] +# labels = ",".join( +# [f'{label}="{row[label]}"' for label in self.label_columns] +# ) +# output.append(f"{metric_name}{{{labels}}} {value} {timestamp}") +# return "\n".join(output) + +# def get_metrics(self): +# if self.current_index >= len(self.df): +# return "" + +# current_timestamp = self.df.iloc[self.current_index][self.timestamp_column] +# metrics = self.metrics_by_timestamp[current_timestamp] +# self.current_index += self.lengths_by_timestamp[current_timestamp] +# return metrics + + +# class ChunkedCSVReader: +# def __init__(self, csv_path: str): +# self.csv_path = csv_path +# self.file = None +# self.reader = None +# self.fieldnames = None + +# def __enter__(self): +# self.file = open(self.csv_path, "r") +# self.reader = csv.DictReader(self.file) +# self.fieldnames = self.reader.fieldnames +# return self + +# def __exit__(self, exc_type, exc_val, exc_tb): +# if self.file: +# self.file.close() + + +class CSVMetricsExporterNoPandas: + def __init__(self, csv_path: str, timestamp_column: str, metric_column: str): + self.csv_path = csv_path + 
self.timestamp_column = timestamp_column + self.metric_column = metric_column + self.label_columns: List[str] = [] + self.current_timestamp: Optional[int] = None + self.current_chunk: List[Dict] = [] + self.file_size = Path(csv_path).stat().st_size + self.bytes_processed = 0 + + # Open file and initialize reader + self.file = open(csv_path, "r") + self.reader = csv.DictReader(self.file) + assert self.reader.fieldnames is not None + + # Initialize label columns + self.label_columns = [ + col + for col in self.reader.fieldnames + if col not in [timestamp_column, metric_column] + ] + logger.info(f"Initialized with {len(self.label_columns)} label columns") + + def __del__(self): + """Cleanup when object is destroyed""" + try: + if hasattr(self, "file") and self.file: + self.file.close() + except Exception as e: + logger.error(f"Error closing file: {str(e)}") + + def reset_file(self): + """Reset file to beginning and reinitialize reader""" + try: + self.file.seek(0) + self.reader = csv.DictReader(self.file) + self.bytes_processed = 0 + self.current_timestamp = None + self.current_chunk = [] + logger.info("File reset to beginning") + except Exception as e: + logger.error(f"Error resetting file: {str(e)}") + # If there's an error, try to reopen the file + self.file = open(self.csv_path, "r") + self.reader = csv.DictReader(self.file) + self.bytes_processed = 0 + + def read_next_chunk(self) -> Optional[List[Dict]]: + """Read the next chunk of rows with the same millisecond timestamp""" + if not self.current_chunk: + try: + # Read until we find a new timestamp + for row in self.reader: + self.bytes_processed += 1 + timestamp = int( + int(row[self.timestamp_column]) // 10**6 + ) # ns to ms + + if self.current_timestamp is None: + self.current_timestamp = timestamp + self.current_chunk.append(row) + elif timestamp == self.current_timestamp: + self.current_chunk.append(row) + else: + # Found a new timestamp, save it for next time + self.current_timestamp = timestamp + 
self.current_chunk.append(row) + break + + progress = (self.bytes_processed / self.file_size) * 100 + logger.info(f"Progress: {progress:.2f}% processed") + + if self.current_chunk: + return self.current_chunk + else: + # End of file reached + self.reset_file() + return None + + except Exception as e: + logger.error(f"Error reading chunk: {str(e)}") + # Try to recover by resetting the file + self.reset_file() + return None + + return self.current_chunk + + def _format_metrics(self, group: List[Dict], timestamp: int) -> str: + """Format metrics in Prometheus exposition format""" + output = [] + metric_name = f"csv_{self.metric_column}" + output.append(f"# TYPE {metric_name} gauge") + + for row in group[:10]: + value = row[self.metric_column] + labels = ",".join( + [f'{label}="{row[label]}"' for label in self.label_columns] + ) + output.append(f"{metric_name}{{{labels}}} {value} {timestamp}") + + return "\n".join(output) + + def get_metrics(self) -> str: + """Get metrics for the current timestamp chunk""" + chunk = self.read_next_chunk() + if not chunk: + return "" + + metrics = self._format_metrics(chunk, self.current_timestamp or 0) + # Clear the chunk after processing + self.current_chunk = [] + return metrics + + +def run_server( + port: int, csv_path: str, timestamp_column: str, metric_column: str +) -> None: + exporter = CSVMetricsExporterNoPandas(csv_path, timestamp_column, metric_column) + + class MetricsServer(HTTPServer): + def get_metrics(self): + return exporter.get_metrics() + + server = MetricsServer(("", port), MetricsHandler) + logger.info(f"Ready to serve metrics on port {port}") + server.serve_forever() + + +# logging.basicConfig(level=logging.INFO) +# logger = logging.getLogger(__name__) + + +# class TimeBasedCSVExporter: +# def __init__( +# self, csv_path: str, metric_column, timestamp_column, update_interval: int = 60 +# ): +# self.csv_path = csv_path +# self.update_interval = update_interval +# self.metrics: Dict[str, Gauge] = {} +# 
self.current_ms: Optional[int] = None +# self.csv_iterator = None +# self.metric_column = metric_column +# self.timestamp_column = timestamp_column + +# def ns_to_ms(self, ns: int) -> int: +# """Convert nanosecond timestamp to milliseconds""" +# return ns // 1_000_000 + +# def get_next_batch(self) -> Optional[pd.DataFrame]: +# """Read rows until timestamp changes by 1ms""" +# if self.csv_iterator is None: +# # Initialize CSV reader with appropriate timestamp parsing +# try: +# self.csv_iterator = pd.read_csv( +# self.csv_path, +# iterator=True, +# dtype={ +# self.timestamp_column: "int64" +# }, # Ensure timestamp is read as int64 +# ) +# except Exception as e: +# logger.error(f"Error initializing CSV reader: {str(e)}") +# raise + +# rows = [] +# try: +# while True: +# # Read one row at a time +# row = next(self.csv_iterator) + +# # Convert timestamp to ms +# row_ms = self.ns_to_ms(row[self.timestamp_column].iloc[0]) + +# if self.current_ms is None: +# # First batch +# self.current_ms = row_ms +# rows.append(row) +# elif row_ms == self.current_ms: +# # Same millisecond, add to batch +# rows.append(row) +# else: +# # New millisecond reached +# # Save this row's ms for next batch +# self.current_ms = row_ms +# # Return concatenated batch +# result = pd.concat(rows, ignore_index=True) +# # Start new batch with current row +# rows = [row] +# return result + +# except StopIteration: +# # End of file reached +# if rows: +# # Return final batch if any rows accumulated +# return pd.concat(rows, ignore_index=True) +# self.csv_iterator = None +# self.current_ms = None +# return None +# except Exception as e: +# logger.error(f"Error reading batch: {str(e)}") +# raise + +# def create_metric(self, metric_name: str, label_names: list) -> None: +# """Create a Prometheus metric if it doesn't exist""" +# if metric_name not in self.metrics: +# self.metrics[metric_name] = Gauge( +# metric_name, f"Metric imported from CSV: {metric_name}", label_names +# ) + +# def process_batch(self, 
batch: pd.DataFrame) -> None: +# """Process a batch of rows with the same millisecond timestamp""" +# try: +# # Get label columns (excluding special columns) +# label_columns = [ +# col +# for col in batch.columns +# if col not in [self.timestamp_column, self.metric_column] +# ] + +# # Process each row in the batch +# for _, row in batch.iterrows(): +# metric_name = row[self.metric_column] + +# # Create metric if it doesn't exist +# self.create_metric(metric_name, label_columns) + +# # Extract labels +# labels = {col: row[col] for col in label_columns} + +# # Update metric value with timestamp in milliseconds +# # Set timestamp explicitly using the current batch timestamp +# self.metrics[metric_name].labels(**labels).set(row["value"]) + +# except Exception as e: +# logger.error(f"Error processing batch: {str(e)}") +# raise + +# def update_metrics(self) -> bool: +# """Read and update metrics from CSV in timestamp-based batches""" +# try: +# batch = self.get_next_batch() +# if batch is not None: +# self.process_batch(batch) +# # Force garbage collection after batch processing +# gc.collect() +# logger.info(f"Processed batch for timestamp {self.current_ms}ms") +# return True +# return False + +# except Exception as e: +# logger.error(f"Error updating metrics: {str(e)}") +# return False + +# def run(self, port: int = 8000) -> None: +# """Start the exporter""" +# start_http_server(port) +# logger.info(f"Metrics server started on port {port}") + +# while True: +# start_time = time.time() + +# # Process all available batches +# while self.update_metrics(): +# pass + +# # Calculate sleep time +# elapsed = time.time() - start_time +# sleep_time = max(0, self.update_interval - elapsed) + +# logger.info( +# f"Update cycle took {elapsed:.2f}s, sleeping for {sleep_time:.2f}s" +# ) +# time.sleep(sleep_time) + + +def main(args): + # exporter = TimeBasedCSVExporter( + # args.input_file, args.metric_column, args.timestamp_column + # ) + # exporter.run() + + run_server( + 
args.http_port, args.input_file, args.timestamp_column, args.metric_column + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True) + parser.add_argument("--timestamp_column", type=str, required=True) + parser.add_argument("--metric_column", type=str, required=True) + parser.add_argument("--http_port", default=8000) + args = parser.parse_args() + main(args) diff --git a/ExecutionUtilities/csv_to_prometheus/setup.py b/ExecutionUtilities/csv_to_prometheus/setup.py new file mode 100644 index 0000000..7925f8c --- /dev/null +++ b/ExecutionUtilities/csv_to_prometheus/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name="csv_to_prometheus", + version="0.1", + packages=find_packages(), + install_requires=[], +) diff --git a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock new file mode 100644 index 0000000..cda575a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.lock @@ -0,0 +1,1250 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chrono" +version = 
"0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "either" +version = "1.15.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = 
"0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kafka-throughput-producer" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "futures", + "itertools", + "rand", + "rdkafka", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + 
+[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = 
"0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git 
a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml new file mode 100644 index 0000000..9c1da7a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "kafka-throughput-producer" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "producer" +path = "src/main.rs" + +[dependencies] +rdkafka = { version = "0.36", features = ["cmake-build", "ssl"] } +tokio = { version = "1.0", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.0", features = ["derive"] } +rand = "0.8" +chrono = { version = "0.4", features = ["serde"] } +itertools = "0.12" +tracing = "0.1" +tracing-subscriber = "0.3" +anyhow = "1.0" +futures = "0.3" diff --git a/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs new file mode 100644 index 0000000..b93da31 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer-single-threaded/src/main.rs @@ -0,0 +1,450 @@ +use anyhow::Result; +use clap::Parser; +use futures::future::join_all; +use itertools::Itertools; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; +use rdkafka::config::ClientConfig; +use rdkafka::producer::{FutureProducer, FutureRecord, Producer}; +use serde_json; +use std::collections::HashMap; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tokio::time::sleep; +use tracing::{error, info, warn}; + + +#[derive(Debug, Clone)] +struct LabelChoices { + hostname: Vec, + location: Vec, + application_name: Vec, + instance: Vec, + job: Vec, +} + +impl Default for LabelChoices { + fn default() -> Self { + Self { + hostname: vec![ + "host1".to_string(), + "host2".to_string(), + 
"host3".to_string(), + "host4".to_string(), + "host5".to_string(), + ], + location: vec![ + "us-east".to_string(), + "us-west".to_string(), + "eu-central".to_string(), + "ap-southeast".to_string(), + ], + application_name: vec![ + "app1".to_string(), + "app2".to_string(), + "app3".to_string(), + "app4".to_string(), + ], + instance: vec![ + "worker1".to_string(), + "worker2".to_string(), + "worker3".to_string(), + "worker4".to_string(), + ], + job: vec![ + "throughput-test".to_string(), + "latency-test".to_string(), + "stress-test".to_string(), + ], + } + } +} + +static METRIC_NAMES: &[&str] = &[ + "cpu_usage", + "memory_usage", + "network_throughput", + "disk_iops", + "response_time", + "error_rate", +]; + +#[derive(Debug, Clone)] +struct ProducerStats { + messages_sent: u64, + bytes_sent: u64, + errors: u64, +} + +impl ProducerStats { + fn new() -> Self { + Self { + messages_sent: 0, + bytes_sent: 0, + errors: 0, + } + } + + fn add_message(&mut self, bytes: u64) { + self.messages_sent += 1; + self.bytes_sent += bytes; + } + + fn add_error(&mut self) { + self.errors += 1; + } + + fn get_stats(&self) -> (u64, u64, u64) { + (self.messages_sent, self.bytes_sent, self.errors) + } +} + +#[derive(Parser, Debug)] +#[command(name = "kafka-throughput-producer")] +#[command(about = "High-performance Kafka producer for Arroyo benchmarking")] +struct Args { + #[arg(long, default_value = "localhost:9092")] + kafka_broker: String, + + #[arg(long)] + kafka_topic: String, + + #[arg(long, default_value = "1000000")] + total_messages: u64, + + #[arg(long, default_value = "10000")] + messages_per_second: u64, + + #[arg(long)] + duration: Option, + + + #[arg(long, default_value = "1")] + num_partitions: i32, + + #[arg(long, default_value = "1")] + replication_factor: i32, + + #[arg(long)] + vary_labels: bool, + + #[arg(long, default_value = "false")] + enable_flush: bool, + + #[arg(long, default_value = "none")] + compression: String, + + #[arg(long, default_value = "65536")] + 
batch_size: usize, +} + +#[derive(Clone)] +struct HighThroughputProducer { + producer: FutureProducer, + topic_name: String, + stats: ProducerStats, + label_choices: LabelChoices, +} + + +impl HighThroughputProducer { + async fn new_with_compression( + kafka_broker: &str, + topic_name: String, + num_partitions: i32, + replication_factor: i32, + compression: &str, + ) -> Result { + // High-performance producer configuration optimized for throughput + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("linger.ms", "5") + .set("batch.size", "1048576") // 1MB batches + .set("compression.type", compression) + .set("queue.buffering.max.messages", "1000000") + .set("queue.buffering.max.kbytes", "2097152") // 2GB + .set("batch.num.messages", "10000") + .set("acks", "0") // No acknowledgments for max throughput + .set("retries", "0") // No retries for max throughput + .set("message.max.bytes", "1048576") // 1MB + .set("queue.buffering.max.ms", "10") + .set("delivery.timeout.ms", "30000") + .create()?; + + let label_choices = LabelChoices::default(); + + let kafka_producer = Self { + producer, + topic_name: topic_name.clone(), + stats: ProducerStats::new(), + label_choices, + }; + + kafka_producer + .create_topic_if_not_exists(kafka_broker, &topic_name, num_partitions, replication_factor) + .await?; + + Ok(kafka_producer) + } + + async fn create_topic_if_not_exists( + &self, + kafka_broker: &str, + topic_name: &str, + num_partitions: i32, + replication_factor: i32, + ) -> Result<()> { + let admin: AdminClient<_> = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .create()?; + + let metadata = admin.inner().fetch_metadata(None, Duration::from_secs(10))?; + + let topic_exists = metadata.topics().iter().any(|t| t.name() == topic_name); + + if !topic_exists { + let new_topic = NewTopic::new( + topic_name, + num_partitions, + TopicReplication::Fixed(replication_factor), + ); + + let opts = 
AdminOptions::new().request_timeout(Some(Duration::from_secs(10))); + let results = admin.create_topics(&[new_topic], &opts).await?; + + for result in results { + match result { + Ok(topic) => info!("Created topic: {}", topic), + Err((topic, error)) => { + error!("Failed to create topic {}: {}", topic, error); + return Err(anyhow::anyhow!("Topic creation failed")); + } + } + } + + // Wait for topic creation to propagate + sleep(Duration::from_secs(2)).await; + info!("Topic '{}' created with {} partitions", topic_name, num_partitions); + } + + Ok(()) + } + + + fn generate_prometheus_metric(&self, labels: &[String]) -> Result> { + let mut rng = thread_rng(); + + let metric_name = METRIC_NAMES.choose(&mut rng).unwrap().to_string(); + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_millis() as u64; + + let value = match metric_name.as_str() { + "cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + let mut label_map = HashMap::new(); + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_map.insert(key.to_string(), serde_json::Value::String(labels[i].clone())); + } + } + + let mut metric = HashMap::new(); + metric.insert("metric_name".to_string(), serde_json::Value::String(metric_name)); + metric.insert("timestamp".to_string(), serde_json::Value::Number(serde_json::Number::from(timestamp))); + metric.insert("value".to_string(), serde_json::Value::Number(serde_json::Number::from_f64(value).unwrap())); + metric.insert("labels".to_string(), serde_json::Value::Object(label_map.into_iter().collect())); + + Ok(metric) + } + + async fn produce_message_batch( + &mut self, + batch: Vec<(String, Vec)>, + ) -> 
Result<()> { + let mut send_futures = Vec::new(); + + for (partition_key, labels) in batch { + let metric = self.generate_prometheus_metric(&labels)?; + let message_data = serde_json::to_vec(&metric)?; + let message_size = message_data.len(); + + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + + let send_future = async move { + let record = FutureRecord::to(&topic_name) + .key(&partition_key) + .payload(&message_data); + + let result = producer.send(record, Duration::from_secs(10)).await; + (result, message_size) + }; + + send_futures.push(send_future); + } + + // Send all messages concurrently and collect results + let results = join_all(send_futures).await; + + for (result, message_size) in results { + match result { + Ok(_) => { + self.stats.add_message(message_size as u64); + } + Err((kafka_error, _)) => { + self.stats.add_error(); + warn!("Failed to send message: {}", kafka_error); + } + } + } + Ok(()) + } + + + + fn generate_all_label_combinations(&self) -> Vec> { + let label_values = vec![ + &self.label_choices.hostname, + &self.label_choices.location, + &self.label_choices.application_name, + &self.label_choices.instance, + &self.label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + async fn run_benchmark( + &mut self, + args: &Args, + ) -> Result<()> { + info!( + "Starting single-threaded benchmark: {} messages at {} msg/s", + args.total_messages, args.messages_per_second + ); + info!("Producer initialized for single-threaded operation"); + info!("🚀 Data generation started!"); + + let all_labels = self.generate_all_label_combinations(); + let start_time = Instant::now(); + let mut messages_sent = 0u64; + + let messages_per_interval = args.messages_per_second; + let batch_size = std::cmp::max(1, args.batch_size); + let interval = Duration::from_secs(1); + + while messages_sent < args.total_messages { + if let 
Some(duration) = args.duration { + if start_time.elapsed().as_secs() > duration { + break; + } + } + + let interval_start = Instant::now(); + + // Select labels for this interval + let labels_subset = if args.vary_labels { + let mut rng = thread_rng(); + let num_labels = rng.gen_range(1..=std::cmp::min(all_labels.len(), messages_per_interval as usize)); + all_labels.choose_multiple(&mut rng, num_labels).cloned().collect::>() + } else { + all_labels[..std::cmp::min(all_labels.len(), messages_per_interval as usize)].to_vec() + }; + + // Process messages in batches sequentially + let mut remaining_messages = std::cmp::min( + messages_per_interval, + args.total_messages - messages_sent + ); + + while remaining_messages > 0 { + let current_batch_size = std::cmp::min(batch_size as u64, remaining_messages) as usize; + let batch: Vec<(String, Vec)> = (0..current_batch_size) + .map(|i| { + let labels = &labels_subset[i % labels_subset.len()]; + let partition_key = format!("{}_{}", labels[0], labels[1]); + (partition_key, labels.clone()) + }) + .collect(); + + // Process batch sequentially in single thread + if let Err(e) = self.produce_message_batch(batch).await { + error!("Batch processing failed: {}", e); + } + + remaining_messages -= current_batch_size as u64; + messages_sent += current_batch_size as u64; + } + + // Rate limiting + let elapsed = interval_start.elapsed(); + if elapsed < interval { + sleep(interval - elapsed).await; + } + + // Print progress + if messages_sent % (args.messages_per_second) == 0 { + self.print_stats(start_time); + } + } + + // Final flush - wait for all messages to be delivered + info!("Flushing remaining messages..."); + if let Err(e) = self.producer.flush(Duration::from_secs(30)) { + warn!("Error during flush: {}", e); + } + + info!("Benchmark completed!"); + self.print_stats(start_time); + + Ok(()) + } + + fn print_stats(&self, start_time: Instant) { + let (messages, bytes, errors) = self.stats.get_stats(); + let elapsed = 
start_time.elapsed().as_secs_f64(); + + let rate = if elapsed > 0.0 { messages as f64 / elapsed } else { 0.0 }; + let throughput_mb = if elapsed > 0.0 { + (bytes as f64 / (1024.0 * 1024.0)) / elapsed + } else { + 0.0 + }; + + info!( + "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s, Errors: {}", + messages, rate, throughput_mb, errors + ); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + + let producer = HighThroughputProducer::new_with_compression( + &args.kafka_broker, + args.kafka_topic.clone(), + args.num_partitions, + args.replication_factor, + &args.compression, + ).await?; + + let mut producer = producer; + producer.run_benchmark(&args).await?; + + Ok(()) +} diff --git a/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock new file mode 100644 index 0000000..cda575a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.lock @@ -0,0 +1,1250 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chrono" +version = 
"0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "either" +version = "1.15.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = 
"0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kafka-throughput-producer" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "futures", + "itertools", + "rand", + "rdkafka", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + 
+[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "cmake", + "libc", + "libz-sys", + "num_enum", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = 
"0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git 
a/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml new file mode 100644 index 0000000..9c1da7a --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "kafka-throughput-producer" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "producer" +path = "src/main.rs" + +[dependencies] +rdkafka = { version = "0.36", features = ["cmake-build", "ssl"] } +tokio = { version = "1.0", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.0", features = ["derive"] } +rand = "0.8" +chrono = { version = "0.4", features = ["serde"] } +itertools = "0.12" +tracing = "0.1" +tracing-subscriber = "0.3" +anyhow = "1.0" +futures = "0.3" diff --git a/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java b/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java new file mode 100644 index 0000000..d6ed1d0 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/code_for_reference/ProducerPerformance.java @@ -0,0 +1,598 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.tools; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.util.ThroughputThrottler; + +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.MutuallyExclusiveGroup; +import net.sourceforge.argparse4j.inf.Namespace; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.Properties; +import java.util.Scanner; +import java.util.SplittableRandom; + +import static net.sourceforge.argparse4j.impl.Arguments.store; +import static net.sourceforge.argparse4j.impl.Arguments.storeTrue; + +public class ProducerPerformance { + + public static final String DEFAULT_TRANSACTION_ID_PREFIX = "performance-producer-"; + public static final long DEFAULT_TRANSACTION_DURATION_MS = 3000L; + + public static void main(String[] args) throws Exception { + ProducerPerformance perf = new ProducerPerformance(); + perf.start(args); + } + + void start(String[] args) throws IOException { + ArgumentParser parser = argParser(); + + try { + ConfigPostProcessor config = new ConfigPostProcessor(parser, args); + KafkaProducer producer = createKafkaProducer(config.producerProps); + + if 
(config.transactionsEnabled) + producer.initTransactions(); + + /* setup perf test */ + byte[] payload = null; + if (config.recordSize != null) { + payload = new byte[config.recordSize]; + } + // not thread-safe, do not share with other threads + SplittableRandom random = new SplittableRandom(0); + ProducerRecord record; + + if (config.warmupRecords > 0) { + System.out.println("Warmup first " + config.warmupRecords + " records. Steady state results will print after the complete test summary."); + } + boolean isSteadyState = false; + stats = new Stats(config.numRecords, isSteadyState); + long startMs = System.currentTimeMillis(); + + ThroughputThrottler throttler = new ThroughputThrottler(config.throughput, startMs); + + int currentTransactionSize = 0; + long transactionStartTime = 0; + for (long i = 0; i < config.numRecords; i++) { + + payload = generateRandomPayload(config.recordSize, config.payloadByteList, payload, random, config.payloadMonotonic, i); + + if (config.transactionsEnabled && currentTransactionSize == 0) { + producer.beginTransaction(); + transactionStartTime = System.currentTimeMillis(); + } + + record = new ProducerRecord<>(config.topicName, payload); + + long sendStartMs = System.currentTimeMillis(); + if ((isSteadyState = config.warmupRecords > 0) && i == config.warmupRecords) { + steadyStateStats = new Stats(config.numRecords - config.warmupRecords, isSteadyState); + stats.suppressPrinting(); + } + cb = new PerfCallback(sendStartMs, payload.length, stats, steadyStateStats); + producer.send(record, cb); + + currentTransactionSize++; + if (config.transactionsEnabled && config.transactionDurationMs <= (sendStartMs - transactionStartTime)) { + producer.commitTransaction(); + currentTransactionSize = 0; + } + + if (throttler.shouldThrottle(i, sendStartMs)) { + throttler.throttle(); + } + } + + if (config.transactionsEnabled && currentTransactionSize != 0) + producer.commitTransaction(); + + if (!config.shouldPrintMetrics) { + producer.close(); + + 
/* print final results */ + stats.printTotal(); + /* print steady-state stats if relevant */ + if (steadyStateStats != null) { + steadyStateStats.printTotal(); + } + } else { + // Make sure all messages are sent before printing out the stats and the metrics + // We need to do this in a different branch for now since tests/kafkatest/sanity_checks/test_performance_services.py + // expects this class to work with older versions of the client jar that don't support flush(). + producer.flush(); + + /* print final results */ + stats.printTotal(); + /* print steady-state stats if relevant */ + if (steadyStateStats != null) { + steadyStateStats.printTotal(); + } + + /* print out metrics */ + ToolsUtils.printMetrics(producer.metrics()); + producer.close(); + } + } catch (ArgumentParserException e) { + if (args.length == 0) { + parser.printHelp(); + Exit.exit(0); + } else { + parser.handleError(e); + Exit.exit(1); + } + } + + } + + KafkaProducer createKafkaProducer(Properties props) { + return new KafkaProducer<>(props); + } + + Callback cb; + Stats stats; + Stats steadyStateStats; + + static byte[] generateRandomPayload(Integer recordSize, List payloadByteList, byte[] payload, + SplittableRandom random, boolean payloadMonotonic, long recordValue) { + if (!payloadByteList.isEmpty()) { + payload = payloadByteList.get(random.nextInt(payloadByteList.size())); + } else if (recordSize != null) { + for (int j = 0; j < payload.length; ++j) + payload[j] = (byte) (random.nextInt(26) + 65); + } else if (payloadMonotonic) { + payload = Long.toString(recordValue).getBytes(StandardCharsets.UTF_8); + } else { + throw new IllegalArgumentException("no payload File Path or record Size or payload-monotonic option provided"); + } + return payload; + } + + static Properties readProps(List producerProps, String producerConfig) throws IOException { + Properties props = new Properties(); + if (producerConfig != null) { + props.putAll(Utils.loadProps(producerConfig)); + } + if (producerProps != 
null) + for (String prop : producerProps) { + String[] pieces = prop.split("="); + if (pieces.length != 2) + throw new IllegalArgumentException("Invalid property: " + prop); + props.put(pieces[0], pieces[1]); + } + + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); + if (props.getProperty(ProducerConfig.CLIENT_ID_CONFIG) == null) { + props.put(ProducerConfig.CLIENT_ID_CONFIG, "perf-producer-client"); + } + return props; + } + + static List readPayloadFile(String payloadFilePath, String payloadDelimiter) throws IOException { + List payloadByteList = new ArrayList<>(); + if (payloadFilePath != null) { + Path path = Paths.get(payloadFilePath); + System.out.println("Reading payloads from: " + path.toAbsolutePath()); + if (Files.notExists(path) || Files.size(path) == 0) { + throw new IllegalArgumentException("File does not exist or empty file provided."); + } + + try (Scanner payLoadScanner = new Scanner(path, StandardCharsets.UTF_8)) { + //setting the delimiter while parsing the file, avoids loading entire data in memory before split + payLoadScanner.useDelimiter(payloadDelimiter); + while (payLoadScanner.hasNext()) { + byte[] payloadBytes = payLoadScanner.next().getBytes(StandardCharsets.UTF_8); + payloadByteList.add(payloadBytes); + } + } + + System.out.println("Number of messages read: " + payloadByteList.size()); + + } + return payloadByteList; + } + + /** Get the command-line argument parser. */ + static ArgumentParser argParser() { + ArgumentParser parser = ArgumentParsers + .newArgumentParser("producer-performance") + .defaultHelp(true) + .description("This tool is used to verify the producer performance. To enable transactions, " + + "you can specify a transaction id or set a transaction duration using --transaction-duration-ms. 
" + + "There are three ways to specify the transaction id: set transaction.id= via --producer-props, " + + "set transaction.id= in the config file via --producer.config, or use --transaction-id ."); + + MutuallyExclusiveGroup payloadOptions = parser + .addMutuallyExclusiveGroup() + .required(true) + .description("either --record-size or --payload-file must be specified but not both."); + + parser.addArgument("--topic") + .action(store()) + .required(true) + .type(String.class) + .metavar("TOPIC") + .help("produce messages to this topic"); + + parser.addArgument("--num-records") + .action(store()) + .required(true) + .type(Long.class) + .metavar("NUM-RECORDS") + .dest("numRecords") + .help("number of messages to produce"); + + payloadOptions.addArgument("--record-size") + .action(store()) + .required(false) + .type(Integer.class) + .metavar("RECORD-SIZE") + .dest("recordSize") + .help("message size in bytes. Note that you must provide exactly one of --record-size or --payload-file " + + "or --payload-monotonic."); + + payloadOptions.addArgument("--payload-file") + .action(store()) + .required(false) + .type(String.class) + .metavar("PAYLOAD-FILE") + .dest("payloadFile") + .help("file to read the message payloads from. This works only for UTF-8 encoded text files. " + + "Payloads will be read from this file and a payload will be randomly selected when sending messages. " + + "Note that you must provide exactly one of --record-size or --payload-file or --payload-monotonic."); + + payloadOptions.addArgument("--payload-monotonic") + .action(storeTrue()) + .type(Boolean.class) + .metavar("PAYLOAD-MONOTONIC") + .dest("payloadMonotonic") + .help("payload is monotonically increasing integer. 
Note that you must provide exactly one of --record-size " + + "or --payload-file or --payload-monotonic."); + + parser.addArgument("--payload-delimiter") + .action(store()) + .required(false) + .type(String.class) + .metavar("PAYLOAD-DELIMITER") + .dest("payloadDelimiter") + .setDefault("\\n") + .help("provides delimiter to be used when --payload-file is provided. " + + "Defaults to new line. " + + "Note that this parameter will be ignored if --payload-file is not provided."); + + parser.addArgument("--throughput") + .action(store()) + .required(true) + .type(Double.class) + .metavar("THROUGHPUT") + .help("throttle maximum message throughput to *approximately* THROUGHPUT messages/sec. Set this to -1 to disable throttling."); + + parser.addArgument("--producer-props") + .nargs("+") + .required(false) + .metavar("PROP-NAME=PROP-VALUE") + .type(String.class) + .dest("producerConfig") + .help("kafka producer related configuration properties like bootstrap.servers,client.id etc. " + + "These configs take precedence over those passed via --producer.config."); + + parser.addArgument("--producer.config") + .action(store()) + .required(false) + .type(String.class) + .metavar("CONFIG-FILE") + .dest("producerConfigFile") + .help("producer config properties file."); + + parser.addArgument("--print-metrics") + .action(storeTrue()) + .type(Boolean.class) + .metavar("PRINT-METRICS") + .dest("printMetrics") + .help("print out metrics at the end of the test."); + + parser.addArgument("--transactional-id") + .action(store()) + .required(false) + .type(String.class) + .metavar("TRANSACTIONAL-ID") + .dest("transactionalId") + .help("The transactional id to use. This config takes precedence over the transactional.id " + + "specified via --producer.config or --producer-props. 
Note that if the transactional id " + + "is not specified while --transaction-duration-ms is provided, the default value for the " + + "transactional id will be performance-producer- followed by a random uuid."); + + parser.addArgument("--transaction-duration-ms") + .action(store()) + .required(false) + .type(Long.class) + .metavar("TRANSACTION-DURATION") + .dest("transactionDurationMs") + .help("The max age of each transaction. The commitTransaction will be called after this time has elapsed. " + + "The value should be greater than 0. If the transactional id is specified via --producer-props, " + + "--producer.config, or --transactional-id but --transaction-duration-ms is not specified, " + + "the default value will be 3000."); + + parser.addArgument("--warmup-records") + .action(store()) + .required(false) + .type(Long.class) + .metavar("WARMUP-RECORDS") + .dest("warmupRecords") + .setDefault(0L) + .help("The number of records to treat as warmup; these initial records will not be included in steady-state statistics. " + + "An additional summary line will be printed describing the steady-state statistics. 
(default: 0)."); + + return parser; + } + + // Visible for testing + static class Stats { + private final long start; + private final int[] latencies; + private final long sampling; + private final long reportingInterval; + private long iteration; + private int index; + private long count; + private long bytes; + private int maxLatency; + private long totalLatency; + private long windowCount; + private int windowMaxLatency; + private long windowTotalLatency; + private long windowBytes; + private long windowStart; + private final boolean isSteadyState; + private boolean suppressPrint; + + public Stats(long numRecords, boolean isSteadyState) { + this.start = System.currentTimeMillis(); + this.windowStart = System.currentTimeMillis(); + this.iteration = 0; + this.sampling = numRecords / Math.min(numRecords, 500000); + this.latencies = new int[(int) (numRecords / this.sampling) + 1]; + this.index = 0; + this.maxLatency = 0; + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + this.totalLatency = 0; + this.reportingInterval = 5000; + this.isSteadyState = isSteadyState; + this.suppressPrint = false; + } + + public void record(int latency, int bytes, long time) { + this.count++; + this.bytes += bytes; + this.totalLatency += latency; + this.maxLatency = Math.max(this.maxLatency, latency); + this.windowCount++; + this.windowBytes += bytes; + this.windowTotalLatency += latency; + this.windowMaxLatency = Math.max(windowMaxLatency, latency); + if (this.iteration % this.sampling == 0) { + this.latencies[index] = latency; + this.index++; + } + /* maybe report the recent perf */ + if (time - windowStart >= reportingInterval) { + if (this.isSteadyState && count == windowCount) { + System.out.println("In steady state."); + } + if (!this.suppressPrint) { + printWindow(); + } + newWindow(); + } + this.iteration++; + } + + public long totalCount() { + return this.count; + } + + public long currentWindowCount() { + return 
this.windowCount; + } + + public long iteration() { + return this.iteration; + } + + public long bytes() { + return this.bytes; + } + + public int index() { + return this.index; + } + + public void printWindow() { + long elapsed = System.currentTimeMillis() - windowStart; + double recsPerSec = 1000.0 * windowCount / (double) elapsed; + double mbPerSec = 1000.0 * this.windowBytes / (double) elapsed / (1024.0 * 1024.0); + System.out.printf("%d records sent, %.1f records/sec (%.2f MB/sec), %.1f ms avg latency, %.1f ms max latency.%n", + windowCount, + recsPerSec, + mbPerSec, + windowTotalLatency / (double) windowCount, + (double) windowMaxLatency); + } + + public void newWindow() { + this.windowStart = System.currentTimeMillis(); + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + } + + public void printTotal() { + long elapsed = System.currentTimeMillis() - start; + double recsPerSec = 1000.0 * count / (double) elapsed; + double mbPerSec = 1000.0 * this.bytes / (double) elapsed / (1024.0 * 1024.0); + int[] percs = percentiles(this.latencies, index, 0.5, 0.95, 0.99, 0.999); + System.out.printf("%d%s records sent, %f records/sec (%.2f MB/sec), %.2f ms avg latency, %.2f ms max latency, %d ms 50th, %d ms 95th, %d ms 99th, %d ms 99.9th.%n", + count, + this.isSteadyState ? " steady state" : "", + recsPerSec, + mbPerSec, + totalLatency / (double) count, + (double) maxLatency, + percs[0], + percs[1], + percs[2], + percs[3]); + } + + private static int[] percentiles(int[] latencies, int count, double... 
percentiles) { + int size = Math.min(count, latencies.length); + Arrays.sort(latencies, 0, size); + int[] values = new int[percentiles.length]; + for (int i = 0; i < percentiles.length; i++) { + int index = (int) (percentiles[i] * size); + values[i] = latencies[index]; + } + return values; + } + + public void suppressPrinting() { + this.suppressPrint = true; + } + } + + static final class PerfCallback implements Callback { + private final long start; + private final int bytes; + private final Stats stats; + private final Stats steadyStateStats; + + public PerfCallback(long start, int bytes, Stats stats, Stats steadyStateStats) { + this.start = start; + this.stats = stats; + this.steadyStateStats = steadyStateStats; + this.bytes = bytes; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + long now = System.currentTimeMillis(); + int latency = (int) (now - start); + // It will only be counted when the sending is successful, otherwise the number of sent records may be + // magically printed when the sending fails. 
+ if (exception == null) { + this.stats.record(latency, bytes, now); + if (steadyStateStats != null) { + this.steadyStateStats.record(latency, bytes, now); + } + } + if (exception != null) + exception.printStackTrace(); + } + } + + static final class ConfigPostProcessor { + final String topicName; + final long numRecords; + final long warmupRecords; + final Integer recordSize; + final double throughput; + final boolean payloadMonotonic; + final Properties producerProps; + final boolean shouldPrintMetrics; + final Long transactionDurationMs; + final boolean transactionsEnabled; + final List payloadByteList; + + public ConfigPostProcessor(ArgumentParser parser, String[] args) throws IOException, ArgumentParserException { + Namespace namespace = parser.parseArgs(args); + this.topicName = namespace.getString("topic"); + this.numRecords = namespace.getLong("numRecords"); + this.warmupRecords = Math.max(namespace.getLong("warmupRecords"), 0); + this.recordSize = namespace.getInt("recordSize"); + this.throughput = namespace.getDouble("throughput"); + this.payloadMonotonic = namespace.getBoolean("payloadMonotonic"); + this.shouldPrintMetrics = namespace.getBoolean("printMetrics"); + + List producerConfigs = namespace.getList("producerConfig"); + String producerConfigFile = namespace.getString("producerConfigFile"); + String payloadFilePath = namespace.getString("payloadFile"); + Long transactionDurationMsArg = namespace.getLong("transactionDurationMs"); + String transactionIdArg = namespace.getString("transactionalId"); + if (numRecords <= 0) { + throw new ArgumentParserException("--num-records should be greater than zero", parser); + } + if (warmupRecords >= numRecords) { + throw new ArgumentParserException("The value for --warmup-records must be strictly fewer than the number of records in the test, --num-records.", parser); + } + if (recordSize != null && recordSize <= 0) { + throw new ArgumentParserException("--record-size should be greater than zero", parser); + } + 
if (producerConfigs == null && producerConfigFile == null) { + throw new ArgumentParserException("Either --producer-props or --producer.config must be specified.", parser); + } + if (transactionDurationMsArg != null && transactionDurationMsArg <= 0) { + throw new ArgumentParserException("--transaction-duration-ms should be greater than zero", parser); + } + + // since default value gets printed with the help text, we are escaping \n there and replacing it with correct value here. + String payloadDelimiter = namespace.getString("payloadDelimiter").equals("\\n") + ? "\n" : namespace.getString("payloadDelimiter"); + this.payloadByteList = readPayloadFile(payloadFilePath, payloadDelimiter); + this.producerProps = readProps(producerConfigs, producerConfigFile); + // setup transaction related configs + this.transactionsEnabled = transactionDurationMsArg != null + || transactionIdArg != null + || producerProps.containsKey(ProducerConfig.TRANSACTIONAL_ID_CONFIG); + if (transactionsEnabled) { + Optional txIdInProps = + Optional.ofNullable(producerProps.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG)) + .map(Object::toString); + String transactionId = Optional.ofNullable(transactionIdArg).orElse(txIdInProps.orElse(DEFAULT_TRANSACTION_ID_PREFIX + Uuid.randomUuid().toString())); + producerProps.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionId); + + if (transactionDurationMsArg == null) { + transactionDurationMsArg = DEFAULT_TRANSACTION_DURATION_MS; + } + } + this.transactionDurationMs = transactionDurationMsArg; + } + } +} diff --git a/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs b/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs new file mode 100644 index 0000000..e7e06f6 --- /dev/null +++ b/ExecutionUtilities/high-throughput-kafka-producer/src/main.rs @@ -0,0 +1,556 @@ +use anyhow::Result; +use clap::Parser; +use futures::future::join_all; +use itertools::Itertools; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use 
rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; +use rdkafka::config::ClientConfig; +use rdkafka::producer::{FutureProducer, FutureRecord, Producer}; +use serde_json; +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tokio::time::sleep; +use tracing::{error, info, warn}; + +#[derive(Debug, Clone)] +struct PrometheusTemplate { + template: String, + metric_name: String, +} + +impl PrometheusTemplate { + fn new(metric_name: String, labels: &[String]) -> Self { + // Create JSON template with placeholders + let mut label_parts = Vec::new(); + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_parts.push(format!("\"{}\": \"{}\"", key, labels[i])); + } + } + let labels_json = label_parts.join(", "); + + let template = format!( + "{{\"metric_name\": \"{}\", \"timestamp\": {{TIMESTAMP}}, \"value\": {{VALUE}}, \"labels\": {{{}}}}}", + metric_name, labels_json + ); + + Self { + template, + metric_name, + } + } + + fn generate_message(&self, timestamp: u64, value: f64) -> Vec { + // Fast string replacement instead of JSON serialization + let mut result = self.template.clone(); + result = result.replace("{TIMESTAMP}", ×tamp.to_string()); + result = result.replace("{VALUE}", &format!("{:.2}", value)); + result.into_bytes() + } +} + +#[derive(Debug, Clone)] +struct LabelChoices { + hostname: Vec, + location: Vec, + application_name: Vec, + instance: Vec, + job: Vec, +} + +impl Default for LabelChoices { + fn default() -> Self { + Self { + hostname: vec![ + "host1".to_string(), + "host2".to_string(), + "host3".to_string(), + "host4".to_string(), + "host5".to_string(), + ], + location: vec![ + "us-east".to_string(), + "us-west".to_string(), + "eu-central".to_string(), + "ap-southeast".to_string(), + ], + application_name: vec![ + 
"app1".to_string(), + "app2".to_string(), + "app3".to_string(), + "app4".to_string(), + ], + instance: vec![ + "worker1".to_string(), + "worker2".to_string(), + "worker3".to_string(), + "worker4".to_string(), + ], + job: vec![ + "throughput-test".to_string(), + "latency-test".to_string(), + "stress-test".to_string(), + ], + } + } +} + +static METRIC_NAMES: &[&str] = &[ + "cpu_usage", + "memory_usage", + "network_throughput", + "disk_iops", + "response_time", + "error_rate", +]; + +#[derive(Debug, Clone)] +struct ProducerStats { + messages_sent: Arc, + bytes_sent: Arc, + errors: Arc, +} + +impl ProducerStats { + fn new() -> Self { + Self { + messages_sent: Arc::new(AtomicU64::new(0)), + bytes_sent: Arc::new(AtomicU64::new(0)), + errors: Arc::new(AtomicU64::new(0)), + } + } + + fn add_message(&self, bytes: u64) { + self.messages_sent.fetch_add(1, Ordering::Relaxed); + self.bytes_sent.fetch_add(bytes, Ordering::Relaxed); + } + + fn add_error(&self) { + self.errors.fetch_add(1, Ordering::Relaxed); + } + + fn get_stats(&self) -> (u64, u64, u64) { + ( + self.messages_sent.load(Ordering::Relaxed), + self.bytes_sent.load(Ordering::Relaxed), + self.errors.load(Ordering::Relaxed), + ) + } +} + +#[derive(Parser, Debug)] +#[command(name = "kafka-throughput-producer")] +#[command(about = "High-performance Kafka producer for Arroyo benchmarking")] +struct Args { + #[arg(long, default_value = "localhost:9092")] + kafka_broker: String, + + #[arg(long)] + kafka_topic: String, + + #[arg(long, default_value = "1000000")] + total_messages: u64, + + #[arg(long, default_value = "10000")] + messages_per_second: u64, + + #[arg(long)] + duration: Option, + + #[arg(long, default_value = "1")] + num_threads: usize, + + #[arg(long, default_value = "1")] + num_partitions: i32, + + #[arg(long, default_value = "1")] + replication_factor: i32, + + #[arg(long)] + vary_labels: bool, + + #[arg(long, default_value = "false")] + enable_flush: bool, + + #[arg(long, default_value = "none")] + 
compression: String, + + #[arg(long, default_value = "65536")] + batch_size: usize, +} + +#[derive(Clone)] +struct HighThroughputProducer { + producer: FutureProducer, + topic_name: String, + stats: ProducerStats, + templates: Arc>, + label_choices: LabelChoices, +} + + +impl HighThroughputProducer { + async fn new_with_compression( + kafka_broker: &str, + topic_name: String, + num_partitions: i32, + replication_factor: i32, + compression: &str, + ) -> Result { + // High-performance producer configuration optimized for throughput + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .set("linger.ms", "5") + .set("batch.size", "1048576") // 1MB batches + .set("compression.type", compression) + .set("queue.buffering.max.messages", "1000000") + .set("queue.buffering.max.kbytes", "2097152") // 2GB + .set("batch.num.messages", "10000") + .set("acks", "0") // No acknowledgments for max throughput + .set("retries", "0") // No retries for max throughput + .set("message.max.bytes", "1048576") // 1MB + .set("queue.buffering.max.ms", "10") + .set("delivery.timeout.ms", "30000") + .create()?; + + // Pre-generate templates for all label combinations + let label_choices = LabelChoices::default(); + let all_labels = Self::generate_all_label_combinations_static(&label_choices); + let mut templates = Vec::new(); + + for metric_name in METRIC_NAMES { + for labels in &all_labels { + templates.push(PrometheusTemplate::new(metric_name.to_string(), labels)); + } + } + + let kafka_producer = Self { + producer, + topic_name: topic_name.clone(), + stats: ProducerStats::new(), + templates: Arc::new(templates), + label_choices, + }; + + kafka_producer + .create_topic_if_not_exists(kafka_broker, &topic_name, num_partitions, replication_factor) + .await?; + + Ok(kafka_producer) + } + + async fn create_topic_if_not_exists( + &self, + kafka_broker: &str, + topic_name: &str, + num_partitions: i32, + replication_factor: i32, + ) -> Result<()> { + let 
admin: AdminClient<_> = ClientConfig::new() + .set("bootstrap.servers", kafka_broker) + .create()?; + + let metadata = admin.inner().fetch_metadata(None, Duration::from_secs(10))?; + + let topic_exists = metadata.topics().iter().any(|t| t.name() == topic_name); + + if !topic_exists { + let new_topic = NewTopic::new( + topic_name, + num_partitions, + TopicReplication::Fixed(replication_factor), + ); + + let opts = AdminOptions::new().request_timeout(Some(Duration::from_secs(10))); + let results = admin.create_topics(&[new_topic], &opts).await?; + + for result in results { + match result { + Ok(topic) => info!("Created topic: {}", topic), + Err((topic, error)) => { + error!("Failed to create topic {}: {}", topic, error); + return Err(anyhow::anyhow!("Topic creation failed")); + } + } + } + + // Wait for topic creation to propagate + sleep(Duration::from_secs(2)).await; + info!("Topic '{}' created with {} partitions", topic_name, num_partitions); + } + + Ok(()) + } + + fn generate_fast_prometheus_message(&self) -> Vec { + let mut rng = thread_rng(); + + // Select random template (pre-built with metric name and labels) + let template = self.templates.choose(&mut rng).unwrap(); + + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64; + + let value = match template.metric_name.as_str() { + "cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + template.generate_message(timestamp, value) + } + + fn generate_prometheus_metric(&self, labels: &[String]) -> Result> { + let mut rng = thread_rng(); + + let metric_name = METRIC_NAMES.choose(&mut rng).unwrap().to_string(); + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_millis() as u64; + + let value = match metric_name.as_str() { + 
"cpu_usage" | "memory_usage" => rng.gen_range(0.0..100.0), + "network_throughput" => rng.gen_range(1000.0..10000.0), + "disk_iops" => rng.gen_range(100.0..5000.0), + "response_time" => rng.gen_range(0.1..1000.0), + "error_rate" => rng.gen_range(0.0..5.0), + _ => rng.gen_range(0.0..1000.0), + }; + + let label_keys = ["hostname", "location", "application_name", "instance", "job"]; + let mut label_map = HashMap::new(); + + for (i, key) in label_keys.iter().enumerate() { + if i < labels.len() { + label_map.insert(key.to_string(), serde_json::Value::String(labels[i].clone())); + } + } + + let mut metric = HashMap::new(); + metric.insert("metric_name".to_string(), serde_json::Value::String(metric_name)); + metric.insert("timestamp".to_string(), serde_json::Value::Number(serde_json::Number::from(timestamp))); + metric.insert("value".to_string(), serde_json::Value::Number(serde_json::Number::from_f64(value).unwrap())); + metric.insert("labels".to_string(), serde_json::Value::Object(label_map.into_iter().collect())); + + Ok(metric) + } + + async fn produce_message_batch( + &self, + batch: Vec<(String, Vec)>, + ) -> Result<()> { + let mut futures = Vec::new(); + + for (partition_key, labels) in batch { + let metric = self.generate_prometheus_metric(&labels)?; + let message_data = serde_json::to_vec(&metric)?; + let message_size = message_data.len(); + + let stats = self.stats.clone(); + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + + let future = async move { + let record = FutureRecord::to(&topic_name) + .key(&partition_key) + .payload(&message_data); + + match producer.send(record, Duration::from_secs(10)).await { + Ok((partition, offset)) => { + stats.add_message(message_size as u64); + Ok((partition, offset)) + } + Err((kafka_error, message)) => { + stats.add_error(); + warn!("Failed to send message: {}", kafka_error); + Err((kafka_error, message)) + } + } + }; + futures.push(future); + } + + let _results = join_all(futures).await; + 
Ok(()) + } + + + fn generate_all_label_combinations_static(label_choices: &LabelChoices) -> Vec> { + let label_values = vec![ + &label_choices.hostname, + &label_choices.location, + &label_choices.application_name, + &label_choices.instance, + &label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + fn generate_all_label_combinations(&self) -> Vec> { + let label_values = vec![ + &self.label_choices.hostname, + &self.label_choices.location, + &self.label_choices.application_name, + &self.label_choices.instance, + &self.label_choices.job, + ]; + + label_values + .into_iter() + .multi_cartesian_product() + .map(|combo| combo.into_iter().cloned().collect()) + .collect() + } + + async fn run_benchmark( + &self, + args: &Args, + ) -> Result<()> { + info!( + "Starting benchmark: {} messages at {} msg/s using {} threads", + args.total_messages, args.messages_per_second, args.num_threads + ); + info!("Producer initialized with {} pre-generated templates", self.templates.len()); + info!("🚀 Data generation started!"); + + let all_labels = self.generate_all_label_combinations(); + let start_time = Instant::now(); + let mut messages_sent = 0u64; + + let messages_per_interval = args.messages_per_second; + let batch_size = std::cmp::max(1, args.batch_size); + let interval = Duration::from_secs(1); + + while messages_sent < args.total_messages { + if let Some(duration) = args.duration { + if start_time.elapsed().as_secs() > duration { + break; + } + } + + let interval_start = Instant::now(); + + // Select labels for this interval + let labels_subset = if args.vary_labels { + let mut rng = thread_rng(); + let num_labels = rng.gen_range(1..=std::cmp::min(all_labels.len(), messages_per_interval as usize)); + all_labels.choose_multiple(&mut rng, num_labels).cloned().collect::>() + } else { + all_labels[..std::cmp::min(all_labels.len(), messages_per_interval as usize)].to_vec() + }; + 
+ // Create batches for parallel processing + let mut tasks = Vec::new(); + let mut remaining_messages = std::cmp::min( + messages_per_interval, + args.total_messages - messages_sent + ); + + while remaining_messages > 0 && tasks.len() < args.num_threads { + let current_batch_size = std::cmp::min(batch_size as u64, remaining_messages) as usize; + let batch: Vec<(String, Vec)> = (0..current_batch_size) + .map(|i| { + let labels = &labels_subset[i % labels_subset.len()]; + let partition_key = format!("{}_{}", labels[0], labels[1]); + (partition_key, labels.clone()) + }) + .collect(); + + // Clone necessary data for the async task + let producer = self.producer.clone(); + let topic_name = self.topic_name.clone(); + let stats = self.stats.clone(); + let label_choices = self.label_choices.clone(); + + tasks.push(tokio::spawn(async move { + let temp_producer = HighThroughputProducer { + producer, + topic_name, + stats, + templates: Arc::new(Vec::new()), // Empty templates for batch producer + label_choices, + }; + temp_producer.produce_message_batch(batch).await + })); + + remaining_messages -= current_batch_size as u64; + messages_sent += current_batch_size as u64; + } + + // Wait for all tasks to complete + for task in tasks { + if let Err(e) = task.await? 
{ + error!("Batch processing failed: {}", e); + } + } + + // Rate limiting + let elapsed = interval_start.elapsed(); + if elapsed < interval { + sleep(interval - elapsed).await; + } + + // Print progress + if messages_sent % (args.messages_per_second) == 0 { + self.print_stats(start_time); + } + } + + // Final flush - wait for all messages to be delivered + info!("Flushing remaining messages..."); + if let Err(e) = self.producer.flush(Duration::from_secs(30)) { + warn!("Error during flush: {}", e); + } + + info!("Benchmark completed!"); + self.print_stats(start_time); + + Ok(()) + } + + fn print_stats(&self, start_time: Instant) { + let (messages, bytes, errors) = self.stats.get_stats(); + let elapsed = start_time.elapsed().as_secs_f64(); + + let rate = if elapsed > 0.0 { messages as f64 / elapsed } else { 0.0 }; + let throughput_mb = if elapsed > 0.0 { + (bytes as f64 / (1024.0 * 1024.0)) / elapsed + } else { + 0.0 + }; + + info!( + "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s, Errors: {}", + messages, rate, throughput_mb, errors + ); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + + let producer = HighThroughputProducer::new_with_compression( + &args.kafka_broker, + args.kafka_topic.clone(), + args.num_partitions, + args.replication_factor, + &args.compression, + ).await?; + + producer.run_benchmark(&args).await?; + + Ok(()) +} diff --git a/ExecutionUtilities/kafka-consumer/Cargo.lock b/ExecutionUtilities/kafka-consumer/Cargo.lock new file mode 100644 index 0000000..63bdf55 --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/Cargo.lock @@ -0,0 +1,611 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] 
+name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "kafka-throughput-consumer" +version = "0.1.0" +dependencies = [ + "rdkafka", + "tokio", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.59.0", +] + +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + 
"num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + +[[package]] +name = 
"proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rdkafka" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] 
+name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +dependencies = [ + "memchr", +] diff --git a/ExecutionUtilities/kafka-consumer/Cargo.toml b/ExecutionUtilities/kafka-consumer/Cargo.toml new file mode 100644 index 0000000..d40d77f --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "kafka-throughput-consumer" +version = "0.1.0" +edition = "2021" + +[dependencies] +rdkafka = "0.36" +tokio = { version = "1.0", features = ["full"] } diff --git a/ExecutionUtilities/kafka-consumer/src/main.rs b/ExecutionUtilities/kafka-consumer/src/main.rs new file mode 100644 index 0000000..1ae08b8 --- /dev/null +++ b/ExecutionUtilities/kafka-consumer/src/main.rs @@ -0,0 +1,84 @@ +use rdkafka::config::ClientConfig; +use rdkafka::consumer::{Consumer, StreamConsumer}; +use rdkafka::message::Message; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::signal; +use tokio::time::interval; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let consumer: StreamConsumer = ClientConfig::new() + .set("group.id", "throughput-test-group") + 
/// Prints cumulative consumption statistics since `start_time`.
///
/// Bug fix: the rates were previously computed by dividing by the elapsed
/// milliseconds unconditionally, so a call within the first millisecond
/// printed `inf`/`NaN`. Rates now fall back to 0.0 when no time has elapsed
/// (matching the producer-side stats printer).
fn print_stats(
    message_count: &Arc<AtomicU64>,
    total_bytes: &Arc<AtomicU64>,
    start_time: Instant,
) {
    let elapsed_ms = start_time.elapsed().as_millis() as f64;
    let count = message_count.load(Ordering::Relaxed) as f64;
    let bytes = total_bytes.load(Ordering::Relaxed) as f64;

    // Avoid division by zero / non-finite output on a zero-length interval.
    let (messages_per_sec, mb_per_sec) = if elapsed_ms > 0.0 {
        (
            (count * 1000.0) / elapsed_ms,
            (bytes * 1000.0) / (elapsed_ms * 1024.0 * 1024.0),
        )
    } else {
        (0.0, 0.0)
    };

    println!(
        "Messages: {}, Rate: {:.2} msg/s, Throughput: {:.2} MB/s",
        count as u64, messages_per_sec, mb_per_sec
    );
}
def create_topic_if_not_exists(producer, topic):
    """Create `topic` (1 partition, replication factor 1) if it does not exist.

    Bug fix: the AdminClient was previously configured with
    ``producer.list_topics().brokers`` — a dict of BrokerMetadata objects, not
    the ``host:port`` string ``bootstrap.servers`` requires. The broker list is
    now rendered into the comma-separated form the client expects, and topic
    creation is awaited so failures surface here instead of silently.
    """
    # One metadata request serves both the broker list and the topic check.
    cluster = producer.list_topics(timeout=10)
    bootstrap = ",".join(
        f"{broker.host}:{broker.port}" for broker in cluster.brokers.values()
    )
    admin_client = admin.AdminClient({"bootstrap.servers": bootstrap})

    if topic not in cluster.topics:
        new_topic = admin.NewTopic(topic, num_partitions=1, replication_factor=1)
        # create_topics returns {topic_name: future}; block on the future so a
        # creation error raises immediately rather than being dropped.
        futures = admin_client.create_topics([new_topic])
        futures[topic].result()
        print(f"Topic '{topic}' created.")
# CLI entry point: parse generator options and run the infinite produce loop.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Kafka connection target; defaults to a local single-broker setup.
    parser.add_argument("--kafka_broker", type=str, default="localhost:9092")
    parser.add_argument(
        "--debug_print",
        action="store_true",
        help="Print data to console instead of sending to Kafka",
    )
    parser.add_argument("--kafka_topic", type=str, required=True)
    parser.add_argument(
        "--frequency",
        type=int,
        default=1,
        help="Frequency in seconds to dump data to Kafka",
    )
    # Must not exceed the number of distinct label combinations; main() raises
    # ValueError otherwise.
    parser.add_argument(
        "--data_points",
        type=int,
        required=True,
        help="Number of data points to dump at each frequency interval",
    )
    parser.add_argument(
        "--vary_labels",
        action="store_true",
        help="Vary the number of labels to dump data for",
    )
    args = parser.parse_args()
    main(args)
FROM sketchdb-base:latest

LABEL maintainer="SketchDB Team"
LABEL description="Prometheus Client for SketchDB"

# OS packages first: they change least often, so this layer caches well.
# --no-install-recommends and the apt list cleanup keep the image small.
# NOTE(review): installing sudo inside a container is unusual — confirm it is
# actually needed at runtime.
RUN apt-get update \
    && apt-get install -y --no-install-recommends sudo \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest before installing so source-code edits do
# not invalidate the pip layer (the original `COPY . .` busted the cache on
# every code change).
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Application source last.
COPY . .

# Run with host network access for communicating with host services
ENTRYPOINT ["python", "main_prometheus_client.py"]
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/PrometheusClient/classes/QueryLatencyExporter.py b/PrometheusClient/classes/QueryLatencyExporter.py new file mode 100644 index 0000000..405a8db --- /dev/null +++ b/PrometheusClient/classes/QueryLatencyExporter.py @@ -0,0 +1,171 @@ +from typing import Any, Dict, Optional, Union +from prometheus_client import start_http_server, Gauge +from loguru import logger + + +class QueryLatencyExporter: + + @staticmethod + def _IP_valid(addr: Optional[Union[str, object]]) -> None: + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if addr is None: + raise TypeError("IP address cannot be None") + elif not isinstance(addr, str): + raise TypeError("IP address must be a string") + elif addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + + @staticmethod + def _port_valid(port: Optional[Union[int, object]]) -> None: + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. 
At the moment, this function considers + any properly formatted IP address as valid + """ + if port is None: + raise TypeError("Port cannot be None") + elif not isinstance(port, int): + raise TypeError("Port must be an integer") + elif port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + def __init__(self, addr: str, port: int): + self.logger = logger.bind(module="query_latency_exporter") + self.port = port + self.addr = addr + + self.http_server: Optional[Any] = None + self.server_thread: Optional[Any] = None + + try: + QueryLatencyExporter._IP_valid(self.addr) + QueryLatencyExporter._port_valid(self.port) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryLatencyExporter: {str(e)}") + raise e + + self.latencies_metric = Gauge( + "query_latencies", "Query latencies", labelnames=["query_index", "server"] + ) + self.cumulative_latencies_metric = Gauge( + "cumulative_query_latencies", + "Query cumulative latencies", + labelnames=["query_index", "server"], + ) + self.logger.info("QueryLatencyExporter object created") + + def __enter__(self) -> "QueryLatencyExporter": + return self + + def __exit__(self, *args: object) -> None: + self.shutdown() + + def launch(self) -> None: + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Latency exporter failed to launch: exporter IP is None") + elif self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError( + "Latency exporter failed to launch: exporter port is None" + ) + + self.logger.info(f"Launching latency exporter at {self.addr}: {self.port}") + + try: + result = start_http_server(addr=self.addr, port=self.port) + assert result is not None + self.http_server, self.server_thread = result + except Exception as e: + self.logger.error(f"Failed to start 
http server due to exception: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Latency exporter failed to launch") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}: {self.port}") + print(f"Exporter running at {self.addr}: {self.port}") + + def shutdown(self) -> None: + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down latency exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Attempt to shutdown exporter http_server failed.") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server thread: {str(e)}") + # e.add_note is only available in Python 3.11+, commenting out for compatibility + # e.add_note("Attempt to join exporter's server thread failed.") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Exporter server thread is None") + + print("Exporter shut down successfully") + + def export_repetition(self, repetition_idx: int, result: Dict[str, Any]) -> None: + """ + Exports a single repetition result for all queries + """ + if not isinstance(repetition_idx, int): + self.logger.error("Given non-integer repetition_idx") + raise TypeError("Repetition index must be an integer") + + 
# --- file: PrometheusClient/classes/__init__.py ---
# Make classes a proper Python package

# --- file: PrometheusClient/classes/clickhouse_query_client.py ---
import re
import requests
from typing import Any, Dict, Optional
from requests.adapters import HTTPAdapter

from .query_client import QueryClient, QueryResponse


class ClickHouseQueryClient(QueryClient):
    """Query client for ClickHouse HTTP API."""

    def __init__(
        self,
        server_url: str,
        server_name: str,
        database: str = "default",
        user: str = "default",
        password: str = "",
        timeout: int = 30,
        **kwargs: Any,
    ):
        super().__init__(server_url, server_name)
        self.database = database
        self.user = user
        self.password = password
        self.timeout = timeout

        self._session = requests.Session()
        # Only attach basic auth when both credentials are supplied.
        if user and password:
            self._session.auth = (user, password)

    @property
    def protocol_name(self) -> str:
        return "clickhouse"

    @property
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        return self._session

    def mount_adapter(self, prefix: str, adapter: HTTPAdapter) -> None:
        """Mount an HTTP adapter (e.g., for debug logging)."""
        self._session.mount(prefix, adapter)

    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute SQL query via ClickHouse HTTP interface.

        Args:
            query: SQL query string (may contain template variables already substituted)
            query_time: Not directly used - time filtering should be done via
                template substitution before calling this method

        Returns:
            QueryResponse with normalized data
        """
        try:
            params = {"database": self.database}

            formatted_query = query.strip()

            # Reject queries with FORMAT clause - we need raw TSV for parsing
            if self._has_format_clause(formatted_query):
                return QueryResponse(
                    success=False,
                    data=None,
                    error_message="Queries must not contain FORMAT clause - raw TSV output is required for parsing",
                )

            response = self._session.post(
                self.server_url,
                params=params,
                data=formatted_query.encode("utf-8"),
                timeout=self.timeout,
            )

            if response.status_code != 200:
                return QueryResponse(
                    success=False,
                    data=None,
                    error_message=f"HTTP {response.status_code}: {response.text}",
                    raw_response=response.text,
                )

            # Return raw TSV text - will be stored in QueryResult.raw_text_result
            return QueryResponse(
                success=True,
                data=None,
                raw_response=response.text,
            )

        except requests.exceptions.Timeout:
            return QueryResponse(
                success=False,
                data=None,
                error_message=f"Request timed out after {self.timeout}s",
            )
        except Exception as e:
            return QueryResponse(
                success=False,
                data=None,
                error_message=f"{type(e).__name__}: {e}",
            )

    def _has_format_clause(self, query: str) -> bool:
        """
        Heuristically check if query already has a FORMAT clause.

        A word-boundary regex is used instead of the previous " FORMAT "
        substring check, which missed FORMAT keywords delimited by tabs or
        preceded only by a newline without a trailing space. Like the original
        check, this can still false-positive on a bare `format` identifier.
        """
        return re.search(r"\bFORMAT\b", query, flags=re.IGNORECASE) is not None

    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """Check ClickHouse availability via ping endpoint (best-effort)."""
        try:
            response = self._session.get(
                f"{self.server_url}/ping",
                timeout=5,
            )
            if response.status_code == 200:
                return {"status": "ok", "response": response.text.strip()}
        except Exception:
            # Best-effort probe: any failure is reported as "unavailable".
            pass
        return None


# --- file: PrometheusClient/classes/config.py ---
from typing import List, Dict, Any, Optional


class ServerConfig:
    """Configuration for one query backend server."""

    def __init__(
        self,
        name: str,
        url: str,
        protocol: Optional[str],
        # ClickHouse-specific options
        database: Optional[str],
        user: Optional[str],
        password: Optional[str],
    ):
        self.name = name
        self.url = url
        self.protocol = protocol
        self.database = database
        self.user = user
        self.password = password

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ServerConfig":
        """Build a ServerConfig from a parsed config mapping."""
        return cls(
            name=data["name"],
            url=data["url"],
            protocol=data.get("protocol"),
            database=data.get("database"),
            user=data.get("user"),
            password=data.get("password"),
        )


class QueryGroupConfig:
    """Configuration for a group of queries executed together."""

    def __init__(
        self,
        id: int,
        queries: List[str],
        repetition_delay: int,
        options: Dict[str, Any],
        time_window_seconds: Optional[int],
    ):
        # Defaults that `options` may override below.
        self.starting_delay = 0
        self.repetitions = None

        self.id = id
        self.queries = queries
        self.repetition_delay = repetition_delay
        self.time_window_seconds = time_window_seconds
        # Merge arbitrary client options (e.g. repetitions, query_time_offset)
        # directly into the instance namespace.
        self.__dict__.update(options)

        assert (
            self.repetitions is not None
        ), "query group client_options must set 'repetitions'"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QueryGroupConfig":
        """Build a QueryGroupConfig from a parsed config mapping."""
        return cls(
            id=data["id"],
            queries=data["queries"],
            repetition_delay=data["repetition_delay"],
            options=data["client_options"],
            time_window_seconds=data.get("time_window_seconds"),
        )


class Config:
    """Top-level client configuration: servers plus query groups."""

    def __init__(
        self, servers: List[ServerConfig], query_groups: List[QueryGroupConfig]
    ):
        self.servers = servers
        self.query_groups = query_groups

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Config":
        """Build a Config (servers + query groups) from a parsed mapping."""
        servers = [ServerConfig.from_dict(server) for server in data["servers"]]
        query_groups = [
            QueryGroupConfig.from_dict(group) for group in data["query_groups"]
        ]
        return cls(servers=servers, query_groups=query_groups)
# --- file: PrometheusClient/classes/prometheus_query_client.py ---
from typing import Any, Dict, Optional
import requests

from prometheus_api_client import PrometheusConnect
from .query_client import QueryClient, QueryResponse


class PrometheusQueryClient(QueryClient):
    """Query client for Prometheus HTTP API."""

    def __init__(
        self,
        server_url: str,
        server_name: str,
        disable_ssl: bool = True,
        retry: Optional[Any] = None,
        **kwargs: Any,
    ):
        super().__init__(server_url, server_name)
        self._client = PrometheusConnect(
            url=server_url,
            disable_ssl=disable_ssl,
            retry=retry,
            **kwargs,
        )

    @property
    def protocol_name(self) -> str:
        return "prometheus"

    @property
    def underlying_client(self) -> PrometheusConnect:
        """Access to underlying PrometheusConnect for advanced usage (e.g., mounting HTTP adapters)."""
        return self._client

    @property
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        # NOTE(review): reaches into PrometheusConnect's private _session —
        # confirm this attribute is stable across prometheus_api_client versions.
        return self._client._session

    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute PromQL query via Prometheus HTTP API.

        Args:
            query: PromQL query string
            query_time: Optional Unix timestamp for point-in-time query

        Returns:
            QueryResponse with normalized data
        """
        try:
            # `is not None` (not truthiness) so a query_time of 0 (the Unix
            # epoch) is still honored as a point-in-time query.
            if query_time is not None:
                raw_result = self._client.custom_query(
                    query=query, params={"time": query_time}
                )
            else:
                raw_result = self._client.custom_query(query=query)

            # Normalize to Dict[frozenset, float]
            normalized = self._normalize_response(raw_result)
            return QueryResponse(
                success=True,
                data=normalized,
                raw_response=raw_result,
            )
        except Exception as e:
            return QueryResponse(
                success=False,
                data=None,
                error_message=str(e),
            )

    def _normalize_response(self, raw_result: list) -> Dict[frozenset, float]:
        """
        Convert Prometheus response to normalized format.

        Prometheus response format:
            [{"metric": {"label1": "value1", ...}, "value": [timestamp, "value_str"]}, ...]

        Returns:
            Dict mapping frozenset of labels to float value
        """
        result = {}
        for item in raw_result:
            metric_labels = frozenset(item.get("metric", {}).items())
            value = item.get("value", [None, None])
            if len(value) >= 2 and value[1] is not None:
                try:
                    result[metric_labels] = float(value[1])
                except (ValueError, TypeError):
                    # Skip non-numeric values (e.g., NaN represented as string)
                    pass
        return result

    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """Query SketchDB/Prometheus runtime info endpoint (best-effort)."""
        try:
            response = requests.get(
                f"{self.server_url}/api/v1/status/runtimeinfo",
                timeout=10,
            )
            if response.status_code == 200:
                return response.json().get("data", {})
        except Exception:
            # Best-effort probe: any failure is reported as "unavailable".
            pass
        return None


# --- file: PrometheusClient/classes/query_client.py ---
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from dataclasses import dataclass
import requests


@dataclass
class QueryResponse:
    """Normalized response from any query backend."""

    success: bool
    data: Optional[Dict[frozenset, float]]  # metric_labels -> value
    error_message: Optional[str] = None
    raw_response: Optional[Any] = None  # For debugging


class QueryClient(ABC):
    """Abstract base class for query protocol adapters."""

    def __init__(self, server_url: str, server_name: str):
        self.server_url = server_url
        self.server_name = server_name

    @abstractmethod
    def execute_query(
        self,
        query: str,
        query_time: Optional[int] = None,
    ) -> QueryResponse:
        """
        Execute a query and return normalized response.

        Args:
            query: The query string (PromQL, SQL, etc.)
            query_time: Optional Unix timestamp for point-in-time queries

        Returns:
            QueryResponse with normalized data
        """
        pass

    @abstractmethod
    def get_runtime_info(self) -> Optional[Dict[str, Any]]:
        """
        Get runtime/status info from the backend.
        Used for query alignment with SketchDB.

        Returns:
            Dict with backend-specific runtime info, or None if unavailable
        """
        pass

    @property
    @abstractmethod
    def protocol_name(self) -> str:
        """Return the protocol name (e.g., 'prometheus', 'clickhouse')."""
        pass

    @property
    @abstractmethod
    def session(self) -> requests.Session:
        """Access to underlying requests Session for mounting debug adapters."""
        pass
# --- file: PrometheusClient/classes/query_client_factory.py ---
from typing import Any, Dict, List, Type

from .query_client import QueryClient
from .prometheus_query_client import PrometheusQueryClient
from .clickhouse_query_client import ClickHouseQueryClient


class QueryClientFactory:
    """Factory for creating protocol-specific query clients."""

    _registry: Dict[str, Type[QueryClient]] = {
        "prometheus": PrometheusQueryClient,
        "clickhouse": ClickHouseQueryClient,
    }

    @classmethod
    def register(cls, protocol: str, client_class: Type[QueryClient]) -> None:
        """
        Register a new protocol handler.

        Args:
            protocol: Protocol name (e.g., 'influxdb')
            client_class: QueryClient subclass to handle this protocol
        """
        cls._registry[protocol] = client_class

    @classmethod
    def create(
        cls,
        protocol: str,
        server_url: str,
        server_name: str,
        **kwargs: Any,
    ) -> QueryClient:
        """
        Create a query client for the specified protocol.

        Args:
            protocol: Protocol name ('prometheus', 'clickhouse', etc.)
            server_url: Backend server URL
            server_name: Logical name for the server
            **kwargs: Protocol-specific options passed to the client constructor

        Returns:
            QueryClient instance

        Raises:
            ValueError: If protocol is not supported
        """
        if protocol not in cls._registry:
            supported = ", ".join(sorted(cls._registry.keys()))
            raise ValueError(
                f"Unsupported protocol '{protocol}'. "
                f"Supported protocols: {supported}"
            )

        client_class = cls._registry[protocol]
        return client_class(server_url, server_name, **kwargs)

    @classmethod
    def supported_protocols(cls) -> List[str]:
        """Return list of supported protocol names."""
        return sorted(cls._registry.keys())


# --- file: PrometheusClient/classes/query_template.py ---
import time
from datetime import datetime, timezone
from typing import Optional, Set
from dataclasses import dataclass

from jinja2 import Environment, BaseLoader, TemplateSyntaxError, UndefinedError


@dataclass
class TimeRange:
    """Represents a query time range with Unix timestamps."""

    start_time: int  # Unix timestamp (seconds)
    end_time: int  # Unix timestamp (seconds)

    @property
    def start_datetime(self) -> str:
        """'YYYY-MM-DD HH:MM:SS' datetime string for start (UTC)."""
        return datetime.fromtimestamp(self.start_time, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

    @property
    def end_datetime(self) -> str:
        """'YYYY-MM-DD HH:MM:SS' datetime string for end (UTC)."""
        return datetime.fromtimestamp(self.end_time, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

    @property
    def start_time_ms(self) -> int:
        """Start time in milliseconds."""
        return self.start_time * 1000

    @property
    def end_time_ms(self) -> int:
        """End time in milliseconds."""
        return self.end_time * 1000


class QueryTemplate:
    """
    Handles Jinja2 template variable substitution in queries.

    Supported variables:
        {{ start_time }}     - Unix timestamp in seconds (int)
        {{ end_time }}       - Unix timestamp in seconds (int)
        {{ start_time_ms }}  - Unix timestamp in milliseconds (int)
        {{ end_time_ms }}    - Unix timestamp in milliseconds (int)
        {{ start_datetime }} - datetime string (e.g., '2024-01-16 12:00:00')
        {{ end_datetime }}   - datetime string (e.g., '2024-01-16 12:01:00')

    Example usage:
        template = QueryTemplate(
            "SELECT * FROM metrics WHERE ts >= {{ start_time }} AND ts < {{ end_time }}"
        )
        time_range = TimeRange(start_time=1705420800, end_time=1705420860)
        query = template.render(time_range)
        # Result: "SELECT * FROM metrics WHERE ts >= 1705420800 AND ts < 1705420860"
    """

    SUPPORTED_VARS = {
        "start_time",
        "end_time",
        "start_time_ms",
        "end_time_ms",
        "start_datetime",
        "end_datetime",
    }

    def __init__(self, template: str):
        """
        Initialize with a query template.

        Args:
            template: Query string potentially containing {{ variable }} placeholders

        Raises:
            ValueError: If template has syntax errors
        """
        self.template_str = template
        self._env = Environment(loader=BaseLoader(), autoescape=False)

        try:
            self._template = self._env.from_string(template)
        except TemplateSyntaxError as e:
            raise ValueError(f"Invalid template syntax: {e}")

        self._variables = self._extract_variables()

    def _extract_variables(self) -> Set[str]:
        """Extract all template variable names from the query."""
        # Parse the AST to find all variable references
        from jinja2 import meta

        ast = self._env.parse(self.template_str)
        return meta.find_undeclared_variables(ast)

    @property
    def has_time_variables(self) -> bool:
        """Check if template contains any time variables."""
        return bool(self._variables)

    @property
    def variables(self) -> Set[str]:
        """Return set of variables used in this template."""
        return self._variables.copy()

    def render(self, time_range: TimeRange) -> str:
        """
        Substitute template variables with actual values.

        Args:
            time_range: TimeRange object with start/end times

        Returns:
            Query string with variables substituted

        Raises:
            ValueError: If template uses unsupported variables
        """
        # Jinja2's default Undefined silently renders unknown variables as an
        # empty string, so the previous `except UndefinedError` branch was
        # unreachable and unsupported variables slipped through blank.
        # Validate explicitly before rendering.
        unsupported = self._variables - self.SUPPORTED_VARS
        if unsupported:
            raise ValueError(
                f"Unsupported template variables: {unsupported}. "
                f"Supported: {sorted(self.SUPPORTED_VARS)}"
            )

        context = {
            "start_time": time_range.start_time,
            "end_time": time_range.end_time,
            "start_time_ms": time_range.start_time_ms,
            "end_time_ms": time_range.end_time_ms,
            "start_datetime": time_range.start_datetime,
            "end_datetime": time_range.end_datetime,
        }

        try:
            return self._template.render(**context)
        except UndefinedError as e:
            # Defensive: should be unreachable after the explicit check above.
            raise ValueError(
                f"Unsupported template variables: {unsupported}. "
                f"Supported: {sorted(self.SUPPORTED_VARS)}"
            ) from e

    @staticmethod
    def calculate_time_range(
        current_time: Optional[int] = None,
        window_seconds: int = 60,
        offset_seconds: int = 0,
    ) -> TimeRange:
        """
        Calculate a time range for query execution.

        The time range is calculated as:
            end_time = current_time - offset_seconds
            start_time = end_time - window_seconds

        Args:
            current_time: Reference Unix timestamp (default: now)
            window_seconds: Size of time window in seconds
            offset_seconds: How far back from current_time to end the window
                (positive = past, useful for query_time_offset)

        Returns:
            TimeRange object

        Examples:
            # Current time query with 60s window
            calculate_time_range(current_time=1000, window_seconds=60, offset_seconds=0)
            -> TimeRange(start=940, end=1000)

            # Query with 30s offset (for delayed data)
            calculate_time_range(current_time=1000, window_seconds=60, offset_seconds=30)
            -> TimeRange(start=910, end=970)
        """
        if current_time is None:
            current_time = int(time.time())

        end_time = current_time - offset_seconds
        start_time = end_time - window_seconds

        return TimeRange(start_time=start_time, end_time=end_time)
# --- file: PrometheusClient/docker-compose.yml.j2 ---
# QueryEngine Docker Compose Template
# This template is rendered with Jinja2 to generate the final docker-compose.yml
services:
  prometheusclient:
    image: sketchdb-prometheusclient:latest
    cap_add:
      - SYS_PTRACE # For py-spy monitoring
    security_opt: # For pyspy monitoring, gives container permission to trace processes
      - seccomp=unconfined
      - apparmor=unconfined
    container_name: {{ container_name }}{% if latency_exporter_socket_addr is defined and latency_exporter_socket_addr is not none %}
    ports:
      - "{{ latency_exporter_socket_addr.split(":")[1] }}"{% endif %}
    volumes:
      - "{{ experiment_output_dir }}:/app/outputs"
      - "{{ client_output_dir }}:/app/prometheus_client_output"
      - "{{ config_file }}:/app/prometheus_client_config.yaml:ro"{% if query_engine_config_file is defined and query_engine_config_file is not none %}
      - "{{ query_engine_config_file}}:/app/query_engine_config_file.yaml:ro"{% endif %}
    network_mode: "host" # Allows prometheus client to send requests to localhost, not ideal for production
    # Each CLI flag below is included only when its template variable is set.
    command: [
      "--config_file", "/app/prometheus_client_config.yaml",
      "--output_dir", "/app/prometheus_client_output",
      "--output_file", "/app/prometheus_client_output/{{ client_output_file }}",{% if result_output_file is defined and result_output_file is not none %}
      "--result_output_file", "{{ result_output_file }}",{% endif %}{% if query_engine_config_file is defined and query_engine_config_file is not none %}
      "--query_engine_config_file", "/app/query_engine_config_file.yaml",{% endif %}{% if align_query_time %}
      "--align_query_time",{% endif %}{% if server_for_alignment is defined and server_for_alignment is not none %}
      "--server_for_alignment", "{{ server_for_alignment }}",{% endif %}{% if dry_run %}
      "--dry_run",{% endif %}{% if compare_results %}
      "--compare_results",{% endif %}{% if parallel %}
      "--parallel",{% endif %}{% if query_engine_pid is defined and query_engine_pid is not none %}
      "--profile_query_engine_pid", "{{ query_engine_pid }}",{% endif %}{% if profile_prometheus_time is defined and profile_prometheus_time is not none %}
      "--profile_prometheus_time", "{{ profile_prometheus_time }}",{% endif %}{% if latency_exporter_socket_addr is defined and latency_exporter_socket_addr is not none %}
      "--export_latencies_for_prometheus", "{{ latency_exporter_socket_addr }}",{% endif %}
    ]
    # pid: "container:sketchdb-queryengine"
    pid: "host" # So py-spy can access processes running on the host
    extra_hosts:
      # host.docker.internal allows prometheus client to curl servers running on host system localhost,
      # but this requires changing the server urls that the client reads from localhost: to host.docker.internal:
      # Maybe there is a more elegant solution?
      # - "host.docker.internal:host-gateway"
      - "prometheus:{{ prometheus_host }}"
      - "sketchdb:{{ sketchdb_host }}"
    restart: no

# networks:
#   prometheusclient-net:
#     driver: bridge
#     ipam:
#       config:
#         - subnet: 172.20.0.0/16
-f Dockerfile -t sketchdb-prometheusclient:latest + +echo "PrometheusClient Docker image built successfully: sketchdb-prometheusclient:latest" diff --git a/PrometheusClient/installation/setup_dependencies.sh b/PrometheusClient/installation/setup_dependencies.sh new file mode 100755 index 0000000..b8a209c --- /dev/null +++ b/PrometheusClient/installation/setup_dependencies.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") + +sudo apt-get install -y python3-pip +# TODO: change to virtualenv +pip3 install --user -r "${THIS_DIR}/../requirements.txt" diff --git a/PrometheusClient/main_prometheus_client.py b/PrometheusClient/main_prometheus_client.py new file mode 100644 index 0000000..08a9bf5 --- /dev/null +++ b/PrometheusClient/main_prometheus_client.py @@ -0,0 +1,846 @@ +import os +import yaml +import time +import requests +import argparse +import datetime +import logging + +# import urllib3 +from loguru import logger +from typing import Dict, Set, Optional, List, Any +from type_aliases import ( + ServerDict, + Query, + QueryIndex, + RepetitionIndex, + UnixTimestamp, + ResultDict, + QueryStartTimes, + QueryEngineConfig, +) +import threading +import subprocess +import concurrent.futures +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from classes.config import Config +from classes.QueryLatencyExporter import QueryLatencyExporter +from classes.query_client import QueryClient +from classes.query_client_factory import QueryClientFactory +from classes.query_template import QueryTemplate +from promql_utilities.query_results.classes import QueryResult, QueryResultAcrossTime +from promql_utilities.query_results.serializers import SerializerFactory + + +class PrometheusDebugRetry(Retry): + def __init__(self, *args: Any, server_name: str = "", **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.server_name = server_name + + def new(self, **kw: Any) -> "PrometheusDebugRetry": + 
"""Override new() to preserve server_name when creating new instances.""" + new_retry = super().new(**kw) + new_retry.server_name = self.server_name + return new_retry + + def increment( + self, + method: Optional[str] = None, + url: Optional[str] = None, + response: Optional[Any] = None, + error: Optional[Exception] = None, + _pool: Optional[Any] = None, + _stacktrace: Optional[Any] = None, + ) -> "PrometheusDebugRetry": + # Calculate current attempt number + assert self.total is not None + current_retries = self.total - ( + self.total if hasattr(self, "history") and self.history else 0 + ) + attempt_num = (3 - current_retries) + 1 # Assuming max 3 retries + + if response: + logger.bind(module="http_debug").debug( + f"RETRY ATTEMPT {attempt_num} for {self.server_name}: " + f"{method} {url} -> HTTP {response.status} " + f"(will retry: {response.status in self.status_forcelist})" + ) + elif error: + logger.bind(module="http_debug").debug( + f"RETRY ATTEMPT {attempt_num} for {self.server_name}: " + f"{method} {url} -> ERROR: {type(error).__name__}: {error}" + ) + + result = super().increment(method, url, response, error, _pool, _stacktrace) + assert isinstance(result, PrometheusDebugRetry) + return result + + +class PrometheusDebugHTTPAdapter(HTTPAdapter): + def __init__(self, server_name: str, *args: Any, **kwargs: Any) -> None: + self.server_name = server_name + super().__init__(*args, **kwargs) + + def send(self, request: Any, *args: Any, **kwargs: Any) -> Any: + logger.bind(module="http_debug").debug( + f"HTTP REQUEST START for {self.server_name}: " + f"{request.method} {request.url}" + ) + start_time = time.time() + + try: + response = super().send(request, *args, **kwargs) + elapsed = time.time() - start_time + + logger.bind(module="http_debug").debug( + f"HTTP REQUEST END for {self.server_name}: " + f"{request.method} {request.url} -> HTTP {response.status_code} " + f"({elapsed:.3f}s, {len(response.content)} bytes)" + ) + return response + except Exception as 
def create_loggers(logging_dir: str, log_level: str) -> None:
    """
    Configure loguru sinks (main client log, latency-exporter log, HTTP-debug
    log) plus a urllib3 connection-pool debug file handler under logging_dir.
    """
    logger.remove(None)  # remove default loggers

    # Main client log: only records emitted from __main__.
    logger.add(f"{logging_dir}/prometheus_client.log", filter="__main__")

    # Latency exporter log, selected via the bound "module" extra.
    logger.add(
        f"{logging_dir}/query_latency_exporter.log",
        level=log_level,
        filter=lambda record: record["extra"].get("module") == "query_latency_exporter",
    )

    # HTTP request debugging logger.
    logger.add(
        f"{logging_dir}/http_requests.log",
        level="DEBUG",
        filter=lambda record: record["extra"].get("module") == "http_debug",
    )

    # Enable urllib3 debug logging for connection-level details.
    pool_logger = logging.getLogger("urllib3.connectionpool")
    pool_logger.setLevel(logging.DEBUG)
    pool_handler = logging.FileHandler(f"{logging_dir}/urllib3_debug.log")
    pool_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    pool_logger.addHandler(pool_handler)


def get_query_unix_time(
    query: Query,
    query_unix_time: UnixTimestamp,
    query_start_times: Optional[QueryStartTimes],
    repetition_delay: int,
) -> UnixTimestamp:
    """
    Snap query_unix_time down to the most recent repetition boundary for
    `query`, i.e. the latest timestamp of the form
    alignment_time + N * repetition_delay that is <= query_unix_time.
    Returns the input unchanged when no alignment time is known for the query.
    """
    if query_start_times is None or query not in query_start_times:
        return query_unix_time

    anchor = query_start_times[query]
    return int(query_unix_time - (query_unix_time - anchor) % repetition_delay)


def execute_single_query(
    server_name: str,
    server_object: QueryClient,
    query: Query,
    query_idx: QueryIndex,
    repetition_idx: RepetitionIndex,
    query_unix_time: Optional[UnixTimestamp],
    dry_run: bool,
    query_group_idx: int,
    time_window_seconds: Optional[int],
) -> QueryResult:
    """Execute a single query and return the result with latency information."""
    logger.debug(
        f"Running query {query} on server {server_name} at time {query_unix_time}"
    )

    # Substitute time-range template variables, if the query uses any.
    tmpl = QueryTemplate(query)
    if not tmpl.has_time_variables:
        rendered_query = query
    else:
        if time_window_seconds is None:
            raise ValueError(
                f"Query contains time template variables but time_window_seconds is not set: {query[:100]}"
            )
        if query_unix_time is None:
            raise ValueError(
                f"Query contains time template variables but query_unix_time is not set: {query[:100]}"
            )
        rendered_query = tmpl.render(
            QueryTemplate.calculate_time_range(
                current_time=query_unix_time,
                window_seconds=time_window_seconds,
            )
        )
        logger.debug(f"Rendered query template: {rendered_query}")

    # Enhanced HTTP debug logging for query start.
    logger.bind(module="http_debug").info(
        f"QUERY START - Server: {server_name}, Query: {rendered_query[:100]}{'...' if len(rendered_query) > 100 else ''}, "
        f"QueryIdx: {query_idx}, QueryGroupIdx: {query_group_idx}, Rep: {repetition_idx}, Time: {query_unix_time}"
    )

    # Returned verbatim on dry runs and on every failure path below.
    fallback_result = QueryResult(
        server_name,
        query,  # Store original query template, not rendered
        query_idx,
        repetition_idx,
        result=None,
        latency=None,
        cumulative_latency=None,
        query_group_idx=query_group_idx,
    )

    if dry_run:
        logger.bind(module="http_debug").debug(
            f"DRY RUN - Skipping actual HTTP request for {server_name}"
        )
        return fallback_result

    try:
        started_at = time.time()
        # Use the QueryClient abstraction.
        response = server_object.execute_query(
            query=rendered_query,
            query_time=query_unix_time,
        )
        latency = time.time() - started_at
        logger.debug("Latency: {}", latency)

        if not response.success:
            logger.error(f"Query failed: {response.error_message}")
            logger.bind(module="http_debug").error(
                f"QUERY ERROR - Server: {server_name}, Error: {response.error_message}"
            )
            return fallback_result

        # Determine result type based on response format.
        if isinstance(response.raw_response, str):
            # ClickHouse/SQL - raw text result.
            query_result_data = None
            raw_text_result = response.raw_response
            result_count = (
                len(response.raw_response.strip().split("\n"))
                if response.raw_response
                else 0
            )
        else:
            # Prometheus - list of dicts.
            query_result_data = response.raw_response
            raw_text_result = None
            result_count = len(response.raw_response) if response.raw_response else 0

        # Enhanced HTTP debug logging for query success.
        logger.debug("Query result: {}", response.raw_response)
        logger.bind(module="http_debug").info(
            f"QUERY SUCCESS - Server: {server_name}, Total latency: {latency:.3f}s, "
            f"Results: {result_count} data points"
        )

    except Exception as e:
        logger.error(f"Error running query: {str(e)}")
        # Enhanced HTTP debug logging for query error.
        logger.bind(module="http_debug").error(
            f"QUERY ERROR - Server: {server_name}, Error: {type(e).__name__}: {e}"
        )
        return fallback_result

    return QueryResult(
        server_name,
        query,  # Store original query template
        query_idx,
        repetition_idx,
        result=query_result_data,
        latency=latency,
        cumulative_latency=None,
        query_group_idx=query_group_idx,
        raw_text_result=raw_text_result,
    )
logger.bind(module="http_debug").error( + f"QUERY ERROR - Server: {server_name}, Error: {type(e).__name__}: {e}" + ) + return empty_query_result + + return QueryResult( + server_name, + query, # Store original query template + query_idx, + repetition_idx, + result=query_result_data, + latency=latency, + cumulative_latency=None, + query_group_idx=query_group_idx, + raw_text_result=raw_text_result, + ) + + +def handle_query_group( + servers: ServerDict, + query_group: Any, + query_group_idx: int, + query_start_times: Optional[QueryStartTimes], + dry_run: bool, + parallel: bool = False, + latency_exporter: Optional[Any] = None, + streaming_serializer: Optional[Any] = None, +) -> ResultDict: + logger.debug(f"Starting query group {query_group.id}") + if query_group.starting_delay: + logger.debug( + f"Waiting for {query_group.starting_delay} seconds before starting" + ) + time.sleep(query_group.starting_delay) + + logger.debug("Query start times: {}", query_start_times) + + current_time = None + query_unix_time = None + + # Calculate global query indices (combining group offset with local index) + global_query_idx_start: int = query_group._global_query_idx_start + + result = { + server_name: { + global_query_idx_start + + local_query_idx: QueryResultAcrossTime( + server_name, + query, + global_query_idx_start + local_query_idx, + query_group.repetitions, + ) + for local_query_idx, query in enumerate(query_group.queries) + } + for server_name in servers + } + + for repetition_idx in range(query_group.repetitions): + current_time = datetime.datetime.now() + logger.debug("Current unix time: {}", int(current_time.timestamp())) + + if hasattr(query_group, "query_time_offset"): + current_time = current_time - datetime.timedelta( + seconds=query_group.query_time_offset + ) + logger.debug( + "Offsetting query time by {} seconds", query_group.query_time_offset + ) + + query_unix_time = int(current_time.timestamp()) + logger.debug("Unix time after query_time_offset: {}", 
query_unix_time) + + if parallel: + # Execute queries in parallel + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [] + for local_query_idx, query in enumerate(query_group.queries): + global_query_idx = global_query_idx_start + local_query_idx + current_query_unix_time = get_query_unix_time( + query, + query_unix_time, + query_start_times, + query_group.repetition_delay, + ) + + for server_name, server_object in servers.items(): + futures.append( + executor.submit( + execute_single_query, + server_name, + server_object, + query, + global_query_idx, + repetition_idx, + current_query_unix_time, + dry_run, + query_group_idx, + query_group.time_window_seconds, + ) + ) + + # Collect results + for future in concurrent.futures.as_completed(futures): + query_result = future.result() + server_name = query_result.server_name + query_idx = query_result.query_idx + + query_result.cumulative_latency = query_result.latency + + result[server_name][query_idx].add_result(query_result) + + # Stream result immediately if streaming serializer is provided + if streaming_serializer is not None and not dry_run: + streaming_serializer.streaming_write_result(query_result) + else: + # Reset cumulative latency for each repetition + cumulative_latency = {server_name: 0.0 for server_name in servers} + + # Serial execution - use the same execute_single_query function + for local_query_idx, query in enumerate(query_group.queries): + global_query_idx = global_query_idx_start + local_query_idx + current_query_unix_time = get_query_unix_time( + query, + query_unix_time, + query_start_times, + query_group.repetition_delay, + ) + + logger.debug("Unix time for query: {}", current_query_unix_time) + + for server_name, server_object in servers.items(): + query_result = execute_single_query( + server_name, + server_object, + query, + global_query_idx, + repetition_idx, + current_query_unix_time, + dry_run, + query_group_idx, + query_group.time_window_seconds, + ) + + # Update 
cumulative latency for this repetition + if query_result.latency is not None: + cumulative_latency[server_name] += query_result.latency + + query_result.cumulative_latency = cumulative_latency[server_name] + + try: + result[server_name][global_query_idx].add_result(query_result) + except Exception as e: + logger.error( + f"{type(e).__name__} accessing result dict: {e}, " + f"server_name={server_name}, " + f"global_query_idx={global_query_idx}, " + f"local_query_idx={local_query_idx}, " + f"query_group_idx={query_group_idx}, " + f"available_keys={list(result[server_name].keys())}" + ) + raise + + # Stream result immediately if streaming serializer is provided + if streaming_serializer is not None and not dry_run: + streaming_serializer.streaming_write_result(query_result) + + if latency_exporter is not None: + latency_exporter.export_repetition(repetition_idx, result) + + if repetition_idx < query_group.repetitions - 1: + time.sleep(query_group.repetition_delay) + + if latency_exporter is not None: + latency_exporter.shutdown() + + return result + + +def get_query_start_times( + server_url: str, query_engine_config: QueryEngineConfig +) -> QueryStartTimes: + aggregation_id_start_time_map = {} + query_aggregation_id_map = {} + query_start_time_map = {} + + required_aggregation_ids: Set[int] = set() + for query_yaml in query_engine_config["queries"]: + # add all aggregation IDs from the query YAML to the required_aggregation_ids set + required_aggregation_ids.update( + int(aggregation["aggregation_id"]) + for aggregation in query_yaml["aggregations"] + ) + # assert len(query_yaml["aggregations"]) == 1 + # required_aggregation_ids.add( + # int(query_yaml["aggregations"][0]["aggregation_id"]) + # ) + logger.debug("Required aggregation IDs: {}", required_aggregation_ids) + + # wait for all required aggregation IDs to be present + while True: + server_response = requests.get( + server_url + "/api/v1/status/runtimeinfo", + headers={"Content-Type": "application/json"}, + ) 
+ server_response.raise_for_status() + server_response_json = server_response.json() + logger.debug("Server response: {}", server_response_json) + aggregation_id_start_time_map = server_response_json["data"][ + "earliest_timestamp_per_aggregation_id" + ] + + # change all keys from string to int + aggregation_id_start_time_map = { + int(k): v for k, v in aggregation_id_start_time_map.items() + } + + if not set(aggregation_id_start_time_map.keys()).issuperset( + required_aggregation_ids + ): + logger.debug( + "Waiting for aggregation IDs {} to be present", + required_aggregation_ids - set(aggregation_id_start_time_map.keys()), + ) + time.sleep(10) + else: + break + + # TODO: make this more robust. What happens if there are multiple aggregations with + # different tumbling windows? How long do we wait here? What happens with multiple query groups? + + # get query to aggregate ID mapping from query_engine_config + for query_yaml in query_engine_config["queries"]: + # TODO: this assert will fail if there are multiple aggregations in a query YAML, including for DeltaSet, so commenting it out + # assert len(query_yaml["aggregations"]) == 1 + # for now, just take the first aggregation ID + # TODO: make this more robust, eg for cases where aggregations for the same query have different tumbling windows or start times + query_aggregation_id_map[query_yaml["query"]] = int( + query_yaml["aggregations"][0]["aggregation_id"] + ) + + for query, aggregation_id in query_aggregation_id_map.items(): + # aggregation_id_start_time_map is in milliseconds, convert to seconds + query_start_time_map[query] = ( + # aggregation_id_start_time_map[str(aggregation_id)] / 1000 + aggregation_id_start_time_map[aggregation_id] + / 1000 + ) + + return query_start_time_map + + +def check_args(args: Any) -> None: + if args.align_query_time and args.query_engine_config_file is None: + raise ValueError( + "If align_query_time is set, query_engine_config_file must be provided" + ) + + +def 
start_query_engine_profiler( + pid: int, output_dir: str, starting_delay: int, duration: int +) -> None: + """ + Create and start a subprocess to run py-spy on the specified process. + + Args: + pid: Process ID of the query engine + output_dir: Directory to save the profile output + duration: Duration in seconds to run the profiler + + Returns: + subprocess.Popen: The created subprocess + """ + output_file = os.path.join(output_dir, "query_engine_profile.svg") + logger.debug(f"Waiting for {starting_delay} seconds before starting profiler") + time.sleep(starting_delay) + logger.debug(f"Starting py-spy profiling of PID {pid} for {duration} seconds") + + try: + cmd = "bash --login -c 'sudo env \"PATH=$PATH\" py-spy record --pid {} -o {} --duration {} --idle'".format( + str(pid), output_file, str(duration) + ) + logger.info(f"Running command: {cmd}") + + subprocess.run(cmd, shell=True) + except Exception as e: + logger.error(f"Error starting profiler: {str(e)}") + raise e + + +def start_prometheus_profiler( + output_dir: str, starting_delay: int, duration: int +) -> None: + output_file = os.path.join(output_dir, "prometheus_profile.pprof") + logger.debug(f"Waiting for {starting_delay} seconds before starting profiler") + time.sleep(starting_delay) + logger.debug(f"Starting pprof profiling of Prometheus for {duration} seconds") + + try: + # cmd = "go tool pprof -seconds {} -output {} http://localhost:9090/debug/pprof/profile".format( + cmd = "curl -o {} http://localhost:9090/debug/pprof/profile?seconds={}".format( + output_file, + str(duration), + ) + logger.info(f"Running command: {cmd}") + + subprocess.run(cmd, shell=True) + except Exception as e: + logger.error(f"Error starting profiler: {str(e)}") + + +def main(args: Any) -> None: + check_args(args) + os.makedirs(args.output_dir, exist_ok=True) + + create_loggers(args.output_dir, "DEBUG") + + if args.dry_run: + logger.info("Running in dry-run mode") + + if args.parallel: + logger.info("Running queries in parallel 
mode") + + with open(args.config_file, "r") as file: + config_data = yaml.safe_load(file) + + query_engine_config = None + if args.query_engine_config_file: + with open(args.query_engine_config_file, "r") as file: + query_engine_config = yaml.safe_load(file) + + config = Config.from_dict(config_data) + + logger.debug("Read config") + + # Calculate global query indices for each query group + global_query_idx = 0 + for query_group in config.query_groups: + query_group._global_query_idx_start = global_query_idx # type: ignore[attr-defined] + global_query_idx += len(query_group.queries) + + server_url_for_alignment = None + + servers: Dict[str, QueryClient] = {} + for server in config.servers: + # Determine protocol (default to prometheus for backward compatibility) + protocol = server.protocol if server.protocol else "prometheus" + + if protocol == "prometheus": + # Create custom retry adapter with debug logging + debug_retry = PrometheusDebugRetry( + server_name=server.name, + total=3, + backoff_factor=1, + status_forcelist=[408, 429, 500, 502, 503, 504], + ) + + client = QueryClientFactory.create( + protocol=protocol, + server_url=server.url, + server_name=server.name, + disable_ssl=True, + retry=debug_retry, + ) + + # Mount debug adapter for HTTP request logging + debug_adapter = PrometheusDebugHTTPAdapter(server.name) + client.session.mount("http://", debug_adapter) + client.session.mount("https://", debug_adapter) + else: + # ClickHouse or other protocols + client = QueryClientFactory.create( + protocol=protocol, + server_url=server.url, + server_name=server.name, + database=server.database if server.database else "default", + user=server.user if server.user else "default", + password=server.password if server.password else "", + ) + + # Mount debug adapter for HTTP request logging + debug_adapter = PrometheusDebugHTTPAdapter(server.name) + client.session.mount("http://", debug_adapter) + client.session.mount("https://", debug_adapter) + + servers[server.name] = 
client + logger.debug( + "Connected to server {} ({}) with HTTP debug logging enabled", + server.name, + protocol, + ) + + if args.align_query_time and server.name == args.server_for_alignment: + server_url_for_alignment = server.url + + query_start_times = None + if args.align_query_time: + assert server_url_for_alignment is not None + assert query_engine_config is not None + query_start_times = get_query_start_times( + server_url_for_alignment, query_engine_config + ) + logger.debug("Got query start times") + + # Calculate profiler timing based on all query groups + min_starting_delay = min(qg.starting_delay for qg in config.query_groups) + max_duration = 0 + for query_group in config.query_groups: + assert query_group.repetitions is not None + assert query_group.repetition_delay is not None + duration = ( + query_group.repetition_delay * query_group.repetitions + + query_group.starting_delay + - min_starting_delay + ) + max_duration = max(max_duration, duration) + + query_engine_profiler_thread = None + if args.profile_query_engine_pid: + query_engine_profiler_thread = threading.Thread( + target=start_query_engine_profiler, + args=( + args.profile_query_engine_pid, + args.output_dir, + min_starting_delay, + max_duration, + ), + ) + if query_engine_profiler_thread: + logger.debug("Starting query engine profiler thread...") + query_engine_profiler_thread.daemon = True + query_engine_profiler_thread.start() + + prometheus_profiler_thread = None + if args.profile_prometheus_time is not None: + prometheus_profiler_thread = threading.Thread( + target=start_prometheus_profiler, + args=( + args.output_dir, + min_starting_delay, + args.profile_prometheus_time, + ), + ) + if prometheus_profiler_thread: + prometheus_profiler_thread.daemon = True + prometheus_profiler_thread.start() + + if args.export_latencies_for_prometheus is not None: + exporter_socket_addr = args.export_latencies_for_prometheus.split(sep=":") + exporter_ip = exporter_socket_addr[0] + exporter_port = 
int(exporter_socket_addr[1]) + latency_exporter = QueryLatencyExporter(addr=exporter_ip, port=exporter_port) + logger.debug( + f"Running with query latency exporter at {args.export_latencies_for_prometheus}" + ) + latency_exporter.launch() + else: + latency_exporter = None + + # Initialize streaming serializer if not in dry run mode + streaming_serializer = None + if not args.dry_run: + streaming_serializer = SerializerFactory.create( + args.serialization_format, args.output_dir + ) + + # Prepare metadata for streaming - include per-group information + query_groups_metadata = [] + for query_group_idx, query_group in enumerate(config.query_groups): + query_groups_metadata.append( + { + "query_group_idx": query_group_idx, + "query_group_id": query_group.id, + "queries": query_group.queries, + "repetitions": query_group.repetitions, + } + ) + + metadata = { + "query_groups": query_groups_metadata, + "servers": list(servers.keys()), + } + streaming_serializer.streaming_write_start(metadata) + + # Spawn threads for each query group + query_group_threads = [] + results_per_group: List[Optional[ResultDict]] = [None] * len(config.query_groups) + + def run_query_group(query_group_idx: int, query_group: Any) -> None: + """Wrapper function to run a query group and store results.""" + try: + results = handle_query_group( + servers, + query_group, + query_group_idx, + query_start_times, + args.dry_run, + args.parallel, + latency_exporter, + streaming_serializer, + ) + results_per_group[query_group_idx] = results + except Exception as e: + logger.error( + f"Query group {query_group_idx} (id={query_group.id}) failed with " + f"{type(e).__name__}: {e}", + exc_info=True, + ) + results_per_group[query_group_idx] = None + raise # Re-raise to ensure it's logged but thread still terminates + + for query_group_idx, query_group in enumerate(config.query_groups): + thread = threading.Thread( + target=run_query_group, + args=(query_group_idx, query_group), + ) + 
query_group_threads.append(thread) + thread.start() + logger.debug(f"Started thread for query group {query_group_idx}") + + # Wait for all query group threads to complete + for idx, thread in enumerate(query_group_threads): + thread.join() + logger.debug(f"Query group {idx} thread completed") + + # Merge results from all query groups into single structure + results_across_servers: Dict[str, Dict[int, Any]] = {} + for server_name in servers.keys(): + results_across_servers[server_name] = {} + + for group_results in results_per_group: + if group_results: + for server_name, server_results in group_results.items(): + results_across_servers[server_name].update(server_results) + + if not args.dry_run and streaming_serializer is not None: + # Finalize streaming write + streaming_serializer.streaming_write_end() + + # deprecated: save results in a pickle file + # with open(os.path.join(args.output_dir, args.result_output_file), "wb") as fout: + # pickle.dump(results_across_servers, fout) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="") + parser.add_argument("--config_file", type=str, required=True) + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--output_file", type=str, required=True) + # deprecated: + # parser.add_argument("--result_output_file", type=str, default="results.pkl") + + parser.add_argument("--query_engine_config_file", type=str, required=False) + parser.add_argument("--align_query_time", action="store_true", required=False) + parser.add_argument("--server_for_alignment", type=str, default="sketchdb") + + parser.add_argument("--dry_run", action="store_true", required=False) + parser.add_argument( + "--parallel", + action="store_true", + help="Execute queries in parallel", + required=False, + ) + + parser.add_argument("--profile_query_engine_pid", type=int, required=False) + parser.add_argument("--profile_prometheus_time", type=int, required=False) + + parser.add_argument( + 
"--export_latencies_for_prometheus", + type=str, + help="Run prometheus query latency exporter at ", + required=False, + ) + + parser.add_argument( + "--serialization_format", + type=str, + choices=["jsonl", "parquet"], + default="jsonl", + help="Format for serializing query results (jsonl or parquet)", + required=False, + ) + + args = parser.parse_args() + main(args) diff --git a/PrometheusClient/pyproject.toml b/PrometheusClient/pyproject.toml new file mode 100644 index 0000000..000d249 --- /dev/null +++ b/PrometheusClient/pyproject.toml @@ -0,0 +1,36 @@ +[tool.black] +line-length = 88 +target-version = ['py38', 'py39', 'py310', 'py311'] +include = '\.pyi?$' +exclude = ''' +/( + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist +)/ +''' + +[tool.mypy] +python_version = "3.8" +mypy_path = "../../../CommonDependencies/py" + +# STRICT MODE ENABLED +# This enables all 14 strict type checking flags +strict = false + +# Ignore missing imports for third-party libraries without stubs +[[tool.mypy.overrides]] +module = [ + "prometheus_api_client.*", + "loguru.*", + "promql_utilities.*", + "jinja2.*" +] +ignore_missing_imports = true diff --git a/PrometheusClient/requirements.txt b/PrometheusClient/requirements.txt new file mode 100644 index 0000000..95ba595 --- /dev/null +++ b/PrometheusClient/requirements.txt @@ -0,0 +1,10 @@ +prometheus-api-client==0.5.5 +prometheus_client==0.21.1 +pyyaml +numpy +loguru +requests +py-spy +typing_extensions==4.13.2 +jinja2 +types-jinja2 diff --git a/PrometheusClient/similarity_scores.py b/PrometheusClient/similarity_scores.py new file mode 100644 index 0000000..986490f --- /dev/null +++ b/PrometheusClient/similarity_scores.py @@ -0,0 +1,21 @@ +import numpy as np +from numpy.typing import NDArray +from typing import Any + + +def correlation( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.corrcoef(exact, estimate)[0, 1]) + + 
+def l1_norm( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.sum(np.abs(exact - estimate))) + + +def l2_norm( + exact: NDArray[np.floating[Any]], estimate: NDArray[np.floating[Any]] +) -> float: + return float(np.sum(np.square(exact - estimate))) diff --git a/PrometheusClient/type_aliases.py b/PrometheusClient/type_aliases.py new file mode 100644 index 0000000..7e343eb --- /dev/null +++ b/PrometheusClient/type_aliases.py @@ -0,0 +1,31 @@ +"""Type aliases for PrometheusClient codebase.""" + +from typing import Dict, Any, Callable +from typing_extensions import TypeAlias + +# Server and connection types +ServerName: TypeAlias = str +ServerURL: TypeAlias = str +ServerDict: TypeAlias = Dict[str, Any] # Dict of server_name -> QueryClient + +# Query related types +Query: TypeAlias = str +QueryIndex: TypeAlias = int +RepetitionIndex: TypeAlias = int +UnixTimestamp: TypeAlias = int + +# Result types +ResultDict: TypeAlias = Dict[ + str, Dict[int, Any] +] # Dict[server_name][query_idx] -> QueryResultAcrossTime +SimilarityScores: TypeAlias = Dict[ + str, Dict[str, float] +] # Dict[function_name][query] -> score + +# Configuration types +QueryStartTimes: TypeAlias = Dict[str, float] # Dict[query] -> start_time +AggregationConfig: TypeAlias = Dict[str, Any] +QueryEngineConfig: TypeAlias = Dict[str, Any] + +# Function types +SimilarityFunction: TypeAlias = Callable[[Any, Any], float] diff --git a/PrometheusExporters/.gitignore b/PrometheusExporters/.gitignore new file mode 100644 index 0000000..3540786 --- /dev/null +++ b/PrometheusExporters/.gitignore @@ -0,0 +1,7 @@ +pyvenv.cfg +bin/ +lib/ +include/ +__pycache__/ +.DS_Store +**/target/ diff --git a/PrometheusExporters/.isort.cfg b/PrometheusExporters/.isort.cfg new file mode 100644 index 0000000..b9fb3f3 --- /dev/null +++ b/PrometheusExporters/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +profile=black diff --git a/PrometheusExporters/.mypy.ini b/PrometheusExporters/.mypy.ini 
new file mode 100644 index 0000000..8cb50b6 --- /dev/null +++ b/PrometheusExporters/.mypy.ini @@ -0,0 +1,4 @@ +[mypy] +files = "**/*.py" +ignore_missing_imports = True +disable_error_code = import-untyped diff --git a/PrometheusExporters/LICENSE b/PrometheusExporters/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/PrometheusExporters/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/PrometheusExporters/README.md b/PrometheusExporters/README.md new file mode 100644 index 0000000..31eaeea --- /dev/null +++ b/PrometheusExporters/README.md @@ -0,0 +1,17 @@ +# SketchDB Prometheus Exporters + +This repository contains multiple Prometheus exporters for exposing various types of metrics that can be scraped by a Prometheus server. 
+ +## Available Exporters + +- **Cluster Data Exporter** (Rust) - Exposes cluster resource usage metrics from Google and Alibaba cluster trace datasets +- **Fake Exporter** (Rust or Python) - Generates synthetic, pseudorandom Prometheus metrics +- **Query Cost Exporter** (Python) - Exports query cost metrics and resource usage statistics +- **Query Latency Exporter** (Python) - Monitors and exports query latency metrics + +## Metrics Endpoint + +All exporters expose metrics at: +``` +http://localhost:/metrics +``` diff --git a/PrometheusExporters/cluster_data_exporter/.dockerignore b/PrometheusExporters/cluster_data_exporter/.dockerignore new file mode 100644 index 0000000..1634158 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/.dockerignore @@ -0,0 +1,9 @@ +target/ +.git/ +.gitignore +README.md +data/ +*.csv +*.gz +docker_compose_frames/ +scripts/ diff --git a/PrometheusExporters/cluster_data_exporter/.gitignore b/PrometheusExporters/cluster_data_exporter/.gitignore new file mode 100644 index 0000000..1e7caa9 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/.gitignore @@ -0,0 +1,2 @@ +Cargo.lock +target/ diff --git a/PrometheusExporters/cluster_data_exporter/Cargo.toml b/PrometheusExporters/cluster_data_exporter/Cargo.toml new file mode 100644 index 0000000..96ef3aa --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "cluster_data_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +prometheus = "0.14.0" +tokio = { version = "1", features = ["full"] } +hyper = { version = "1", features = ["full"] } +hyper-util = { version = "0.1", features = ["full"] } +lazy_static = "1.5" +csv = "1.3" +serde = { version = "1.0", features = ["derive"] } +concurrent-queue = "2.5.0" +flate2 = "1.1.2" +clap = { version = "4.5.41", features = ["derive"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" diff --git 
a/PrometheusExporters/cluster_data_exporter/Dockerfile b/PrometheusExporters/cluster_data_exporter/Dockerfile new file mode 100644 index 0000000..6e5dfc2 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/Dockerfile @@ -0,0 +1,38 @@ +# Use the official Rust image as a build environment +FROM rust:latest AS builder + +# Set the working directory inside the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application in release mode +RUN cargo build --release + +# Use a minimal runtime image +FROM debian:bookworm-slim + +# Install necessary runtime dependencies +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Create a non-root user +RUN useradd -r -s /bin/false exporter + +# Create the data and output directories that will be mounted as volumes +RUN mkdir -p /data /output && chown exporter:exporter /data /output + +# Copy the binary from the builder stage +COPY --from=builder /usr/src/app/target/release/cluster_data_exporter /usr/local/bin/cluster_data_exporter + +# Change to the non-root user +USER exporter + +# Set the entrypoint to the binary +# All arguments including port and input directory must be provided via docker run or docker-compose +ENTRYPOINT ["cluster_data_exporter"] diff --git a/PrometheusExporters/cluster_data_exporter/README.md b/PrometheusExporters/cluster_data_exporter/README.md new file mode 100644 index 0000000..e075e13 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/README.md @@ -0,0 +1,244 @@ +# CLUSTER DATA EXPORTER + +A Prometheus exporter that exposes cluster resource usage metrics from Google and Alibaba cluster trace datasets. + +## DESCRIPTION + +This exporter reads CSV data from certain datasets provided by Google or Alibaba and exposes them as Prometheus metrics. 
The exporter supports both Google task resource usage data from 2011 and Alibaba node and microservice resource data from 2021 and 2022. Instructions for downloading this data are linked in this document. + +## INSTALLATION + +### Prerequisites + +- Rust 1.70+ (edition 2021) +- Access to Google or Alibaba cluster datasets + +### Building + +```bash +cargo build --release +``` + +## USAGE + +```bash +cluster_data_exporter -i -p [OPTIONS] +``` + +### Google Provider + +```bash +cluster_data_exporter -i ./google/clusterdata-2011/ -p 8080 google [OPTIONS] +``` + +### Alibaba Provider + +```bash +cluster_data_exporter -i ./alibaba/2021/ -p 8080 alibaba [OPTIONS] +``` + +## DATA SOURCES + +### Google Cluster Data + +Instructions on how to download the Google Cluster 2011 task usage data: +https://github.com/google/cluster-data/blob/master/ClusterData2011_2.md + +The only part of the dataset used by the exporter is the task_usage section, so there's no need to install the whole dataset + +Expected directory structure: +``` +path/to/task/resource/usage/dir/ +├── part-00000-of-00500.csv.gz +├── part-00001-of-00500.csv.gz +└── ... +``` + +### Alibaba Cluster Data + +Instructions on downloading the Alibaba microservice trace datasets: +- 2021: https://github.com/alibaba/clusterdata/blob/master/cluster-trace-microservices-v2021/README.md#introduction-of-trace-data +- 2022: https://github.com/alibaba/clusterdata/tree/master/cluster-trace-microservices-v2022#trace-data-download + +The only parts of the datasets used by the exporter are the Node and MSResource sections, the rest can be discarded. + +Expected directory structure (after preprocessing): + +2021 Data: +``` +path/to/Node/ +├── Node_0.csv.gz +├── Node_1.csv.gz +└── ... + +path/to/MSResource/ +├── MSResource_0.csv.gz +├── MSResource_1.csv.gz +└── ... +``` + +2022 Data: +``` +path/to/NodeMetrics/ +├── NodeMetrics_0.csv.gz +├── NodeMetrics_1.csv.gz +└── ... 
+ +path/to/MSMetrics/ +├── MSMetrics_0.csv.gz +├── MSMetrics_1.csv.gz +└── ... +``` + +## DATA PREPROCESSING FOR ALIBABA + +IMPORTANT: Before running the exporter on Alibaba data, you must run the preprocessing script to sort the data by timestamp and recompress it as a .csv.gz: + +```bash +./bin/alibaba/sort_and_format.sh --year <2021|2022> [-n] [-m] +``` + +This script extracts, sorts by timestamp, and recompresses the Alibaba CSV files in a format the exporter can read (.csv.gz). The sorting is necessary because some datasets (mainly 2022 data) are not sorted by timestamp, which is required for proper metric export timing. + +### Input Directory Structure + +The input directory should contain one or both of the subdirectories with unprocessed files, i.e. the untouched /data/ directory created from running the fetchData.sh scripts from the Alibaba github repos. For example: + +``` +alibaba/2021/data/ +├── Node/ +│ ├── Node_0.tar.gz +│ ├── Node_1.tar.gz +│ └── ... +└── MSResource/ + ├── MSResource_0.tar.gz + ├── MSResource_1.tar.gz + └── ... + +alibaba/2022/data/ +├── NodeMetrics/ +│ ├── NodeMetrics_0.tar.gz +│ ├── NodeMetrics_1.tar.gz +│ └── ... +└── MSMetrics/ + ├── MSMetrics_0.tar.gz + ├── MSMetrics_1.tar.gz + └── ... 
+``` + +Examples: + +```bash +# Process 2021 Node data +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -n + +# Process 2021 MSResource data +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -m + +# Process both Node and MSResource data for 2021 +./bin/alibaba/sort_and_format.sh alibaba/2021/data --year 2021 -n -m +``` + +## COMMAND LINE ARGUMENTS + +- -i, --input-directory: Path to the directory containing CSV data files +- -p, --port: Port number for the HTTP server + +### Provider-specific Options + +#### Google +- --metrics: Specific metrics to export from task resource usage data +- --all-parts: Process all CSV parts (default behavior) +- --part-index: Process only a specific part index (0-499) + +#### Alibaba +- --data-type: Type of data to export (node or msresource) +- --data-year: Year of the dataset (2021 or 2022) +- --all-parts: Process all CSV parts (default behavior) +- --part-index: Process only a specific part index + +## DOCKER USAGE + +### Prerequisites for Docker + +1. Download and preprocess your CSV data as described in the DATA SOURCES section above +2. Place the preprocessed data in a local directory (e.g., `./data/`) + +### Building and Running with Docker + +Build the Docker image: +```bash +docker build -t cluster-data-exporter . 
+```
+
+Run with Docker (example for Google data):
+```bash
+docker run -v ./data:/data:ro -p 40000:40000 cluster-data-exporter \
+  --input-directory /data \
+  --port 40000 \
+  google \
+  --metrics mean-cpu-usage-rate,canonical-memory-usage \
+  --all-parts
+```
+
+Run with Docker (example for Alibaba data):
+```bash
+docker run -v ./data:/data:ro -p 40000:40000 cluster-data-exporter \
+  --input-directory /data \
+  --port 40000 \
+  alibaba \
+  --data-type node \
+  --data-year 2021 \
+  --all-parts
+```
+
+### Using Docker Compose
+
+#### Automated Generation with Python Script
+
+The `scripts/generate_docker_compose.py` script automatically generates docker-compose.yml files from the frame templates and fills in certain fields.
+
+**Google Provider Example:**
+```bash
+python scripts/generate_docker_compose.py google --metrics mean-cpu-usage-rate,max-cpu-usage --port 8080 --input-dir ./data
+```
+
+**Alibaba Provider Example:**
+```bash
+python scripts/generate_docker_compose.py alibaba --data-type node --data-year 2021 --port 8080 --input-dir ./data
+```
+
+The script will:
+- Validate your configuration options
+- Generate a docker-compose.yml file with correct settings
+- Update port mappings and volume mounts automatically
+
+#### Manual Setup with Frame Files
+
+Alternatively, the `docker_compose_frames/` directory contains pre-configured docker-compose files for different providers and configurations. These frame files will still require small edits before running docker-compose, see each frame file for more information.
+
+- **Google Provider**: `google-docker-compose.yml` - Edit list of metrics to export
+- **Alibaba Provider**: Provider-specific frames for each data type and year combination:
+  - `alibaba-node-2021-docker-compose.yml`
+  - `alibaba-node-2022-docker-compose.yml`
+  - `alibaba-msresource-2021-docker-compose.yml`
+  - `alibaba-msresource-2022-docker-compose.yml`
+
+To use a frame file:
+1. 
Copy the appropriate frame file from `docker_compose_frames/` to your working directory as `docker-compose.yml`
+2. Edit the file with any options that still need to be filled in (marked with "CHANGE THIS" comments)
+3. Run: `docker-compose up -d`
+
+### Data Volume Requirements
+
+- The container expects data to be mounted at `/data`
+- Data must be preprocessed according to the instructions in the DATA SOURCES section
+- For Alibaba data, ensure you've run the sorting and compression scripts before mounting
+- Mount the volume as read-only (`:ro`)
+
+## METRICS ENDPOINT
+
+Once running, metrics are available at:
+```
+http://localhost:<port>/metrics
+``` diff --git a/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 b/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 new file mode 100644 index 0000000..2587b52 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker-compose.yml.j2 @@ -0,0 +1,41 @@ +# cluster_data_exporter Docker Compose Template
+# This template is rendered with Jinja2 to generate the final docker-compose.yml
+
+{% if x_bake %}
+x-bake:
+  - COMPOSE_BAKE=true
+
+{% endif %}
+services:
+  cluster-data-exporter:
+    image: sketchdb-cluster-data-exporter:latest
+    container_name: {{ container_name | default('sketchdb-cluster-data-exporter') }}
+    volumes:
+      - {{ data_directory }}:/data:ro
+    ports:
+      - "{{ port }}:{{ port }}"
+    command: [
+      "--input-directory","/data",
+      "--port","{{ port }}",
+      "{{ provider }}",{% if provider == "google" %}
+      "--metrics={{ metrics }}",{% if process_mode == "all-parts" %}
+      "--all-parts"{% else %}
+      "--part-index={{ part_index }}"{% endif %}{% elif provider == "alibaba" %}
+      "--data-type={{ data_type }}",
+      "--data-year={{ data_year }}",{% if process_mode == "all-parts" %}
+      "--all-parts"{% else %}
+      "--part-index={{ part_index }}"{% endif %}{% endif %}
+    ]
+    restart: unless-stopped
+{% if memory_limit or memory_reservation %}
+    deploy:
+      resources:
+{% if memory_limit %}
+        limits:
+
memory: {{ memory_limit }} +{% endif %} +{% if memory_reservation %} + reservations: + memory: {{ memory_reservation }} +{% endif %} +{% endif %} diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml new file mode 100644 index 0000000..683bbfa --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2021-docker-compose.yml @@ -0,0 +1,31 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba MsResource 2021 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=ms-resource", + "--data-year=2021", + "--all-parts", # or "--part-index=" + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml new file mode 100644 index 0000000..4bdf90e --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-msresource-2022-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba MSResource 2022 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=ms-resource", + "--data-year=2022", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml new file mode 100644 index 0000000..2134943 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2021-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba Node 2021 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=node", + "--data-year=2021", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml new file mode 100644 index 0000000..bbe7b7b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/alibaba-node-2022-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . 
+ container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Alibaba Node 2022 preprocessed csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "alibaba", + "--data-type=node", + "--data-year=2022", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml new file mode 100644 index 0000000..e013892 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/base-docker-compose.yml @@ -0,0 +1,39 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # Mount the local data directory to /data in the container + # Replace './data' with the path to your csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + # Add your provider-specific arguments here + # For Google data example: + # "google", + # "--metrics", "mean_cpu_usage_rate,canonical_memory_usage", + # "--all-parts" + # + # For Alibaba data example: + # "alibaba", + # "--data-type", "node", + # "--data-year", "2021", + # "--all-parts" + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml new file mode 100644 index 
0000000..22ce384 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/docker_compose_frames/google-docker-compose.yml @@ -0,0 +1,32 @@ +x-bake: + - COMPOSE_BAKE=true + +services: + cluster-data-exporter: + build: . + container_name: cluster-data-exporter + volumes: + # CHANGE THIS: Replace './data' with the path to your Google csv.gz files + - ./data:/data:ro + ports: + # Map container port to host port - adjust as needed + - "40000:40000" + command: [ + "--input-directory", + "/data", + "--port", + "40000", + "google", + # CHANGE THIS: Replace with your desired metrics (comma-separated) + "--metrics=mean_cpu_usage_rate,canonical_memory_usage", + "--all-parts", # or "--part-index=" + + ] + restart: unless-stopped + # Optional: set resource limits + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M diff --git a/PrometheusExporters/cluster_data_exporter/installation/install.sh b/PrometheusExporters/cluster_data_exporter/installation/install.sh new file mode 100755 index 0000000..915c68b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Cluster Data Exporter Docker image..." +cd "$PARENT_DIR" +docker build . -f Dockerfile -t sketchdb-cluster-data-exporter:latest + +echo "Cluster Data Exporter Docker image built successfully: sketchdb-cluster-data-exporter:latest" \ No newline at end of file diff --git a/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py b/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py new file mode 100644 index 0000000..38c32f9 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/scripts/generate_docker_compose.py @@ -0,0 +1,253 @@ +""" +Script to generate docker-compose.yml files from frame templates based on data provider configuration. 
+ +This script takes a data provider (google or alibaba) and provider-specific arguments, +then generates a docker-compose.yml file by copying and modifying the appropriate frame file. +""" + +import argparse +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +# Valid values from Rust enums (CLI format with hyphens) +VALID_GOOGLE_METRICS = [ + "mean-cpu-usage-rate", + "canonical-memory-usage", + "assigned-memory-usage", + "unmapped-page-cache-memory-usage", + "total-page-cache-memory-usage", + "max-memory-usage", + "mean-disk-io-time", + "mean-local-disk-space-used", + "max-cpu-usage", + "max-disk-io-time", + "cycles-per-instruction", + "memory-accesses-per-instruction", + "sample-portion", + "sampled-cpu-usage", +] + +VALID_ALIBABA_DATA_TYPES = ["node", "msresource"] +VALID_ALIBABA_DATA_YEARS = [2021, 2022] + + +def validate_google_metrics(metrics: List[str]) -> None: + """Validate that all provided Google metrics are valid.""" + invalid_metrics = [m for m in metrics if m not in VALID_GOOGLE_METRICS] + if invalid_metrics: + print(f"Error: Invalid Google metrics: {', '.join(invalid_metrics)}") + print(f"Valid metrics: {', '.join(VALID_GOOGLE_METRICS)}") + sys.exit(1) + + +def validate_alibaba_args(data_type: str, data_year: int) -> None: + """Validate Alibaba data type and year arguments.""" + if data_type not in VALID_ALIBABA_DATA_TYPES: + print(f"Error: Invalid data type: {data_type}") + print(f"Valid data types: {', '.join(VALID_ALIBABA_DATA_TYPES)}") + sys.exit(1) + + if data_year not in VALID_ALIBABA_DATA_YEARS: + print(f"Error: Invalid data year: {data_year}") + print(f"Valid years: {', '.join(map(str, VALID_ALIBABA_DATA_YEARS))}") + sys.exit(1) + + +def get_frame_file_path( + provider: str, data_type: Optional[str] = None, data_year: Optional[int] = None +) -> Path: + """Get the path to the appropriate frame file based on provider and arguments.""" + frames_dir = Path("docker_compose_frames") + + if provider 
== "google": + return frames_dir / "google-docker-compose.yml" + elif provider == "alibaba": + return frames_dir / f"alibaba-{data_type}-{data_year}-docker-compose.yml" + else: + raise ValueError(f"Unknown provider: {provider}") + + +def load_yaml_file(file_path: Path) -> Dict[str, Any]: + """Load YAML file and return parsed content.""" + with open(file_path, "r") as f: + return yaml.safe_load(f) + + +def save_yaml_file(file_path: Path, data: Dict[str, Any]) -> None: + """Save data to YAML file.""" + with open(file_path, "w") as f: + yaml.dump(data, f, default_flow_style=False, sort_keys=False) + + +def update_command_arg(command: List[str], arg_name: str, new_value: str) -> List[str]: + """Update a command line argument in the command list.""" + updated_command = [] + i = 0 + while i < len(command): + if command[i] == arg_name: + updated_command.append(command[i]) + if i + 1 < len(command): + updated_command.append(new_value) + i += 2 + else: + updated_command.append(new_value) + i += 1 + elif command[i].startswith(f"{arg_name}="): + updated_command.append(f"{arg_name}={new_value}") + i += 1 + else: + updated_command.append(command[i]) + i += 1 + return updated_command + + +def generate_google_compose( + metrics: List[str], port: Optional[int], input_dir: Optional[str] +) -> None: + """Generate docker-compose.yml for Google provider.""" + frame_file = get_frame_file_path("google") + output_file = Path("docker-compose.yml") + + # Load frame file + compose_data = load_yaml_file(frame_file) + + # Update metrics + metrics_str = ",".join(metrics) + service = compose_data["services"]["cluster-data-exporter"] + command = service["command"] + + # Find and update metrics argument + for i, arg in enumerate(command): + if arg.startswith("--metrics="): + command[i] = f"--metrics={metrics_str}" + break + + # Update optional arguments if provided + if port is not None: + # Update port mapping + service["ports"] = [f"{port}:{port}"] + # Update port in command + command = 
update_command_arg(command, "--port", str(port)) + service["command"] = command + + if input_dir is not None: + # Update volume mapping + service["volumes"] = [f"{input_dir}:/data:ro"] + + # Save updated compose file + save_yaml_file(output_file, compose_data) + + +def generate_alibaba_compose( + data_type: str, + data_year: int, + port: Optional[int], + input_dir: Optional[str], + speedup: Optional[int], +) -> None: + """Generate docker-compose.yml for Alibaba provider.""" + frame_file = get_frame_file_path("alibaba", data_type, data_year) + output_file = Path("docker-compose.yml") + + # Load frame file + compose_data = load_yaml_file(frame_file) + + service = compose_data["services"]["cluster-data-exporter"] + command = service["command"] + + # Update optional arguments if provided + if port is not None: + # Update port mapping + service["ports"] = [f"{port}:{port}"] + # Update port in command + command = update_command_arg(command, "--port", str(port)) + service["command"] = command + + if input_dir is not None: + # Update volume mapping + service["volumes"] = [f"{input_dir}:/data:ro"] + + # Add speedup if specified + if speedup is not None: + if "--speedup" not in " ".join(command): + command.append(f"--speedup={speedup}") + else: + command = update_command_arg(command, "--speedup", str(speedup)) + service["command"] = command + + # Save updated compose file + save_yaml_file(output_file, compose_data) + + +def main(): + parser = argparse.ArgumentParser( + description="Generate docker-compose.yml from frame files based on data provider configuration", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Google provider with specific metrics + python scripts/generate_docker_compose.py google --metrics mean_cpu_usage_rate,max_cpu_usage --port 8080 + + # Alibaba provider with node data from 2021 + python scripts/generate_docker_compose.py alibaba --data-type node --data-year 2021 --port 8080 + + # With custom input directory + python 
scripts/generate_docker_compose.py google --metrics canonical_memory_usage --input-dir /path/to/data + """, + ) + + parser.add_argument("provider", choices=["google", "alibaba"], help="Data provider") + parser.add_argument("--port", type=int, help="Port number for the HTTP server") + parser.add_argument("--input-dir", "--input-directory", help="Input directory path") + + # Google-specific arguments + google_group = parser.add_argument_group("Google provider arguments") + google_group.add_argument( + "--metrics", type=str, help="Comma-separated list of metrics to export" + ) + + # Alibaba-specific arguments + alibaba_group = parser.add_argument_group("Alibaba provider arguments") + alibaba_group.add_argument( + "--data-type", choices=VALID_ALIBABA_DATA_TYPES, help="Type of data to export" + ) + alibaba_group.add_argument( + "--data-year", + type=int, + choices=VALID_ALIBABA_DATA_YEARS, + help="Year of the dataset", + ) + alibaba_group.add_argument( + "--speedup", + type=int, + help="Speedup factor for faster-than-realtime export (1=real-time, 10=10x faster)", + ) + + args = parser.parse_args() + + # Validate provider-specific required arguments + if args.provider == "google": + if not args.metrics: + parser.error("Google provider requires --metrics argument") + metrics_list = [m.strip() for m in args.metrics.split(",")] + validate_google_metrics(metrics_list) + generate_google_compose(metrics_list, args.port, args.input_dir) + + elif args.provider == "alibaba": + if not args.data_type: + parser.error("Alibaba provider requires --data-type argument") + if not args.data_year: + parser.error("Alibaba provider requires --data-year argument") + validate_alibaba_args(args.data_type, args.data_year) + generate_alibaba_compose( + args.data_type, args.data_year, args.port, args.input_dir, args.speedup + ) + + print(f"Generated docker-compose.yml for {args.provider} provider") + + +if __name__ == "__main__": + main() diff --git 
a/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt b/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt new file mode 100644 index 0000000..5fde258 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/scripts/requirements.txt @@ -0,0 +1,2 @@ +PyYAML==6.0.2 +types-PyYAML==6.0.12.20250516 diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs new file mode 100644 index 0000000..98bc4ec --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics.rs @@ -0,0 +1,71 @@ +use clap::ValueEnum; +use std::sync::OnceLock; + +type BoxedErr = Box; + +// Speedup factor for faster-than-realtime export (set via CLI) +pub static SPEEDUP_FACTOR: OnceLock = OnceLock::new(); + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum MsDataType { + // BM Node runtime information. + // It records CPU and memory utilization of 1300+ BM nodes in a production cluster. + Node, + // MS runtime information. + // It records CPU and memory utilization of 90000+ containers for 1300+ MSs in the same production cluster. + MsResource, +} + +pub mod ms_resource; +pub mod node; + +// The type of microservice data to export. Should be initialized before any +// reading or exporting begins +pub static EXPORTER_DATA_TYPE: OnceLock = OnceLock::new(); + +/// @brief Calls the export_from_queue() function based on runtime initialized +/// EXPORTER_DATA_TYPE +pub fn export_from_queue() { + match EXPORTER_DATA_TYPE.get().unwrap() { + MsDataType::Node => node::export_from_queue(), + MsDataType::MsResource => ms_resource::export_from_queue(), + } +} + +/// @brief Main routine for the thread that will be reading csv data and +/// exporting. 
This function just uses a match statement to call the reading +/// and exporting routine required by the specified mode +/// +/// @param[in] input_dir The input directory containing csv files +/// @param[in] all_parts Whether to start from part 0 of csv files and continue +/// until no more files are found. This should be false if +/// part_index is Some(part) +/// @param[in] part_index Which csv file part to use as the data source. +/// This should be None if all_parts is true. +/// @param[in] data_type The type of data out of the different types of trace +/// data in the Alibaba micro-services trace data +/// @param[in] data_year The year of the trace data. Supported values are +/// 2021 and 2022 +/// @param[in] speedup Speedup factor for faster-than-realtime export +/// +/// @return The result returned by the reader thread. +pub fn reader_thread_routine( + input_dir: String, + all_parts: bool, + part_index: Option, + data_type: MsDataType, + data_year: u32, + speedup: u64, +) -> Result<(), BoxedErr> { + use crate::alibaba_metrics::node; + let _ = EXPORTER_DATA_TYPE.set(data_type); + let _ = SPEEDUP_FACTOR.set(speedup); + let result = match EXPORTER_DATA_TYPE.get().unwrap() { + MsDataType::Node => node::read_and_queue(&input_dir, all_parts, part_index, data_year), + MsDataType::MsResource => { + ms_resource::read_and_queue(&input_dir, all_parts, part_index, data_year) + } + }; + + result +} diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs new file mode 100644 index 0000000..cb0e91b --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/ms_resource.rs @@ -0,0 +1,248 @@ +use crate::utilities; +pub use concurrent_queue::ConcurrentQueue; +use csv::Reader; +use flate2::read::GzDecoder; +use lazy_static::lazy_static; +use prometheus::{register_gauge_vec, GaugeVec}; +use std::fs::File; +use std::io::BufReader; +use 
std::thread; +use std::time::Duration; +use tracing::{debug, info}; + +const FILENAME_PARTS_2021: [&str; 2] = ["MSResource_", ".csv.gz"]; +const FILENAME_PARTS_2022: [&str; 2] = ["MSMetrics_", ".csv.gz"]; + +const DATA_QUEUE_CAP: usize = 400_000; +const QUEUE_POLL_INTERVAL_MS: u64 = 250; +const CSV_DELIMITER: u8 = b','; +const LABELS: [&str; 3] = ["ms_name", "ms_instance_id", "node_id"]; + +type CsvGzReader = Reader>>; +type BoxedErr = Box; +/// Struct for holding fields after deserialization +/// for both 2021 and 2022 +#[derive(Debug, serde::Deserialize)] +pub struct MsResourceCsvFields { + #[serde(rename = "", skip)] + _trace: u64, + + #[serde(rename = "timestamp")] + timestamp: u64, + + #[serde(rename = "nodeid")] + node_id: String, + + #[serde(rename = "msname")] + ms_name: String, + + #[serde(rename = "msinstanceid")] + ms_instance_id: String, + + #[serde(alias = "instance_cpu_usage", alias = "cpu_utilization")] + cpu_usage: Option, + + #[serde(alias = "instance_memory_usage", alias = "memory_utilization")] + memory_usage: Option, +} + +lazy_static! { + pub static ref MS_RESOURCE_DATA_QUEUE: ConcurrentQueue = + ConcurrentQueue::bounded(DATA_QUEUE_CAP); + pub static ref CPU_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_microservice_cpu_usage", + "Cpu usages for microservices by alibaba nodes", + &LABELS, + ) + .unwrap(); + pub static ref MEMORY_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_microservice_memory_usage", + "Memory usages for microservices by alibaba nodes", + &LABELS, + ) + .unwrap(); +} + +/// @brief Gets the filename for the MsResource csv data based on the year +/// and the index number +/// +/// @param[in] year The year of the trace data. 
Supported values are 2021 +/// and 2022 +/// @param[in] index_no The index of the csv file +/// +/// @return A String of the filename based on the data year and index num +fn get_filename(year: u32, index_no: u16) -> String { + let mut filename: String = String::new(); + let prefix: &str; + let suffix: &str; + let index: &str = &format!("{}", index_no); + + match year { + 2021 => { + prefix = FILENAME_PARTS_2021[0]; + suffix = FILENAME_PARTS_2021[1]; + } + 2022 => { + prefix = FILENAME_PARTS_2022[0]; + suffix = FILENAME_PARTS_2022[1]; + } + _ => { + panic!("Invalid year: {}", year); + } + } + filename.push_str(prefix); + filename.push_str(index); + filename.push_str(suffix); + + filename +} + +/// @brief Gets a csv reader for MsResource data +/// +/// @param[in] input_dir The directory containing the csv file +/// @param[in] year Which trace data year to create the reader for. +/// supported years are 2021 and 2022 +/// @param[in] index The index of the csv file +/// +/// @return A Result type containing either the reader or an Error if the file +/// cannot be found +pub fn get_reader(input_dir: &str, year: u32, index: u16) -> Result, BoxedErr> { + use csv::ReaderBuilder; + use std::path::Path; + + let filename: String = get_filename(year, index); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr: BufReader = BufReader::new(fd); + let gz_decoder: GzDecoder> = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(CSV_DELIMITER) + .flexible(true) + .has_headers(true) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Routine for reading MSResource csv data and enqueuing it +/// +/// @param[in] input_dir The input directory containing the csv file +/// @param[in] all_parts Whether or not to read all csv files in the +/// directory, starting from part 0. Once a file +/// cannot be found, this will return. This should +/// be false if a part_index is given. 
+/// @param[in] part_index The part index for a single csv file to use as +/// the data source. This should be None if all_parts +/// is true. +/// @param[in] year The year of the trace data. Supported values are +/// 2021 and 2022 +/// +/// @pre All csv files are uncompressed +/// @pre If all_parts is specified, at least part 0 must exist +/// @pre Either all_parts is true and part_index is None, or all_parts is +/// false and part_index is Some(part) +pub fn read_and_queue( + input_dir: &str, + all_parts: bool, + part_index: Option, + year: u32, +) -> Result<(), BoxedErr> { + let mut part: u16 = 0; + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(input_dir, year, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while MS_RESOURCE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: MsResourceCsvFields = csv_line?; + let _ = MS_RESOURCE_DATA_QUEUE.push(parsed_line); + } + part += 1; + if !all_parts { + break; + } + } // No more files to read, or couldn't find initial file + + if part == 0 { + // Reading always starts at part 0 + panic!( + "Failed to read initial .csv.gz file. 
Check that all data files
+            are named in the correct format (2021: '{}{}', 2022: '{}{}'),
+            and that the csv files contain the field headers at the top
+            ",
+            FILENAME_PARTS_2021[0],
+            FILENAME_PARTS_2021[1],
+            FILENAME_PARTS_2022[0],
+            FILENAME_PARTS_2022[1]
+        );
+    } else {
+        MS_RESOURCE_DATA_QUEUE.close();
+        Ok(())
+    }
+}
+
+/// @brief Takes the timestamp of a trace in milliseconds and
+/// returns the normalized time as a Duration
+///
+/// @param[in] time_millis The trace timestamp in milliseconds
+///
+/// @return The normalized timestamp as a Duration
+///
+/// @NOTE: Brief check of data suggests no dilation is necessary
+///
+/// @NOTE: MSResource data from 2022 is not sorted by timestamp whatsoever,
+/// sometimes the data is listed in order of decreasing timestamp and other
+/// times it's listed in order of increasing timestamp, so the timestamps
+/// are modified to work with the exporter before being queued
+///
+/// @NOTE: SPEEDUP_FACTOR can be set via --speedup CLI argument for faster-than-realtime export
+pub fn get_normalized_start_time(time_millis: u64) -> Duration {
+    let speedup = crate::alibaba_metrics::SPEEDUP_FACTOR.get().unwrap_or(&1);
+    Duration::from_millis(time_millis / speedup)
+}
+
+/// @brief Exports a single line from the MS_RESOURCE_DATA_QUEUE
+///
+/// @param[in] csv_line A parsed line from a MsResource csv file
+pub fn export_line(csv_line: MsResourceCsvFields) {
+    let label_vals: [&str; 3] = [
+        csv_line.ms_name.as_str(),
+        csv_line.ms_instance_id.as_str(),
+        csv_line.node_id.as_str(),
+    ];
+
+    if let Some(cpu_usage) = csv_line.cpu_usage {
+        CPU_USAGE.with_label_values(&label_vals).set(cpu_usage);
+    }
+
+    if let Some(memory_usage) = csv_line.memory_usage {
+        MEMORY_USAGE
+            .with_label_values(&label_vals)
+            .set(memory_usage);
+    }
+}
+
+/// @brief Exports lines from the queue until a line is found with a timestamp
+/// later than the current runtime. 
This function will terminate the +/// the program once the queue has both been closed by the reader thread +/// and the queue is empty +pub fn export_from_queue() { + let elapsed_t: Duration = utilities::get_time_elapsed(); + let check_time = + |line: &MsResourceCsvFields| get_normalized_start_time(line.timestamp) <= elapsed_t; + MS_RESOURCE_DATA_QUEUE + .try_iter() + .take_while(check_time) + .for_each(export_line); + + // No more files to read and empty queue + if MS_RESOURCE_DATA_QUEUE.is_closed() && MS_RESOURCE_DATA_QUEUE.is_empty() { + info!("No more MSResource data to export, shutting down"); + std::process::exit(0); + } +} diff --git a/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs new file mode 100644 index 0000000..e1bf60e --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/alibaba_metrics/node.rs @@ -0,0 +1,232 @@ +use crate::utilities; +use concurrent_queue::ConcurrentQueue; +use csv::{Reader, ReaderBuilder}; +use flate2::read::GzDecoder; +use lazy_static::lazy_static; +use prometheus::{register_gauge_vec, GaugeVec}; +use std::fs::File; +use std::io::BufReader; +use std::path::Path; +use std::thread; +use std::time::Duration; +use tracing::{debug, info}; + +type BoxedErr = Box; +type CsvGzReader = Reader>>; + +const FILENAME_PARTS_2021: [&str; 2] = ["Node_", ".csv.gz"]; +const FILENAME_PARTS_2022: [&str; 2] = ["NodeMetrics_", ".csv.gz"]; + +const DATA_QUEUE_CAP: usize = 400_000; +const QUEUE_POLL_INTERVAL_MS: u64 = 250; +const CSV_DELIMITER: u8 = b','; + +const LABELS: [&str; 1] = ["node_id"]; + +/// Struct for holding fields after deserialization +#[derive(Debug, serde::Deserialize)] +pub struct NodeCsvFields { + #[serde(rename = "", skip)] + _trace: u64, + + #[serde(rename = "timestamp")] + timestamp: u64, + + #[serde(rename = "nodeid")] + node_id: String, + + #[serde(alias = "node_cpu_usage", alias = "cpu_utilization")] + cpu_usage: Option, + + 
#[serde(alias = "node_memory_usage", alias = "memory_utilization")] + memory_usage: Option, +} + +lazy_static! { + pub static ref NODE_DATA_QUEUE: ConcurrentQueue = + ConcurrentQueue::bounded(DATA_QUEUE_CAP); + pub static ref CPU_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_node_cpu_usage", + "Cpu usages by alibaba nodes", + &LABELS, + ) + .unwrap(); + pub static ref MEMORY_USAGE: GaugeVec = register_gauge_vec!( + "alibaba_node_memory_usage", + "Memory usages by alibaba nodes", + &LABELS, + ) + .unwrap(); +} + +/// @brief Gets the filename for the Node_.csv.gz data based on the year +/// and the index number +/// +/// @param[in] year The year of the trace data. Supported values are 2021 +/// and 2022 +/// @param[in] index_no The index of the csv file +/// +/// @return A String of the filename based on the data year and index num +fn get_filename(year: u32, index_no: u16) -> String { + let mut filename: String = String::new(); + let prefix: &str; + let suffix: &str; + let index: &str = &format!("{}", index_no); + + match year { + 2021 => { + prefix = FILENAME_PARTS_2021[0]; + suffix = FILENAME_PARTS_2021[1]; + } + 2022 => { + prefix = FILENAME_PARTS_2022[0]; + suffix = FILENAME_PARTS_2022[1]; + } + _ => { + panic!("Invalid year: {}", year); + } + } + filename.push_str(prefix); + filename.push_str(index); + filename.push_str(suffix); + + filename +} + +/// @brief Gets a csv reader for Node data +/// +/// @param[in] input_dir The directory containing the csv file +/// @param[in] year Which trace data year to create the reader for. +/// supported years are 2021 and 2022 +/// +/// @return A reader for the .csv.gz files +/// +/// @pre All files should have been converted to a .csv.gz format from the +/// .tar.gz format that they come as initially. 
+pub fn get_reader( + input_dir: &str, + year: u32, + index_no: u16, +) -> Result, BoxedErr> { + let filename = get_filename(year, index_no); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr: BufReader = BufReader::new(fd); + let gz_decoder: GzDecoder> = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(CSV_DELIMITER) + .flexible(true) + .has_headers(true) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Takes the timestamp of a trace in milliseconds and +/// returns the normalized time as a Duration +/// +/// @param[in] time_millis The trace timestamp in milliseconds +/// +/// @return The normalized timestamp as a Duration +/// +/// @NOTE: Brief check of data suggests no dilation is necessary +/// +/// @NOTE: Node data from 2022 is not sorted by timestamp whatsoever, +/// sometimes the data is listed in order of decreasing timestamp and other +/// times it's listed in order of increasing timestamp, so the timestamps +/// are modified to work with the exporter before being queued +/// +/// @NOTE: SPEEDUP_FACTOR can be set via --speedup CLI argument for faster-than-realtime export +pub fn get_normalized_start_time(time_millis: u64) -> Duration { + let speedup = crate::alibaba_metrics::SPEEDUP_FACTOR.get().unwrap_or(&1); + Duration::from_millis(time_millis / speedup) +} + +/// @brief Reads the csv data from .csv.gz files and adds them to the queue. 
+/// +/// @param[in] input_dir The input directory +/// @param[in] data_year The year of the trace data +/// +/// @pre All csv data should have been sorted by timestamp and compressed with +/// gzip +pub fn read_and_queue( + input_dir: &str, + all_parts: bool, + part_index: Option, + data_year: u32, +) -> Result<(), BoxedErr> { + let mut part: u16 = 0; + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(input_dir, data_year, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while NODE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: NodeCsvFields = csv_line?; + let _ = NODE_DATA_QUEUE.push(parsed_line); + } // EOF + part += 1; + + if !all_parts { + break; + } + } // No more files to read, or couldn't find initial file + + if part == 0 { + // Reading always starts at part 0 + panic!( + "Failed to read initial .csv.gz file. Check that all data files + are named in the correct format (2021: '{}{}', 2022: '{}{}), + and that the csv files contain the field headers at the top. + ", + FILENAME_PARTS_2021[0], + FILENAME_PARTS_2021[1], + FILENAME_PARTS_2022[0], + FILENAME_PARTS_2022[1] + ); + } else { + NODE_DATA_QUEUE.close(); + Ok(()) + } +} + +/// @brief Exports a single line from the NODE_DATA_QUEUE +/// +/// @param[in] csv_line A parsed line from a Node csv file +pub fn export_line(csv_line: NodeCsvFields) { + let label_vals: [&str; 1] = [csv_line.node_id.as_str()]; + + if let Some(cpu_usage) = csv_line.cpu_usage { + CPU_USAGE.with_label_values(&label_vals).set(cpu_usage); + } + + if let Some(memory_usage) = csv_line.memory_usage { + MEMORY_USAGE + .with_label_values(&label_vals) + .set(memory_usage); + } +} + +/// @brief Exports lines from the queue until a line is found with a timestamp +/// later than the current runtime. 
This function will terminate
/// the program once the queue has been closed by the reader thread, the
/// queue is empty, and no line is being held back for a future scrape
pub fn export_from_queue() {
    let elapsed_t: Duration = utilities::get_time_elapsed();
    let is_due = |line: &NodeCsvFields| get_normalized_start_time(line.timestamp) <= elapsed_t;

    // Holding slot for a popped-but-not-yet-due line. BUGFIX: the previous
    // `try_iter().take_while(...)` implementation popped the first future
    // line off the queue and silently dropped it (one lost trace line per
    // scrape); instead we park it here and export it on a later scrape.
    static FUTURE_LINE: std::sync::Mutex<Option<NodeCsvFields>> = std::sync::Mutex::new(None);

    let mut held = FUTURE_LINE.lock().unwrap();

    // First, retry the line held over from the previous scrape (if any)
    if let Some(line) = held.take() {
        if is_due(&line) {
            export_line(line);
        } else {
            *held = Some(line); // still in the future, keep holding it
        }
    }

    // Drain due lines from the queue; park the first future one
    if held.is_none() {
        while let Ok(line) = NODE_DATA_QUEUE.pop() {
            if is_due(&line) {
                export_line(line);
            } else {
                *held = Some(line);
                break;
            }
        }
    }

    // No more files to read, nothing queued, and nothing held back
    if NODE_DATA_QUEUE.is_closed() && NODE_DATA_QUEUE.is_empty() && held.is_none() {
        info!("No more Node data to export, shutting down");
        std::process::exit(0);
    }
}
diff --git a/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs b/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs new file mode 100644 index 0000000..2fefed6 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/google_metrics.rs @@ -0,0 +1,452 @@
use crate::utilities;
use crate::utilities::*;
use clap::ValueEnum;
use concurrent_queue::ConcurrentQueue;
use csv::Reader;
use flate2::read::GzDecoder;
use lazy_static::lazy_static;
use prometheus::{register_gauge_vec, GaugeVec};
use std::sync::OnceLock;
use std::thread;
use std::time::Duration;
use std::{fs::File, io::BufReader};
use tracing::{debug, info};

// NOTE(review): the generic parameters were lost in extraction;
// reconstructed from usage in get_reader below — confirm against the
// original file.
type CsvGzReader = Reader<GzDecoder<BufReader<File>>>;

/* Standard labels for google's task resource usage data */
const TRU_LABELS: [&str; 3] = ["job_id", "task_index", "machine_id"];
const TRU_CSV_DELIMITER: u8 = b',';
const DATA_QUEUE_CAP: usize = 400_000; // Max lines in the queue
const CSV_MAX_PART_NO: u16 = 500;

const MICRO_SECONDS_PER_SECOND: u64 = 1_000_000;
const T_OFFSET_SECS: u64 = 600; // Trace timestamps start 600s before the trace period
const DILATION_FACTOR: u64 = 10; // Factor for scaling time stamps relative to when they are exported

/// Each line of the csv file is serialized into the following struct.
/// The ordering of the struct fields MUST match the order that fields
/// appear in a line of the csv file.
+/// +/// All fields wrapped in Option<> are not considered mandatory by +/// the schema and, therefore, may be missing from a given trace. +/// The rest of the fields should never be missing, so failure to +/// deserialize will result in an error and program termination +#[derive(Debug, serde::Deserialize)] +pub struct TruCsvFields { + pub start_time: u64, + pub _end_time: u64, // unused, only here for parsing + pub job_id: String, // label + pub task_index: String, // label + pub machine_id: String, // label + pub mean_cpu_usage_rate: Option, + pub canonical_memory_usage: Option, + pub assigned_memory_usage: Option, + pub unmapped_page_cache_memory_usage: Option, + pub total_page_cache_memory_usage: Option, + pub max_memory_usage: Option, + pub mean_disk_io_time: Option, + pub mean_local_disk_space_used: Option, + pub max_cpu_usage: Option, + pub max_disk_io_time: Option, + pub cycles_per_instruction: Option, + pub memory_accesses_per_instruction: Option, + pub sample_portion: Option, + pub aggregation_type: Option, // Divides metrics into two + pub sampled_cpu_usage: Option, +} + +/// @brief An enum for matching the metrics to export with their +/// corresponding prometheus gauges +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum TruMetrics { + MeanCpuUsageRate, + CanonicalMemoryUsage, + AssignedMemoryUsage, + UnmappedPageCacheMemoryUsage, + TotalPageCacheMemoryUsage, + MaxMemoryUsage, + MeanDiskIoTime, + MeanLocalDiskSpaceUsed, + MaxCpuUsage, + MaxDiskIoTime, + CyclesPerInstruction, + MemoryAccessesPerInstruction, + SamplePortion, + SampledCpuUsage, +} + +/// @brief A tuple struct representing two of the same prometheus metrics, +/// but partitioned by their aggregation type. Index number directly +/// corresponds to the aggregation type, i.e. i=0 => aggregation_type=0 +pub struct GaugePair(GaugeVec, GaugeVec); + +impl GaugePair { + /// @brief Create and register both GaugeVecs in the GaugePair to the + /// default registry. 
+ /// + /// @param[in] base_name The string used as the base of both metrics + /// names as seen by prometheus, where aggregation type will be appended + /// + /// @param[in] base_help The string used as the base of both metrics + /// help strings when scraped by prometheus. Aggregation type is + /// appended + fn new(base_name: &str, base_help: &str) -> GaugePair { + let mut name_0 = String::from(base_name); + name_0.push_str("_0"); + let mut help_0 = String::from(base_help); + help_0.push_str(" (aggregation_type=0)"); + let gauge_0 = register_gauge_vec!(name_0.as_str(), help_0.as_str(), &TRU_LABELS).unwrap(); + + let mut name_1 = String::from(base_name); + name_1.push_str("_1"); + let mut help_1 = String::from(base_help); + help_1.push_str(" (aggregation_type=1)"); + let gauge_1 = register_gauge_vec!(name_1.as_str(), help_1.as_str(), &TRU_LABELS).unwrap(); + + GaugePair(gauge_0, gauge_1) + } + + /// @brief Retrieve a static reference to the gauge from the pair for + /// the given aggregation type + /// + /// @param[in] self Statically defined GaugePair + /// @param[in] aggregation_type 0 or 1 (The aggregation type) + fn get(&'static self, aggregation_type: u8) -> &'static GaugeVec { + match aggregation_type { + 0 => &self.0, + 1 => &self.1, + _ => panic!("Invalid index into gauge vec"), + } + } +} + +/// List of metrics to export from the google task resource usage data +pub static GOOGLE_METRICS: OnceLock> = OnceLock::new(); + +lazy_static! { + /// Queue for parsed csv lines + pub static ref GOOGLE_DATA_QUEUE: ConcurrentQueue = ConcurrentQueue::bounded(DATA_QUEUE_CAP); + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * ALL METRICS * + * * + * Each static reference is a GaugePair corresponding to a single * + * metric. Each element of the pair corresponds to an aggregation * + * type of 0 or 1. 
When the aggregation type is missing from a * + * trace the aggregation type defaults to 0 * + * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + pub static ref MEAN_CPU_USAGE_RATE_PAIR: GaugePair = GaugePair::new( + "google_mean_cpu_usage_rate", "Mean cpu usage rate by google machines", + ); + + pub static ref CANONICAL_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_canonical_memory_usage", "Canonical memory usage by google cluster machines", + ); + + pub static ref ASSIGNED_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_assigned_memory_usage", "Assigned memory usage for google cluster machines", + ); + + pub static ref UNMAPPED_PAGE_CACHE_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_unmapped_page_cache_memory_usage", "Unmapped page cache memory usage for google cluster machines", + ); + + pub static ref TOTAL_PAGE_CACHE_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_total_page_cache_memory_usage", "Total page cache memory usage for google cluster machines", + ); + + pub static ref MAX_MEMORY_USAGE_PAIR: GaugePair = GaugePair::new( + "google_max_memory_usage", "Maximum memory usage by google cluster machines", + ); + + pub static ref MEAN_DISK_IO_TIME_PAIR: GaugePair = GaugePair::new( + "google_mean_disk_io_time", "Mean disk I/O time for google cluster machines", + ); + + pub static ref MEAN_LOCAL_DISK_SPACE_USED_PAIR: GaugePair = GaugePair::new( + "google_mean_local_disk_space_used", "Mean local disk space used by google cluster machines", + ); + + pub static ref MAX_CPU_USAGE_PAIR: GaugePair = GaugePair::new( + "google_max_cpu_usage", "Maximum cpu usage for google cluster machines", + ); + + pub static ref MAX_DISK_IO_TIME_PAIR: GaugePair = GaugePair::new( + "google_max_disk_io_time", "Maximum disk I/O time for google cluster machines", + ); + + pub static ref CYCLES_PER_INSTRUCTION_PAIR: GaugePair = GaugePair::new( + "google_cycles_per_instruction", "Cycles per instruction for google cluster 
machines", + ); + + pub static ref MEMORY_ACCESSES_PER_INSTRUCTION_PAIR: GaugePair = GaugePair::new( + "google_memory_accesses_per_instruction", "Memory accesses per instruction for google cluster machines", + ); + + pub static ref SAMPLE_PORTION_PAIR: GaugePair = GaugePair::new( + "google_sample_portion", "Sample portion for google cluster machines", + ); + + pub static ref SAMPLED_CPU_USAGE_PAIR: GaugePair = GaugePair::new( + "google_sampled_cpu_usage", "Sampled cpu usage for google cluster machines", + ); +} + +/// @brief Given the part number, create a String for the filename. +/// +/// @param[in] part The csv part number such that: part ∈ [0, 500] +/// @param[in] gzipped Whether or not .gz should be appended to the filename +/// +/// @return The csv filename as a String, in the form: +/// +pub fn get_csv_filename(part: u16, gzipped: bool) -> String { + const TRU_CSV_PATH_PARTS: [&str; 4] = ["part-", "00000", "-of-00500.csv", ".gz"]; + + let mut filename = String::new(); + let part_name_str: String; + + if part < 10 { + part_name_str = format!("0000{}", part); + } else if (10..100).contains(&part) { + part_name_str = format!("000{}", part); + } else if (100..=CSV_MAX_PART_NO).contains(&part) { + part_name_str = format!("00{}", part); + } else { + panic!( + "Invalid part number: {} => part must be between 0 and 500", + part + ); + } + + filename.push_str(TRU_CSV_PATH_PARTS[0]); + filename.push_str(&part_name_str); + filename.push_str(TRU_CSV_PATH_PARTS[2]); + + if gzipped { + filename.push_str(TRU_CSV_PATH_PARTS[3]); + } + + filename +} + +/// @brief Creates a new csv reader wrapped around a gzip decoder which +/// streams data from the underlying file +/// +/// @param[in] input_dir The directory containing gzipped csv files +/// @param[in] part The part number out of the total number of csv files +/// +/// @return The configured reader +fn get_reader(input_dir: &str, part: u16) -> Result, BoxedErr> { + use csv::ReaderBuilder; + use flate2::read::GzDecoder; + 
use std::fs::File; + use std::io::BufReader; + use std::path::Path; + + let filename: String = get_csv_filename(part, true); + let file_path = Path::new(input_dir).join(&filename); + let fd: File = File::open(file_path)?; + let buf_rdr = BufReader::new(fd); + let gz_decoder = GzDecoder::new(buf_rdr); + + let csv_rdr: CsvGzReader = ReaderBuilder::new() + .delimiter(TRU_CSV_DELIMITER) + .flexible(true) + .has_headers(false) + .from_reader(gz_decoder); + + Ok(csv_rdr) +} + +/// @brief Main routine of the helper (reader) thread. +/// +/// The purpose of the thread is to handle all of the work involved in +/// reading and enqueuing lines from the csv.gz file for the +/// main thread to then pop and export on scrape +/// +/// @param[in] input_dir The path to the directory containing the csv.gz files +/// @param[in] all_parts Whether or not to run the exporter on all 500 parts of +/// the task resource usage csv data. Running in this mode +/// and not providing all 500 parts will cause the reader +/// thread to panic. If this option is true, part_index +/// should be None +/// @param[in] part_index Specify a single part (out of 500) to read csv data +/// from. The reader thread will stop after reading this +/// single file. If part_index is not None, then all_parts +/// should be false +/// @param[in] metrics The list of metrics, or csv fields, for the exporter +/// to expose to prometheus. 
At least one must be given +/// +/// @pre All csv files are expected to be of the form: +/// "part-00xxx-of-00500.csv.gz" +pub fn reader_thread_routine( + input_dir: String, + all_parts: bool, + part_index: Option, + metrics: Vec, +) -> Result<(), BoxedErr> { + const QUEUE_POLL_INTERVAL_MS: u64 = 250; + GOOGLE_METRICS.set(metrics).unwrap(); + let mut part: u16 = 0_u16; + + if !all_parts { + part = part_index.unwrap(); + } + + while let Ok(mut rdr) = get_reader(&input_dir, part) { + let csv_iter = rdr.deserialize(); + for csv_line in csv_iter { + while GOOGLE_DATA_QUEUE.is_full() { + thread::sleep(Duration::from_millis(QUEUE_POLL_INTERVAL_MS)); + } + let parsed_line: TruCsvFields = csv_line?; + let _ = GOOGLE_DATA_QUEUE.push(parsed_line); + } + part += 1; + + if !all_parts || part > CSV_MAX_PART_NO { + break; + } + } + + // Never read any parts or all parts was specified and we never read all 500 + // parts of the csv data + if part == 0 || (all_parts && part <= CSV_MAX_PART_NO) { + panic!( + "Failed to read initial .csv.gz file. Check that all data files + are named in the correct format ('part-?????-of-00500.csv.gz'). + If running with --all-parts, ensure all 500 parts exist in the + input directory. + " + ); + } else { + GOOGLE_DATA_QUEUE.close(); + Ok(()) + } +} + +/// @brief: Converts the start time of a job into seconds and normalizes it +/// +/// From pg.2 of the schema doc: +/// "Each record has a timestamp, which is in microseconds since 600 +/// seconds before the beginning of the trace period, and recorded as a +/// 64 bit integer (i.e., an event 20 second after the start of the +/// trace would have a timestamp=620s)." 
+/// +/// @param[in] time_micros The event start time in microseconds, +/// offset by T_OFFSET_SECS (600s) +/// +/// @return A duration representing the dilated trace start time in seconds +/// after subtracting the offset +pub fn get_normalized_start_time(time_micros: u64) -> Duration { + let time_secs = time_micros / MICRO_SECONDS_PER_SECOND; + Duration::from_secs((time_secs - T_OFFSET_SECS) * DILATION_FACTOR) +} + +/// @brief Given a single parsed line from the csv file, update all gauges +/// corresponding to the metrics in the list +/// +/// @param[in] csv_line A parsed line from the csv file containing label +/// values and metric data to export +pub fn export_line(csv_line: TruCsvFields) { + let metrics = GOOGLE_METRICS.get().unwrap(); + let label_vals: [&str; 3] = [ + csv_line.job_id.as_str(), + csv_line.task_index.as_str(), + csv_line.machine_id.as_str(), + ]; + + let aggregation_type = csv_line.aggregation_type.unwrap_or(0_u8); + + for metric in metrics { + let curr_gauge: &'static GaugeVec; + let wrapped_value: Option; + + (curr_gauge, wrapped_value) = match metric { + TruMetrics::MeanCpuUsageRate => ( + MEAN_CPU_USAGE_RATE_PAIR.get(aggregation_type), + csv_line.mean_cpu_usage_rate, + ), + TruMetrics::CanonicalMemoryUsage => ( + CANONICAL_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.canonical_memory_usage, + ), + TruMetrics::AssignedMemoryUsage => ( + ASSIGNED_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.assigned_memory_usage, + ), + TruMetrics::UnmappedPageCacheMemoryUsage => ( + UNMAPPED_PAGE_CACHE_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.unmapped_page_cache_memory_usage, + ), + TruMetrics::TotalPageCacheMemoryUsage => ( + TOTAL_PAGE_CACHE_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.total_page_cache_memory_usage, + ), + TruMetrics::MaxMemoryUsage => ( + MAX_MEMORY_USAGE_PAIR.get(aggregation_type), + csv_line.max_memory_usage, + ), + TruMetrics::MeanDiskIoTime => ( + MEAN_DISK_IO_TIME_PAIR.get(aggregation_type), + 
csv_line.mean_disk_io_time, + ), + TruMetrics::MeanLocalDiskSpaceUsed => ( + MEAN_LOCAL_DISK_SPACE_USED_PAIR.get(aggregation_type), + csv_line.mean_local_disk_space_used, + ), + TruMetrics::MaxCpuUsage => ( + MAX_CPU_USAGE_PAIR.get(aggregation_type), + csv_line.max_cpu_usage, + ), + TruMetrics::MaxDiskIoTime => ( + MAX_DISK_IO_TIME_PAIR.get(aggregation_type), + csv_line.max_disk_io_time, + ), + TruMetrics::CyclesPerInstruction => ( + CYCLES_PER_INSTRUCTION_PAIR.get(aggregation_type), + csv_line.cycles_per_instruction, + ), + TruMetrics::MemoryAccessesPerInstruction => ( + MEMORY_ACCESSES_PER_INSTRUCTION_PAIR.get(aggregation_type), + csv_line.memory_accesses_per_instruction, + ), + TruMetrics::SamplePortion => ( + SAMPLE_PORTION_PAIR.get(aggregation_type), + csv_line.sample_portion, + ), + TruMetrics::SampledCpuUsage => ( + SAMPLED_CPU_USAGE_PAIR.get(aggregation_type), + csv_line.sampled_cpu_usage, + ), + }; + + if let Some(metric_value) = wrapped_value { + // Set the metric, unless it was missing + curr_gauge.with_label_values(&label_vals).set(metric_value); + } + } +} + +/// @brief Exports all parsed CSV lines from the queue +/// +/// This function will continue popping lines from the queue until it +/// pops one with a start timestamp which should be exported later in time. 
+/// This line will be saved in FUTURE_LINE and then exported on the next +/// scrape for which the program runtime <= start time +pub fn export_from_queue() { + let elapsed_t: Duration = utilities::get_time_elapsed(); + let check_time = |line: &TruCsvFields| get_normalized_start_time(line.start_time) <= elapsed_t; + + GOOGLE_DATA_QUEUE + .try_iter() + .take_while(check_time) + .for_each(export_line); + + if GOOGLE_DATA_QUEUE.is_closed() && GOOGLE_DATA_QUEUE.is_empty() { + info!("No more task resource usage to export, shutting down"); + std::process::exit(0); + } +} diff --git a/PrometheusExporters/cluster_data_exporter/src/main.rs b/PrometheusExporters/cluster_data_exporter/src/main.rs new file mode 100644 index 0000000..d6a1a53 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/main.rs @@ -0,0 +1,260 @@ +/// @NOTE: As new label-value combinations are added to each metric, +/// they will persist unless another metric with the same label-value combo +/// overwipes it. Therefore, user should be wary about the possibility +/// of program memory usage steadily increasing over the course of the runtime +use crate::alibaba_metrics::*; +use crate::google_metrics::*; +use crate::utilities::*; +use clap::Parser; +use hyper::body::Incoming; +use hyper::header::CONTENT_TYPE; +use hyper::server::conn::http1; +use hyper::service::service_fn; +use hyper::Request; +use hyper::Response; +use hyper_util::rt::TokioIo; +use prometheus::{Encoder, TextEncoder}; +use std::net::{Ipv4Addr, SocketAddr}; +use std::sync::OnceLock; +use std::{panic, process, thread}; +use tokio::net::TcpListener; +use tracing::{debug, error, info, warn}; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; + +mod alibaba_metrics; +mod google_metrics; +mod utilities; + +type BoxedErr = Box; + +/// Google or Alibaba. 
Must be initialized before starting export routine +static DATA_PROVIDER: OnceLock = OnceLock::new(); + +/// @brief Async call-back function for servicing http requests, like +/// prometheus scrapes +/// +/// @param[in] _req The incoming http request +/// +/// @return Prometheus metrics on success +/// BoxedErr on failure +async fn serve_req(_req: Request) -> Result, BoxedErr> { + let encoder = TextEncoder::new(); + let provider = DATA_PROVIDER.get().unwrap(); + + match provider { + Provider::Google => google_metrics::export_from_queue(), + Provider::Alibaba => alibaba_metrics::export_from_queue(), + } + + let metric_families = prometheus::gather(); + let body = encoder.encode_to_string(&metric_families)?; + let response = Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(body)?; + + Ok(response) +} + +/// @brief Starts a thread to read and queue Google cluster data +/// +/// @param[in] input_dir The input directory to Google task resource usage +/// cluster data +/// @param[in] all_parts Whether to run the exporter across all csv parts or +/// not. This should be false if part index is not None +/// @param[in] part_index The part number, out of 500, of the csv file to use +/// when exporting task resource usage data. This should +/// be None if all_parts is true. +/// @param[in] metrics The list of metrics from the task resource usage data +/// to export +/// +/// @post All globals required by the main exporter thread are initialized. 
+fn start_google_thread( + input_dir: String, + all_parts: bool, + part_index: Option, + metrics: Vec, +) { + debug!("Starting Google reader thread"); + thread::spawn(move || { + // start reader thread + // Drops thread handle => thread is implicitly detached + if let Err(err) = + google_metrics::reader_thread_routine(input_dir, all_parts, part_index, metrics) + { + error!("Error in Google reader thread: {:?}", err); + process::exit(1); + } + }); + // Must be initialized before main thread starts exporting + google_metrics::GOOGLE_METRICS.wait(); + debug!("Google reader thread initialized"); +} + +/// @brief Starts a thread to read and queue Alibaba cluster data +/// +/// @param[in] input_dir The input directory containing the csv files for +/// reading +/// @param[in] all_parts Whether to run the exporter from part 0 until no more +/// csv files are found, or not. This should be false if +/// part index is not None. +/// @param[in] part_index Which csv file part to use as the data source. +/// This should be None if all_parts is true. +/// @param[in] data_type Which type of microservice data the reading thread +/// should be configured to read and queue +/// @param[in] data_year The year from which the source data comes from. Valid +/// options are 2021 and 2022 +/// @param[in] speedup Speedup factor for faster-than-realtime export +/// +/// @post All globals required by the main exporter thread are initialized. 
+fn start_alibaba_thread( + input_dir: String, + all_parts: bool, + part_index: Option, + data_type: MsDataType, + data_year: u32, + speedup: u64, +) { + debug!("Starting Alibaba reader thread"); + thread::spawn(move || { + if let Err(err) = alibaba_metrics::reader_thread_routine( + input_dir, all_parts, part_index, data_type, data_year, speedup, + ) { + error!("Error in Alibaba reader thread: {:?}", err); + process::exit(1); + } + }); + // Must be initialized before main thread starts exporting + alibaba_metrics::EXPORTER_DATA_TYPE.wait(); + debug!("Alibaba reader thread initialized"); +} + +/// @brief Sets up logging with optional file output +/// +/// @param[in] log_dir Optional directory for log file output +/// @param[in] log_level Log level string (DEBUG, INFO, WARN, ERROR) +/// +/// @return WorkerGuard if file logging is enabled, None otherwise. +/// The guard must be kept alive for the duration of the program. +fn setup_logging( + log_dir: Option<&str>, + log_level: &str, +) -> Result, BoxedErr> { + // Create env filter that respects RUST_LOG, with fallback to command line arg + let env_filter = EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new(log_level)) + .unwrap_or_else(|_| EnvFilter::new("info")); + + if let Some(dir) = log_dir { + // Log to file AND stdout + std::fs::create_dir_all(dir)?; + let file_appender = + tracing_appender::rolling::never(dir, "cluster_data_exporter.log"); + let (non_blocking, guard) = tracing_appender::non_blocking(file_appender); + + tracing_subscriber::registry() + .with(env_filter) + .with( + tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_ansi(true), + ) + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_ansi(false), + ) + .init(); + + info!( + "Logging initialized with file output: {}/cluster_data_exporter.log", + dir + ); + Ok(Some(guard)) + } else { + // Log to stdout only + tracing_subscriber::registry() + .with(env_filter) + 
.with(tracing_subscriber::fmt::layer()) + .init(); + + info!("Logging initialized (stdout only)"); + Ok(None) + } +} + +#[tokio::main] +async fn main() -> Result<(), BoxedErr> { + let cli = Cli::parse(); + + // Initialize logging (keep guard alive for lifetime of program) + let _log_guard = setup_logging(cli.log_dir.as_deref(), &cli.log_level)?; + + info!("Starting cluster_data_exporter"); + info!("Input directory: {}", cli.input_directory); + info!("Port: {}", cli.port); + + // This code forces the program to exit if a reader thread panics. + // Comment it out if it's preferable for the main thread to remain + let orig_hook = panic::take_hook(); + panic::set_hook(Box::new(move |panic_info| { + // invoke the default handler and then exit the process + orig_hook(panic_info); + process::exit(1); + })); + + let input_directory: String = cli.input_directory.clone(); + let port: u16 = cli.port; + let addr: SocketAddr = (Ipv4Addr::UNSPECIFIED, port).into(); + + let _ = utilities::T_START; // init t_start + + // Spin up reader thread to start queueing csv data + match cli.provider { + ProviderCmd::Google { + metrics, + all_parts, + part_index, + } => { + info!("Provider: Google"); + info!("Metrics: {:?}", metrics); + info!("Parts mode: {}", if all_parts { "all-parts" } else { "part-index" }); + if let Some(idx) = part_index { + info!("Part index: {}", idx); + } + let _ = DATA_PROVIDER.set(Provider::Google); + start_google_thread(input_directory, all_parts, part_index, metrics); + } + ProviderCmd::Alibaba { + data_type, + data_year, + all_parts, + part_index, + speedup, + } => { + info!("Provider: Alibaba"); + info!("Data type: {:?}", data_type); + info!("Data year: {}", data_year); + info!("Parts mode: {}", if all_parts { "all-parts" } else { "part-index" }); + if let Some(idx) = part_index { + info!("Part index: {}", idx); + } + info!("Speedup factor: {}x", speedup); + let _ = DATA_PROVIDER.set(Provider::Alibaba); + start_alibaba_thread(input_directory, all_parts, 
part_index, data_type, data_year, speedup);
        }
    }

    let listener = TcpListener::bind(addr).await?;
    info!("Server listening on http://{}", addr);

    loop {
        // Main exporter routine: service each scrape connection in turn
        let (stream, _) = listener.accept().await?;
        let io = TokioIo::new(stream);
        let service = service_fn(serve_req);
        if let Err(err) = http1::Builder::new().serve_connection(io, service).await {
            error!("Server error: {:?}", err);
        };
    }
}
diff --git a/PrometheusExporters/cluster_data_exporter/src/utilities.rs b/PrometheusExporters/cluster_data_exporter/src/utilities.rs new file mode 100644 index 0000000..f811084 --- /dev/null +++ b/PrometheusExporters/cluster_data_exporter/src/utilities.rs @@ -0,0 +1,113 @@
use crate::alibaba_metrics::*;
use crate::google_metrics::*;
use clap::{ArgGroup, Parser, Subcommand, ValueEnum};
use lazy_static::lazy_static;
use std::time::{Duration, Instant};

// NOTE(review): the trait-object parameter was lost in extraction;
// reconstructed — confirm against the original file.
pub type BoxedErr = Box<dyn std::error::Error + Send + Sync>;

lazy_static! {
    /// An instant in time to roughly represent the start time of the exporter
    /// This is used as the reference point for calculating how much time has
    /// elapsed, and therefore which traces should be exported during a scrape
    /// and which ones should be held onto until later
    pub static ref T_START: Instant = Instant::now();
}

/// @brief Returns the time since T_START as a Duration
///
/// @return Duration since the Instant defined by T_START
///
/// @note T_START isn't initialized until it is referenced for the first
/// time, so if this function is called before T_START is ever referenced
/// then T_START will be initialized here with Duration::ZERO returned
pub fn get_time_elapsed() -> Duration {
    T_START.elapsed()
}

/// Which cluster-trace provider the exporter should serve data for
#[derive(Debug, Clone, ValueEnum)]
pub enum Provider {
    Google,
    Alibaba,
}

#[derive(Parser, Debug)]
#[command(name = "cluster_data_exporter", version, about)]
#[command(subcommand_required = true)]
pub struct Cli {
    /// Directory containing the trace csv files
    // BUGFIX: the aliases were one comma-joined string literal, so the only
    // registered alias was the unusable "input, in, dir, input_dir"
    #[arg(short, long, aliases = ["input", "in", "dir", "input_dir"])]
#[arg(required = true)] + pub input_directory: String, + + #[arg(short, long)] + #[arg(required = true)] + pub port: u16, + + /// Log level (DEBUG, INFO, WARN, ERROR) + #[arg(long, default_value = "INFO")] + pub log_level: String, + + /// Output directory for log files (optional, defaults to stdout only) + #[arg(long)] + pub log_dir: Option, + + #[command(subcommand)] + pub provider: ProviderCmd, +} + +#[derive(Subcommand, Debug)] +pub enum ProviderCmd { + /// Run the exporter on google task resource usage data + #[command(group(ArgGroup::new("csv-parts") + .args(&["all_parts", "part_index"]) + .required(true)) + )] + Google { + #[arg(long, value_enum, value_delimiter = ',', num_args = 1..)] + #[arg(required = true, require_equals = true)] + metrics: Vec, + + #[arg(long, group = "csv-parts", alias = "all")] + all_parts: bool, + + #[arg(long, group = "csv-parts", aliases = ["part", "index"])] + #[arg(require_equals = true)] + part_index: Option, + }, + + /// Run the exporter on Alibaba microservice data + #[command(group(ArgGroup::new("csv-parts") + .args(&["all_parts", "part_index"]) + .required(true)) + )] + Alibaba { + /// The type of microservice data to use + #[arg(long, value_enum)] + #[arg(required = true, require_equals = true)] + data_type: MsDataType, + + /// Which year the microservice data comes from + #[arg(long)] + #[arg(required = true, require_equals = true)] + #[arg(value_parser = clap::value_parser!(u32).range(2021..=2022))] + data_year: u32, + + /// Whether or not to run the exporter starting on part 0 of the csv + /// files and continue sequentially until no more files are found. + /// This option is mutually exclusive with --part-index + #[arg(long, group = "csv-parts", alias = "all")] + all_parts: bool, + + /// Specify a single csv file to use as trace data. 
+ /// This option is mutually exclusive with --all-parts + #[arg(long, group = "csv-parts", aliases = ["part", "index"])] + #[arg(require_equals = true)] + part_index: Option, + + /// Speedup factor for faster-than-realtime export + /// 1 = real-time, 10 = 10x faster, 100 = 100x faster + #[arg(long, require_equals = true)] + #[arg(value_parser = clap::value_parser!(u64).range(1..))] + speedup: u64, + }, +} diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile b/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile new file mode 100644 index 0000000..c775d8a --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/Dockerfile @@ -0,0 +1,12 @@ +FROM sketchdb-base:latest + +LABEL maintainer="SketchDB Team" +LABEL description="Prometheus Client for SketchDB" + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +COPY fake_exporter.py . + +ENTRYPOINT ["python", "fake_exporter.py"] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 new file mode 100644 index 0000000..8ee0b84 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/docker-compose.yml.j2 @@ -0,0 +1,22 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-python:latest + container_name: {{ container_name | default('sketchdb-fake-exporter') }} + ports: + - "{{ port }}:{{ port }}" + volumes: + - {{ experiment_output_dir }}:/app/output + - {{ output_dir }}:/app/exporter_output_dir + command: [ + "--output_dir", "/app/exporter_output_dir", + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", " {{ dataset }}", + "--num_labels", "{{ num_labels }}", + "--num_values_per_label", "{{ num_values_per_label }}", + 
"--metric_type", "{{ metric_type }}" + ] + restart: unless-stopped diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 new file mode 100644 index 0000000..8e95ed0 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/fake-exporter-python-cli-compose.yml.j2 @@ -0,0 +1,25 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-python:latest + container_name: {{ container_name | default('asap-fake-exporter') }} + hostname: {{ container_name }} # What prometheus uses to scrape + networks: + - asap-network + expose: + - "{{ port }}" + volumes: + - {{ experiment_output_dir }}:/app/outputs + - {{ output_dir }}:/app/exporter_output_dir + command: [ + "--output_dir", "/app/exporter_output_dir", + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", " {{ dataset }}", + "--num_labels", "{{ num_labels }}", + "--num_values_per_label", "{{ num_values_per_label }}", + "--metric_type", "{{ metric_type }}" + ] + restart: no diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py b/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py new file mode 100644 index 0000000..9e4cfe7 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/fake_exporter.py @@ -0,0 +1,247 @@ +import argparse +import itertools +import os +import time +from typing import List + +import numpy +import numpy as np +from prometheus_client import start_http_server +from prometheus_client.core import REGISTRY, CounterMetricFamily, GaugeMetricFamily +from prometheus_client.registry import Collector + + +class CustomCollector(Collector): + def __init__( + self, scale, dataset, num_labels, num_values_per_label: List[int], 
metric_type + ): + self.scale = scale + # self.timeseries_id_start = timeseries_id_start + self.dataset = dataset + self.rng = numpy.random.default_rng(0) + self.total_samples = 0 + self.const_1M = 1000000 + self.const_2M = 2000000 + self.const_3M = 3000000 + + self.metric_type = metric_type + + self.uniform_counter = 0 + self.dynamic_counter = 0 + self.zipf_counter = 0 + self.normal_counter = 0 + + self.num_labels: int = num_labels + self.labels = [f"label_{i}" for i in range(self.num_labels)] + self.num_values_per_label: List[int] + self.values_per_label: List[List[str]] = [] + self.label_value_combinations: List[List[str]] = [] + + self.label_value_combinations = self.compute_labels( + num_labels, num_values_per_label + ) + + # print("values_per_label") + # [print(sublist) for sublist in self.values_per_label] + # print("label_value_combinations") + # [print(sublist) for sublist in self.label_value_combinations] + # assert False + + def compute_labels( + self, num_labels: int, num_values_per_label: List[int] + ) -> List[List[str]]: + if len(num_values_per_label) == 1: + self.num_values_per_label = [ + num_values_per_label[0] for _ in range(num_labels) + ] + else: + if len(num_values_per_label) != num_labels: + raise ValueError( + "Number of num_values_per_label must be equal to num_labels" + ) + self.num_values_per_label = num_values_per_label + + num_timeseries = np.prod(self.num_values_per_label) + + for label_idx in range(self.num_labels): + values = [ + f"value_{label_idx}_value_{value_idx}" + for value_idx in range(self.num_values_per_label[label_idx]) + ] + self.values_per_label.append(values) + + label_value_combinations = list(itertools.product(*self.values_per_label)) + assert len(label_value_combinations) == num_timeseries + + # convert from list[tuple[str]] to list[list[str]] + rv: List[List[str]] = [ + list(label_value_combination) + for label_value_combination in label_value_combinations + ] + return rv + + def get_uniform_value_gauge(self): + value 
= -1 + while value < 0 or value > self.scale: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + return value + + def get_normal_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + return value + + def get_zipf_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + return value + + def get_dynamic_value_gauge(self): + value = -1 + while value < 0 or value > self.scale: + if self.total_samples < self.const_1M: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + elif self.total_samples < self.const_2M: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + else: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.total_samples = (self.total_samples + 1) % self.const_3M + return value + + def get_uniform_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + self.uniform_counter += value + return self.uniform_counter + + def get_normal_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.normal_counter += value + return self.normal_counter + + def get_zipf_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + self.zipf_counter += value + return self.zipf_counter + + def get_dynamic_value_counter(self): + value = -1 + while value < 0 or value > self.scale: + if self.total_samples < self.const_1M: + # value = numpy.random.zipf(1.01) + value = self.rng.zipf(1.01) + elif self.total_samples < self.const_2M: + # value = numpy.random.uniform() * self.scale + value = self.rng.uniform(0, self.scale) + else: 
+ value = self.rng.normal(loc=self.scale / 2, scale=self.scale) + self.total_samples = (self.total_samples + 1) % self.const_3M + self.dynamic_counter += value + return self.dynamic_counter + + def collect(self): + if self.metric_type == "counter": + fake_metric = CounterMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + elif self.metric_type == "gauge": + fake_metric = GaugeMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + else: + fake_metric = GaugeMetricFamily( + "fake_metric", + "Generating fake time series data with {} dataset".format(self.dataset), + labels=self.labels, + ) + + for label_value_combination in self.label_value_combinations: + if self.metric_type == "counter": + if self.dataset == "uniform": + value = self.get_uniform_value_counter() + elif self.dataset == "normal": + value = self.get_normal_value_counter() + elif self.dataset == "zipf": + value = self.get_zipf_value_counter() + elif self.dataset == "dynamic": + value = self.get_dynamic_value_counter() + else: + value = self.get_dynamic_value_counter() + else: # gauge + if self.dataset == "uniform": + value = self.get_uniform_value_gauge() + elif self.dataset == "normal": + value = self.get_normal_value_gauge() + elif self.dataset == "zipf": + value = self.get_zipf_value_gauge() + elif self.dataset == "dynamic": + value = self.get_dynamic_value_gauge() + else: + value = self.get_dynamic_value_gauge() + + # labels = [f"label_value_{i}" for d in range(self.num_labels)] + # fake_metric.add_metric(labels, value=value) + fake_metric.add_metric(label_value_combination, value) + + yield fake_metric + + +def main(args): + os.makedirs(args.output_dir, exist_ok=True) + + metric_collector = CustomCollector( + args.valuescale, + args.dataset, + args.num_labels, + args.num_values_per_label, + args.metric_type, + ) + 
REGISTRY.register(metric_collector) + start_http_server(port=args.port) + print("Fake exporter started on port {}".format(args.port)) + while True: + time.sleep(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output_dir", type=str, required=True) + parser.add_argument("--port", type=int, required=True) + parser.add_argument("--valuescale", type=int, required=True) + # parser.add_argument("--start_instanceid", type=int, required=True) + # parser.add_argument("--batchsize", type=int, required=True) + parser.add_argument("--dataset", type=str, required=True) + parser.add_argument("--num_labels", type=int, required=True) + parser.add_argument("--num_values_per_label", type=str, required=True) + parser.add_argument("--metric_type", type=str, required=True) + args = parser.parse_args() + + args.num_values_per_label = [int(i) for i in args.num_values_per_label.split(",")] + + # if ( + # args.port is None + # or args.valuescale is None + # or args.start_instanceid is None + # or args.batchsize is None + # or args.dataset is None + # ): + # print("Fake exporter missing argument") + # sys.exit(0) + main(args) diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh b/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh new file mode 100755 index 0000000..a876782 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Exporter Python Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-exporter-python:latest + +echo "Fake Exporter Python Docker image built successfully: sketchdb-fake-exporter-python:latest" \ No newline at end of file diff --git a/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt b/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt new file mode 100644 index 0000000..e24276c --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_python/requirements.txt @@ -0,0 +1,2 @@ +numpy +prometheus_client diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock new file mode 100644 index 0000000..1547d4d --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.lock @@ -0,0 +1,1166 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "clap" +version = "4.5.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fake_exporter" +version = "0.1.0" +dependencies = [ + "clap", + "futures", + "http-body-util", + "hyper", + "hyper-util", + "lazy_static", + "prometheus", + "rand", + "rand_distr", + "tokio", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "heck" 
+version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-util" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +dependencies = [ + 
"base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + 
+[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prometheus" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 2.0.16", +] + +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +dependencies = [ + "thiserror-impl 2.0.16", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = 
"0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-registry" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml new file mode 100644 index 0000000..ab6cec6 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "fake_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +prometheus = "0.14.0" +tokio = { version = "1", features = ["full"] } +hyper = { version = "1", features = ["full"] } +hyper-util = { version = "0.1", features = ["full"] } +http-body-util = "0.1" +rand = "0.9.1" +rand_distr = "0.5.1" +futures = "0.3" +lazy_static = "1.5" +clap = { version = "4.0", features = ["derive"] } diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile new file mode 100644 index 0000000..968907a --- 
/dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/Dockerfile @@ -0,0 +1,17 @@ +# Use the official Rust image as a parent image +FROM rust:1.82 AS builder + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application +RUN cargo build --release + +# Set the entrypoint to the fake_exporter binary +ENTRYPOINT ["target/release/fake_exporter"] diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 new file mode 100644 index 0000000..a8b67b4 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/docker-compose.yml.j2 @@ -0,0 +1,18 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-rust:latest + container_name: {{ container_name | default('sketchdb-fake-exporter') }} + ports: + - "{{ port }}:{{ port }}" + command: [ + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", "{{ dataset }}", + "--num-labels", "{{ num_labels }}", + "--num-values-per-label", "{{ num_values_per_label | string }}", + "--metric-type", "{{ metric_type }}" + ] + restart: unless-stopped diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 new file mode 100644 index 0000000..a59d56d --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/fake-exporter-rust-cli-compose.yml.j2 @@ -0,0 +1,21 @@ +# fake_exporter Docker Compose Template +# This template is rendered with Jinja2 to generate the final 
docker-compose.yml + +services: + fake-exporter: + image: sketchdb-fake-exporter-rust:latest + container_name: {{ container_name | default('asap-fake-exporter') }} + hostname: {{ container_name }} + networks: + - asap-network + expose: + - "{{ port }}" + command: [ + "--port", "{{ port }}", + "--valuescale", "{{ valuescale }}", + "--dataset", "{{ dataset }}", + "--num-labels", "{{ num_labels }}", + "--num-values-per-label", "{{ num_values_per_label | string }}", + "--metric-type", "{{ metric_type }}" + ] + restart: no diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh new file mode 100755 index 0000000..e7fe261 --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Exporter Rust Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-exporter-rust:latest + +echo "Fake Exporter Rust Docker image built successfully: sketchdb-fake-exporter-rust:latest" \ No newline at end of file diff --git a/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs new file mode 100644 index 0000000..99e1bbd --- /dev/null +++ b/PrometheusExporters/fake_exporter/fake_exporter_rust/fake_exporter/src/main.rs @@ -0,0 +1,662 @@ +use clap::{Parser, ValueEnum}; +use hyper::{ + Request, Response, body::Incoming, header::CONTENT_TYPE, server::conn::http1, + service::service_fn, +}; +use hyper_util::rt::TokioIo; +use prometheus::{ + Encoder, TextEncoder, + core::{Collector, Desc}, + proto::MetricFamily, +}; +use rand::{SeedableRng, rngs::SmallRng}; +use rand_distr::{Distribution, Normal, Uniform, Zipf}; +use std::{f64::consts::PI, net::Ipv4Addr, net::SocketAddr, sync::Mutex, time::{SystemTime, UNIX_EPOCH}}; +use tokio::net::TcpListener; + +/// Dataset/pattern types for metric generation +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +enum Dataset { + // === Random distribution types === + /// Uniform random distribution + Uniform, + /// Normal (Gaussian) random distribution + Normal, + /// Zipf power-law distribution + Zipf, + /// Cycles through Zipf -> Uniform -> Normal + Dynamic, + + // === Deterministic pattern types (time-based) === + /// Constant value (varies per series) + Constant, + /// Linearly increasing over time + LinearUp, + /// Linearly decreasing over time + LinearDown, + /// Sinusoidal wave + Sine, + /// Sinusoidal wave with gaussian noise + SineNoise, + /// Step function (discrete levels) + Step, + /// Baseline with random spikes + Spiky, + /// Exponential growth + ExpUp, +} + +impl Dataset { + /// Returns the pattern label value (for --add-pattern-label flag) + fn as_label(&self) -> &'static str { + match self { + Dataset::Uniform => "uniform", + Dataset::Normal => 
"normal", + Dataset::Zipf => "zipf", + Dataset::Dynamic => "dynamic", + Dataset::Constant => "constant", + Dataset::LinearUp => "linear_up", + Dataset::LinearDown => "linear_down", + Dataset::Sine => "sine", + Dataset::SineNoise => "sine_noise", + Dataset::Step => "step", + Dataset::Spiky => "spiky", + Dataset::ExpUp => "exp_up", + } + } +} + +type BoxedErr = Box; + +// === Dynamic distribution constants === +const CONST_1M: u64 = 1_000_000; +const CONST_2M: u64 = 2_000_000; +const CONST_3M: u64 = 3_000_000; + +const RNG_SEED: u64 = 0; // seed for rng used by all distributions + +const ZIPF_ALPHA: f64 = 1.01; // zipf parameter + +// === Pattern timing constants === +const SINE_PERIOD_SECS: f64 = 120.0; // 2 minute cycle +const STEP_PERIOD_SECS: f64 = 30.0; // Step changes every 30s +const LINEAR_WRAP_PERIOD_SECS: f64 = 300.0; // Linear resets every 5min + +// === Pattern variation constants (per-series diversity) === +const SINE_PHASE_VARIATION: f64 = 0.1; // Phase offset per series +const SINE_AMPLITUDE_VARIATION: f64 = 0.2; // Amplitude varies ±20% +const LINEAR_SLOPE_VARIATION: f64 = 0.1; // Slope varies ±10% +const CONSTANT_NUM_LEVELS: usize = 10; // 10 distinct constant values + +// === Noise/spike constants === +const NOISE_STDDEV_FRACTION: f64 = 0.1; // Noise is 10% of signal +const SPIKE_PROBABILITY: f64 = 0.05; // 5% chance per scrape +const SPIKE_MAGNITUDE: f64 = 5.0; // Spike is 5x baseline + +// === Step function constants === +const STEP_NUM_LEVELS: usize = 4; // 4 discrete levels + +// === Exponential constants === +const EXP_GROWTH_RATE: f64 = 0.01; // Growth rate per second +const EXP_WRAP_PERIOD_SECS: f64 = 300.0; // Exponential resets every 5min + +// Normal distribution mean +fn get_mean(valuescale: f64) -> f64 { + valuescale / 2.0 +} +// Normal distribution standard deviation +fn get_sigma(valuescale: f64) -> f64 { + valuescale / 8.0 +} + +// Converts string to vector of usize +fn get_num_vals_per_label(num_values_per_label_str: String, 
num_labels: usize) -> Vec { + let parse = num_values_per_label_str + .split(',') + .map(str::trim) // drop any surrounding whitespace + .filter(|s| !s.is_empty()) // skip empty segments, if any + .map(str::parse::) // parse each into usize + .collect(); + let num_values_per_label: Vec = match parse { + Ok(list) => list, + Err(error) => panic!("Couldn't parse num_values_per_label: {error:?}"), + }; + + let rv: Vec; + + if num_values_per_label.len() == 1 { + rv = vec![num_values_per_label[0]; num_labels]; + } else { + if num_values_per_label.len() != num_labels { + panic!( + "Number of num_values_per_label must be equal to num_labels (got {} vs {})", + num_values_per_label.len(), + num_labels + ); + } + rv = num_values_per_label; + } + + return rv; +} + +fn compute_labels(num_labels: usize, num_values_per_label: Vec, label_value_prefixes: &Option>) -> Vec> { + // 1. Build values_per_label + let mut values_per_label = Vec::with_capacity(num_labels); + for label_idx in 0..num_labels { + let count = num_values_per_label[label_idx]; + let mut bucket = Vec::with_capacity(count); + for value_idx in 0..count { + let value = match label_value_prefixes { + Some(prefixes) if label_idx < prefixes.len() => { + format!("{}_{}", prefixes[label_idx], value_idx) + } + _ => format!("value_{}_value_{}", label_idx, value_idx), + }; + bucket.push(value); + } + values_per_label.push(bucket); + } + + // 2. Compute expected total combinations + let expected: usize = num_values_per_label.iter().product(); + + // 3. Cartesian product helper + fn cartesian_product(pools: &[Vec]) -> Vec> { + let mut result: Vec> = vec![Vec::new()]; + for pool in pools { + let mut next = Vec::new(); + for prefix in &result { + for item in pool { + let mut new_prefix = prefix.clone(); + new_prefix.push(item.clone()); + next.push(new_prefix); + } + } + result = next; + } + result + } + + // 5. 
Generate combinations + let combos = cartesian_product(&values_per_label); + assert!( + combos.len() == expected, + "got {} combinations but expected {}", + combos.len(), + expected + ); + + combos +} + +struct FakeCollector { + valuescale: f64, // Max magnitude of value generation + dataset: Dataset, // Dataset/pattern type + label_value_combinations: Vec>, // list of label sets for all metrics + metric_type: String, // gauge or counter + metric_name: String, // custom metric name + label_names: Vec, // custom label names + add_pattern_label: bool, // whether to add a 'pattern' label to metrics + rng: Mutex, // seeded rng + zipf_dist: Option>, + normal_dist: Option>, + uniform_dist: Option>, + counter_state: Mutex, // tracking counter value + total_samples: Mutex, // for dynamic distribution only +} + +impl FakeCollector { + fn new( + valuescale: f64, + dataset: Dataset, + num_labels: usize, + num_values_per_label: String, + metric_type: String, + metric_name: Option, + label_names: Option, + label_value_prefixes: Option, + add_pattern_label: bool, + ) -> Self { + let num_values_per_label = get_num_vals_per_label(num_values_per_label, num_labels); + let prefixes: Option> = label_value_prefixes + .map(|s| s.split(',').map(|p| p.trim().to_string()).collect()); + let label_value_combinations = compute_labels(num_labels, num_values_per_label, &prefixes); + + // Determine metric name + let metric_name = match metric_name { + Some(name) => name, + None => if metric_type == "counter" { "fake_metric_total".to_string() } else { "fake_metric".to_string() }, + }; + + // Determine label names + let label_names: Vec = match label_names { + Some(names) => { + let parsed: Vec = names.split(',').map(|s| s.trim().to_string()).collect(); + if parsed.len() != num_labels { + panic!( + "Number of label names ({}) must match num_labels ({})", + parsed.len(), num_labels + ); + } + parsed + } + None => (0..num_labels).map(|i| format!("label_{}", i)).collect(), + }; + let mut zipf_dist: 
Option> = None; + let mut normal_dist: Option> = None; + let mut uniform_dist: Option> = None; + + // Instantiate required distributions based on dataset type + match dataset { + Dataset::Zipf => { + zipf_dist = Some( + Zipf::new(valuescale, ZIPF_ALPHA).expect("Failed to create Zipf distribution"), + ); + } + Dataset::Normal => { + let mean = get_mean(valuescale); + let sigma = get_sigma(valuescale); + normal_dist = + Some(Normal::new(mean, sigma).expect("Failed to create Normal distribution")); + } + Dataset::Dynamic => { + let mean = get_mean(valuescale); + let sigma = get_sigma(valuescale); + normal_dist = + Some(Normal::new(mean, sigma).expect("Failed to create Normal distribution")); + zipf_dist = Some( + Zipf::new(valuescale, ZIPF_ALPHA).expect("Failed to create Zipf distribution"), + ); + uniform_dist = Some( + Uniform::new_inclusive(0.0, valuescale) + .expect("Failed to create Uniform distribution"), + ); + } + Dataset::Uniform => { + uniform_dist = Some( + Uniform::new_inclusive(0.0, valuescale) + .expect("Failed to create Uniform distribution"), + ); + } + Dataset::SineNoise => { + // Needs normal distribution for noise + let noise_stddev = valuescale * NOISE_STDDEV_FRACTION; + normal_dist = + Some(Normal::new(0.0, noise_stddev).expect("Failed to create Normal distribution")); + } + Dataset::Spiky => { + // Needs uniform for probability check, normal for spike magnitude variation + uniform_dist = Some( + Uniform::new_inclusive(0.0, 1.0).expect("Failed to create Uniform distribution"), + ); + } + // Other patterns don't need distributions + Dataset::Constant + | Dataset::LinearUp + | Dataset::LinearDown + | Dataset::Sine + | Dataset::Step + | Dataset::ExpUp => {} + } + + Self { + valuescale, + dataset, + label_value_combinations, + metric_type, + metric_name, + label_names, + add_pattern_label, + rng: Mutex::new(SmallRng::seed_from_u64(RNG_SEED)), + zipf_dist, + normal_dist, + uniform_dist, + counter_state: Mutex::new(0.0), + total_samples: Mutex::new(0), 
+ } + } + + /// Get current timestamp in seconds since epoch + fn get_time_secs() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs_f64() + } + + /// Generate a sample value for the given series + /// For random distributions, series_id is ignored + /// For patterns, series_id is used to create per-series variation + fn get_sample(&self, series_id: usize) -> f64 { + match self.dataset { + // === Random distribution types === + Dataset::Zipf => { + self.zipf_dist + .as_ref() + .expect("Zipf distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Normal => { + self.normal_dist + .as_ref() + .expect("Normal distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Uniform => { + self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } + Dataset::Dynamic => { + let mut samples_mutex = self.total_samples.lock().unwrap(); + let rv = if *samples_mutex < CONST_1M { + self.zipf_dist + .as_ref() + .expect("Zipf distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } else if *samples_mutex < CONST_2M { + self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + } else { + self.normal_dist + .as_ref() + .expect("Normal distribution not initialized") + .sample(&mut self.rng.lock().unwrap()) + }; + *samples_mutex = (*samples_mutex + 1) % CONST_3M; + rv + } + + // === Deterministic pattern types === + Dataset::Constant => { + // Each series gets a different constant value based on its ID + let level = series_id % CONSTANT_NUM_LEVELS; + let base = self.valuescale / CONSTANT_NUM_LEVELS as f64; + base * (level as f64 + 0.5) // center of each level + } + Dataset::LinearUp => { + let now = Self::get_time_secs(); + // Slope varies per series + let slope_multiplier = 1.0 + (series_id % 10) as f64 * LINEAR_SLOPE_VARIATION; + 
let slope = (self.valuescale / LINEAR_WRAP_PERIOD_SECS) * slope_multiplier; + (now * slope) % self.valuescale + } + Dataset::LinearDown => { + let now = Self::get_time_secs(); + let slope_multiplier = 1.0 + (series_id % 10) as f64 * LINEAR_SLOPE_VARIATION; + let slope = (self.valuescale / LINEAR_WRAP_PERIOD_SECS) * slope_multiplier; + self.valuescale - ((now * slope) % self.valuescale) + } + Dataset::Sine => { + let now = Self::get_time_secs(); + // Phase offset varies per series + let phase = (series_id % 100) as f64 * SINE_PHASE_VARIATION; + // Amplitude varies per series (±SINE_AMPLITUDE_VARIATION) + let amplitude_multiplier = 1.0 + ((series_id % 5) as f64 - 2.0) * SINE_AMPLITUDE_VARIATION; + let amplitude = (self.valuescale / 2.0) * amplitude_multiplier; + let offset = self.valuescale / 2.0; // center the wave + + let angle = (2.0 * PI * now / SINE_PERIOD_SECS) + phase; + offset + amplitude * angle.sin() + } + Dataset::SineNoise => { + let now = Self::get_time_secs(); + let phase = (series_id % 100) as f64 * SINE_PHASE_VARIATION; + let amplitude_multiplier = 1.0 + ((series_id % 5) as f64 - 2.0) * SINE_AMPLITUDE_VARIATION; + let amplitude = (self.valuescale / 2.0) * amplitude_multiplier; + let offset = self.valuescale / 2.0; + + let angle = (2.0 * PI * now / SINE_PERIOD_SECS) + phase; + let base_value = offset + amplitude * angle.sin(); + + // Add gaussian noise + let noise = self.normal_dist + .as_ref() + .expect("Normal distribution not initialized for noise") + .sample(&mut self.rng.lock().unwrap()); + + (base_value + noise).max(0.0) // clamp to non-negative + } + Dataset::Step => { + let now = Self::get_time_secs(); + // Different series have different phase offsets for step timing + let phase_offset = (series_id % STEP_NUM_LEVELS) as f64 * (STEP_PERIOD_SECS / STEP_NUM_LEVELS as f64); + let adjusted_time = now + phase_offset; + + // Determine which step level we're at + let step_index = ((adjusted_time / STEP_PERIOD_SECS) as usize) % STEP_NUM_LEVELS; + let 
level_height = self.valuescale / STEP_NUM_LEVELS as f64; + + level_height * (step_index as f64 + 0.5) + } + Dataset::Spiky => { + // Baseline value varies per series + let baseline = self.valuescale * 0.2 * (1.0 + (series_id % 5) as f64 * 0.1); + + // Check if we should spike + let should_spike = self.uniform_dist + .as_ref() + .expect("Uniform distribution not initialized for spike check") + .sample(&mut self.rng.lock().unwrap()) < SPIKE_PROBABILITY; + + if should_spike { + baseline * SPIKE_MAGNITUDE + } else { + baseline + } + } + Dataset::ExpUp => { + let now = Self::get_time_secs(); + // Wrap time to avoid overflow + let wrapped_time = now % EXP_WRAP_PERIOD_SECS; + // Growth rate varies slightly per series + let rate = EXP_GROWTH_RATE * (1.0 + (series_id % 5) as f64 * 0.1); + + // Exponential growth, scaled to valuescale + let raw = (rate * wrapped_time).exp(); + // Normalize to valuescale range + let max_value = (rate * EXP_WRAP_PERIOD_SECS).exp(); + (raw / max_value) * self.valuescale + } + } + } + + // Generates a new random value based on the dataset, updates the counter, + // and returns the current counter value + // Note: Counter support for patterns is not fully implemented (uses series_id=0) + fn get_next_counter_val(&self, series_id: usize) -> f64 { + let random_val: f64 = self.get_sample(series_id); + let mut counter_mutex = self.counter_state.lock().unwrap(); + // Update counter with val + *counter_mutex += random_val; + *counter_mutex + } + + // Gets a metric family containing a counter family with all label_value combos + // Note: Pattern support for counters is limited - use gauge metric type for patterns + fn get_counter_family(&self) -> MetricFamily { + let mut counter_family = MetricFamily::default(); + counter_family.set_name(self.metric_name.clone()); + counter_family.set_help(format!( + "Generating fake time series data with {:?} dataset", + self.dataset + )); + counter_family.set_field_type(prometheus::proto::MetricType::COUNTER); + + for 
(series_id, label_value_combination) in self.label_value_combinations.iter().enumerate() { + let mut metric = prometheus::proto::Metric::default(); + let mut counter = prometheus::proto::Counter::default(); + let mut labels = Vec::new(); + + // Add the pattern label if enabled + if self.add_pattern_label { + let mut pattern_label = prometheus::proto::LabelPair::default(); + pattern_label.set_name("pattern".to_string()); + pattern_label.set_value(self.dataset.as_label().to_string()); + labels.push(pattern_label); + } + + for i in 0..label_value_combination.len() { + let mut label_and_value = prometheus::proto::LabelPair::default(); + let label_val: &String = &label_value_combination[i]; + label_and_value.set_name(self.label_names[i].clone()); + label_and_value.set_value(label_val.to_string()); + labels.push(label_and_value); + } + + metric.set_label(labels.into()); + counter.set_value(self.get_next_counter_val(series_id)); + metric.set_counter(counter); + counter_family.mut_metric().push(metric); + } + counter_family + } + + // Gets a metric family containing a gauge family with all label_value combos + fn get_gauge_family(&self) -> MetricFamily { + let mut gauge_family = MetricFamily::default(); + gauge_family.set_name(self.metric_name.clone()); + gauge_family.set_help(format!( + "Generating fake time series data with {:?} dataset", + self.dataset + )); + gauge_family.set_field_type(prometheus::proto::MetricType::GAUGE); + + for (series_id, label_value_combination) in self.label_value_combinations.iter().enumerate() { + let mut metric = prometheus::proto::Metric::default(); + let mut gauge = prometheus::proto::Gauge::default(); + let mut labels = Vec::new(); + + // Add the pattern label if enabled + if self.add_pattern_label { + let mut pattern_label = prometheus::proto::LabelPair::default(); + pattern_label.set_name("pattern".to_string()); + pattern_label.set_value(self.dataset.as_label().to_string()); + labels.push(pattern_label); + } + + // Add the regular 
labels + for i in 0..label_value_combination.len() { + let mut label_and_value = prometheus::proto::LabelPair::default(); + let label_val: &String = &label_value_combination[i]; + label_and_value.set_name(self.label_names[i].clone()); + label_and_value.set_value(label_val.to_string()); + labels.push(label_and_value); + } + + metric.set_label(labels.into()); + gauge.set_value(self.get_sample(series_id)); + metric.set_gauge(gauge); + gauge_family.mut_metric().push(metric); + } + gauge_family + } +} + +// Interface used by prometheus +impl Collector for FakeCollector { + fn desc(&self) -> Vec<&Desc> { + // Return empty vec initially + Vec::new() + } + + fn collect(&self) -> Vec { + let mut metric_families = Vec::new(); + + if self.metric_type == "counter" { + let counter_family = self.get_counter_family(); + metric_families.push(counter_family); + } else if self.metric_type == "gauge" { + let gauge_family = self.get_gauge_family(); + metric_families.push(gauge_family); + } else { + panic!("Metric type must be one of either 'counter' or 'gauge'") + } + + metric_families + } +} + +async fn serve_req(_req: Request) -> Result, BoxedErr> { + let encoder = TextEncoder::new(); + let metric_families = prometheus::gather(); // Calls collect() method + let body = encoder.encode_to_string(&metric_families)?; + let response = Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(body)?; + + Ok(response) +} + +#[derive(Parser)] +#[command(name = "fake_exporter")] +#[command(about = "A Prometheus fake exporter for generating test metrics")] +struct Args { + #[arg(long, help = "Port to serve metrics on")] + port: u16, + + #[arg(long, help = "Maximum scale for generated values")] + valuescale: i32, + + #[arg(long, value_enum, help = "Dataset/pattern type for value generation")] + dataset: Dataset, + + #[arg(long, help = "Number of labels per metric")] + num_labels: usize, + + #[arg(long, help = "Comma-separated list of number of values per label")] 
+ num_values_per_label: String, + + #[arg(long, help = "Metric type (gauge or counter)")] + metric_type: String, + + #[arg(long, help = "Custom metric name (default: fake_metric for gauge, fake_metric_total for counter)")] + metric_name: Option, + + #[arg(long, help = "Comma-separated custom label names (must match num-labels count)")] + label_names: Option, + + #[arg(long, help = "Comma-separated prefixes for label values (e.g. 'region,svc,inst' produces region_0, svc_0, inst_0)")] + label_value_prefixes: Option, + + #[arg(long, default_value = "false", help = "Add 'pattern' label to metrics with dataset name")] + add_pattern_label: bool, +} + +#[tokio::main] +async fn main() -> Result<(), BoxedErr> { + let args = Args::parse(); + + let fake_collector = Box::new(FakeCollector::new( + args.valuescale as f64, + args.dataset, + args.num_labels, + args.num_values_per_label, + args.metric_type, + args.metric_name, + args.label_names, + args.label_value_prefixes, + args.add_pattern_label, + )); + + // Register collector and start serving + let _ = prometheus::register(fake_collector); + let ip = Ipv4Addr::UNSPECIFIED; + let addr: SocketAddr = (ip, args.port).into(); + println!("Fake exporter started on port {}", args.port); + let listener = TcpListener::bind(addr).await?; + loop { + let (stream, _) = listener.accept().await?; + let io = TokioIo::new(stream); + + let service = service_fn(serve_req); + if let Err(err) = http1::Builder::new().serve_connection(io, service).await { + eprintln!("server error: {:?}", err); + }; + } +} diff --git a/PrometheusExporters/fake_kafka_exporter/Cargo.lock b/PrometheusExporters/fake_kafka_exporter/Cargo.lock new file mode 100644 index 0000000..4a39365 --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Cargo.lock @@ -0,0 +1,1006 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = 
"bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fake_kafka_exporter" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "rand", + "rand_distr", + "rdkafka", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rdkafka" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + +[[package]] +name = "rdkafka-sys" +version = "4.9.0+2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230dca48bc354d718269f3e4353280e188b610f7af7e2fcf54b7a79d5802872" +dependencies = [ + "libc", + "libz-sys", + "num_enum", + "pkg-config", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = 
"1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", +] 
+ +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = 
"wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + 
"windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = 
"windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" diff --git a/PrometheusExporters/fake_kafka_exporter/Cargo.toml b/PrometheusExporters/fake_kafka_exporter/Cargo.toml new file mode 100644 index 0000000..2be3518 --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "fake_kafka_exporter" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.0", features = ["derive"] } +rdkafka = "0.34" +serde = { version = "1.0", features = ["derive"] } 
+serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +rand = "0.9.1" +rand_distr = "0.5.1" +chrono = "0.4" diff --git a/PrometheusExporters/fake_kafka_exporter/Dockerfile b/PrometheusExporters/fake_kafka_exporter/Dockerfile new file mode 100644 index 0000000..1e1e14d --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/Dockerfile @@ -0,0 +1,17 @@ +# Use the official Rust image as a parent image +FROM rust:1.82 AS builder + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the Cargo.toml and Cargo.lock files +COPY Cargo.toml Cargo.lock ./ + +# Copy the source code +COPY src ./src + +# Build the application +RUN cargo build --release + +# Set the entrypoint to the fake_kafka_exporter binary +ENTRYPOINT ["target/release/fake_kafka_exporter"] diff --git a/PrometheusExporters/fake_kafka_exporter/installation/install.sh b/PrometheusExporters/fake_kafka_exporter/installation/install.sh new file mode 100755 index 0000000..b0667ec --- /dev/null +++ b/PrometheusExporters/fake_kafka_exporter/installation/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +echo "Building Fake Kafka Exporter Docker image..." +cd "$PARENT_DIR" +docker build . 
-f Dockerfile -t sketchdb-fake-kafka-exporter:latest
+
+echo "Fake Kafka Exporter Docker image built successfully: sketchdb-fake-kafka-exporter:latest"
diff --git a/PrometheusExporters/fake_kafka_exporter/src/main.rs b/PrometheusExporters/fake_kafka_exporter/src/main.rs
new file mode 100644
index 0000000..02117da
--- /dev/null
+++ b/PrometheusExporters/fake_kafka_exporter/src/main.rs
@@ -0,0 +1,249 @@
+use clap::Parser;
+use rand::rngs::SmallRng;
+use rand::SeedableRng;
+use rand_distr::{Distribution, Uniform};
+use rdkafka::config::ClientConfig;
+use rdkafka::producer::{FutureProducer, FutureRecord};
+use serde_json::{json, Value as JsonValue};
+use std::time::Duration;
+use tokio::time::sleep;
+
+const RNG_SEED: u64 = 0;
+
+/// Converts comma-separated string to vector of usize
+fn get_num_vals_per_column(num_values_str: &str, num_columns: usize) -> Vec<usize> {
+    let parse: Result<Vec<usize>, _> = num_values_str
+        .split(',')
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::parse::<usize>)
+        .collect();
+
+    let num_values: Vec<usize> = match parse {
+        Ok(list) => list,
+        Err(error) => panic!("Couldn't parse num_values_per_metadata_column: {error:?}"),
+    };
+
+    if num_values.len() == 1 {
+        vec![num_values[0]; num_columns]
+    } else {
+        if num_values.len() != num_columns {
+            panic!(
+                "Number of num_values_per_metadata_column must be equal to metadata_columns count (got {} vs {})",
+                num_values.len(),
+                num_columns
+            );
+        }
+        num_values
+    }
+}
+
+/// Computes all combinations of metadata column values
+fn compute_metadata_combinations(
+    column_names: &[String],
+    num_values_per_column: &[usize],
+) -> Vec<Vec<(String, String)>> {
+    // Build values for each column
+    let mut values_per_column: Vec<Vec<String>> = Vec::with_capacity(column_names.len());
+    for (col_idx, col_name) in column_names.iter().enumerate() {
+        let count = num_values_per_column[col_idx];
+        let mut bucket = Vec::with_capacity(count);
+        for value_idx in 0..count {
+            bucket.push(format!("{}_{}", col_name, value_idx));
+        }
+
values_per_column.push(bucket);
+    }
+
+    // Cartesian product
+    fn cartesian_product(pools: &[Vec<String>]) -> Vec<Vec<String>> {
+        let mut result: Vec<Vec<String>> = vec![Vec::new()];
+        for pool in pools {
+            let mut next = Vec::new();
+            for prefix in &result {
+                for item in pool {
+                    let mut new_prefix = prefix.clone();
+                    new_prefix.push(item.clone());
+                    next.push(new_prefix);
+                }
+            }
+            result = next;
+        }
+        result
+    }
+
+    let combos = cartesian_product(&values_per_column);
+
+    // Pair column names with values
+    combos
+        .into_iter()
+        .map(|combo| {
+            column_names
+                .iter()
+                .zip(combo.into_iter())
+                .map(|(name, val)| (name.clone(), val))
+                .collect()
+        })
+        .collect()
+}
+
+/// Builds a JSON record for a single data point
+fn build_json_record(
+    timestamp_ms: i64,
+    time_column: &str,
+    metadata: &[(String, String)],
+    value_columns: &[String],
+    rng: &mut SmallRng,
+    uniform_dist: &Uniform<f64>,
+) -> JsonValue {
+    let mut record = json!({});
+    let obj = record.as_object_mut().unwrap();
+
+    // Add timestamp
+    obj.insert(time_column.to_string(), json!(timestamp_ms));
+
+    // Add metadata columns
+    for (col_name, col_value) in metadata {
+        obj.insert(col_name.clone(), json!(col_value));
+    }
+
+    // Add value columns with random values
+    for col_name in value_columns {
+        let value = uniform_dist.sample(rng);
+        obj.insert(col_name.clone(), json!(value));
+    }
+
+    record
+}
+
+#[derive(Parser)]
+#[command(name = "fake_kafka_exporter")]
+#[command(about = "A fake data exporter that outputs SQL/tabular-style JSON records to Kafka")]
+struct Args {
+    #[arg(long, default_value = "localhost:9092", help = "Kafka broker address")]
+    kafka_broker: String,
+
+    #[arg(long, help = "Kafka topic name")]
+    kafka_topic: String,
+
+    #[arg(long, default_value = "time", help = "Name of the timestamp column")]
+    time_column: String,
+
+    #[arg(long, help = "Comma-separated metadata column names")]
+    metadata_columns: String,
+
+    #[arg(long, help = "Comma-separated counts per metadata column")]
+
num_values_per_metadata_column: String,
+
+    #[arg(long, help = "Comma-separated value column names")]
+    value_columns: String,
+
+    #[arg(
+        long,
+        default_value = "100.0",
+        help = "Max value for uniform distribution [0, value_scale]"
+    )]
+    value_scale: f64,
+
+    #[arg(long, default_value = "1", help = "Seconds between data batches")]
+    frequency: u64,
+
+    #[arg(long, default_value = "false", help = "Print records to console")]
+    debug_print: bool,
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args = Args::parse();
+
+    // Parse column names
+    let metadata_columns: Vec<String> = args
+        .metadata_columns
+        .split(',')
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    let value_columns: Vec<String> = args
+        .value_columns
+        .split(',')
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    if metadata_columns.is_empty() {
+        panic!("At least one metadata column is required");
+    }
+    if value_columns.is_empty() {
+        panic!("At least one value column is required");
+    }
+
+    // Parse num_values_per_metadata_column
+    let num_values_per_column =
+        get_num_vals_per_column(&args.num_values_per_metadata_column, metadata_columns.len());
+
+    // Compute all metadata combinations
+    let all_metadata_combinations =
+        compute_metadata_combinations(&metadata_columns, &num_values_per_column);
+
+    let num_combinations: usize = num_values_per_column.iter().product();
+    println!(
+        "Generated {} metadata combinations from {} columns",
+        num_combinations,
+        metadata_columns.len()
+    );
+
+    // Create Kafka producer
+    let producer: FutureProducer = ClientConfig::new()
+        .set("bootstrap.servers", &args.kafka_broker)
+        .set("message.timeout.ms", "5000")
+        .create()
+        .expect("Failed to create Kafka producer");
+
+    println!(
+        "Connected to Kafka broker: {}, topic: {}",
+        args.kafka_broker, args.kafka_topic
+    );
+
+    // Initialize RNG and distribution
+    let mut rng = SmallRng::seed_from_u64(RNG_SEED);
+    let uniform_dist =
Uniform::new_inclusive(0.0, args.value_scale) + .expect("Failed to create Uniform distribution"); + + // Main data generation loop + loop { + let timestamp_ms = chrono::Utc::now().timestamp_millis(); + + for metadata_combo in &all_metadata_combinations { + let record = build_json_record( + timestamp_ms, + &args.time_column, + metadata_combo, + &value_columns, + &mut rng, + &uniform_dist, + ); + + let record_str = serde_json::to_string(&record)?; + + if args.debug_print { + println!("{}", record_str); + } + + // Send to Kafka + let delivery_status = producer + .send( + FutureRecord::to(&args.kafka_topic) + .payload(&record_str) + .key(""), + Duration::from_secs(0), + ) + .await; + + if let Err((err, _)) = delivery_status { + eprintln!("Failed to send message to Kafka: {}", err); + } + } + + sleep(Duration::from_secs(args.frequency)).await; + } +} diff --git a/PrometheusExporters/installation/install.sh b/PrometheusExporters/installation/install.sh new file mode 100755 index 0000000..a4efbc3 --- /dev/null +++ b/PrometheusExporters/installation/install.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PROMETHEUS_EXPORTERS_DIR=$(dirname "$THIS_DIR") + +echo "Building PrometheusExporters Docker images..." + +# Build cluster data exporter +echo "Building Cluster Data Exporter..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/cluster_data_exporter/installation" + ./install.sh +) + +# Build fake exporter python +echo "Building Fake Exporter Python..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_exporter/fake_exporter_python/installation" + ./install.sh +) + +# Build fake exporter rust +echo "Building Fake Exporter Rust..." +( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_exporter/fake_exporter_rust/fake_exporter/installation" + ./install.sh +) + +# Build fake kafka exporter +echo "Building Fake Kafka Exporter..." 
+( + cd "$PROMETHEUS_EXPORTERS_DIR/fake_kafka_exporter/installation" + ./install.sh +) + +echo "All PrometheusExporters Docker images built successfully!" diff --git a/PrometheusExporters/query_cost_exporter/QueryCostExporter.py b/PrometheusExporters/query_cost_exporter/QueryCostExporter.py new file mode 100644 index 0000000..8448643 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/QueryCostExporter.py @@ -0,0 +1,324 @@ +from typing import Dict, List, Tuple + +import query_cost as query_cost +from loguru import logger +from process.ProcessMonitorHook import ProcessMetricSnapshot, ProcessMonitorHook +from prometheus_client import Gauge, start_http_server +from query_cost import CostModel, CostModelOption + + +class QueryCostExporterHook(ProcessMonitorHook): + """ + Wrapper class for the QueryCostExporter + """ + + def __init__( + self, + monitor_to_models_map: Dict[str, List[CostModelOption]], + addr: str, + port: int, + ): + self.port = port + self.addr = addr + self.monitor_to_models_map = monitor_to_models_map + self.exporter = None + + def init(self): + """ + Instantiates the cost exporter and launches it for exporting + """ + self.exporter = QueryCostExporter( + self.monitor_to_models_map, self.addr, self.port + ) + self.exporter.launch() + + def update(self, value): + """ + Updates exporter metrics using the given value + """ + if self.exporter is not None: + self.exporter.export_recent_measurement(value) + else: + raise RuntimeError( + "Exporter is None, remember to call init() before using this hook" + ) + + def close(self): + """ + Shuts down the cost exporter + """ + if self.exporter is not None: + self.exporter.shutdown() + else: + raise RuntimeError( + "Error closing hook, exporter is None. Did you remember to call init()?" + ) + + +class QueryCostExporter: + @staticmethod + def _IP_valid(addr): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. 
At the moment, this function considers + any properly formatted IP address as valid. + """ + if not isinstance(addr, str): + raise TypeError("IP address must be a string") + + if addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + return + + @staticmethod + def _port_valid(port): + """ + Verifies that a given port is of the correct type and is a "valid" + port to be used by the exporter. At the moment, this function considers + any properly formatted port as valid + """ + if not isinstance(port, int): + raise TypeError("Port must be an integer") + if port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + return + + @staticmethod + def _monitor_to_models_map_valid(monitor_to_models_map): + """ + Verifies that the monitor_to_models_map given during object creation + is valid, e.g. is a dictionary with valid keys and values + """ + # Check map itself (Correct type, non-empty) + if monitor_to_models_map is None: + raise TypeError("Monitor to cost models map is None.") + elif not isinstance(monitor_to_models_map, dict): + raise TypeError("Monitor to cost models map must be a dictionary.") + elif len(monitor_to_models_map) == 0: + raise ValueError("Monitor to cost models map must not be empty.") + + # Check key-value pairs (Correct types, each monitor has at least one cost model) + for monitor in monitor_to_models_map: + if not isinstance(monitor, str): + raise TypeError("Monitor names in the map must be given as strings.") + + cost_models = monitor_to_models_map[monitor] + + if cost_models is None: + raise TypeError(f"Cost model list for {monitor} is None.") + elif not isinstance(cost_models, list): + raise TypeError( + f"Cost models for {monitor} must be given as a list of CostModelOption." 
+ ) + elif len(cost_models) == 0: + raise ValueError(f"Cost model list for {monitor} is empty") + + for model in cost_models: + if not isinstance(model, type(CostModelOption.NO_TRANSFORM)): + raise TypeError( + f"List of cost models for {monitor} contains one or more element that is not a CostModelOption" + ) + + # NOTE: Implementation only uses prometheus Gauges + @staticmethod + def _create_prom_metric( + monitor_metric_name: str, cost_model: CostModelOption, metric_labels: List[str] + ) -> Gauge: + """ + Creates a single prometheus metric for a single monitor (e.g. cpu_percent) and + one of the cost functions applied to it. The name of the metric as seen by prometheus + will be "_", e.g. + "cpu_percent_NO_TRANSFORM" + """ + prom_metric_name = "{}_{}".format(monitor_metric_name, cost_model.name) + prom_description = "{}({})".format(cost_model.name, monitor_metric_name) + + return Gauge(prom_metric_name, prom_description, metric_labels) + + # NOTE Only uses prometheus gauges for metrics at the moment + @staticmethod + def _init_prom_metrics( + monitor_to_models_map, + ) -> Dict[str, List[Tuple[CostModel, Gauge]]]: + """ + Creates a dictionary which maps the name of a monitor to a list of tuples, + where each tuple contains a cost model object as the first element + and the corresponding prometheus metric as the second element, + e.g. 
Dict = {"cpu_percent": [(cost_model, Gauge), ...]} + """ + prometheus_metrics = {} + + for monitor_metric in monitor_to_models_map: + models_and_prom_metrics = [] + for cost_model_option in monitor_to_models_map[monitor_metric]: + cost_model = query_cost.create_model(cost_model_option) + prom_metric = QueryCostExporter._create_prom_metric( + monitor_metric, cost_model_option, ["keyword", "PID"] + ) + model_and_prom_metric = (cost_model, prom_metric) + models_and_prom_metrics.append(model_and_prom_metric) + + prometheus_metrics[monitor_metric] = models_and_prom_metrics + + return prometheus_metrics + + def __init__( + self, + monitor_to_models_map: Dict[str, List[CostModelOption]], + addr: str, + port: int, + ): + self.logger = logger.bind(module="query_cost_exporter") + + self.port = port + self.addr = addr + self.monitor_to_models_map = monitor_to_models_map + + self.http_server = None + self.server_thread = None + + # Verify input parameters + try: + QueryCostExporter._IP_valid(self.addr) + QueryCostExporter._port_valid(self.port) + QueryCostExporter._monitor_to_models_map_valid(self.monitor_to_models_map) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryCostExporter: {str(e)}") + raise e + + self.prometheus_metrics_map = QueryCostExporter._init_prom_metrics( + self.monitor_to_models_map + ) + self.logger.info("QueryCostExporter object created") + + def __enter__(self): + return self + + def __exit__(self, *args): + self.shutdown() + + def launch(self): + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Cost exporter failed to launch: exporter IP is None") + + if self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError("Cost exporter failed to launch: exporter port is None") + + self.logger.info(f"Launching 
cost exporter at {self.addr}:{self.port}...") + + try: + self.http_server, self.server_thread = start_http_server( + addr=self.addr, port=self.port + ) + except Exception as e: + self.logger.error(f"Failed to start http server due to exception: {str(e)}") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}:{self.port}") + print(f"Exporter running at {self.addr}:{self.port}") + + return + + def shutdown(self): + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down cost exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Cost exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server thread: {str(e)}") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Cost exporter server thread is None") + + print("Exporter shut down successfully") + return + + # NOTE: This function is blocking. 
Exporting the new information requires + # the calling thread to perform all cost modelling calculations, + # so be wary when using cost models which take substantial time to + # compute + def export_recent_measurement(self, iteration_info: List[ProcessMetricSnapshot]): + """ + Takes a list of snapshots for every process and monitor from the most + recent iteration in process_monitor + """ + if iteration_info is None: + raise TypeError("Failed to export iteration, iteration_info is None") + elif not isinstance(iteration_info, list): + raise TypeError("iteration_info must be a list of ProcessMetricSnapshots") + + for snapshot in iteration_info: + self.export_snapshot(snapshot) + + # NOTE: Function logic currently assumes all prometheus metrics are Gauges + # NOTE: This function is blocking. Since this function makes the necessary + # calls to compute costs, beware of cost models which take a while to + # compute + def export_snapshot(self, snapshot: ProcessMetricSnapshot): + """ + Updates all prometheus metrics corresponding to the given monitor. The + function applies the corresponding cost function to the given value + before exporting + """ + if snapshot is None: + self.logger.error("Exporter given None snapshot") + raise TypeError("Attempt to export a None snapshot") + elif not isinstance(snapshot, ProcessMetricSnapshot): + self.logger.error("Wrong argument") + raise TypeError( + "export_snapshot() argument must be a ProcessMetricSnapshot" + ) + + pid = snapshot.pid + keyword = snapshot.keyword + monitor_name = snapshot.monitor_name + measurement = snapshot.value + self.logger.trace( + f"Updating for pid={pid}, keyword={keyword}, monitor_name={monitor_name}, measurement={measurement}" + ) + + if monitor_name in self.prometheus_metrics_map: + metric_list = self.prometheus_metrics_map[monitor_name] + for cost_model, prometheus_metric in metric_list: + # NOTE: For a computation like a sum, the cost is being computed + # using every measurement, i.e. 
across all PIDs and keywords, + # so PID and keyword labels are meaningless in these cases. + cost = cost_model.compute(measurement) + if cost is not None and prometheus_metric is not None: + prometheus_metric.labels(keyword=keyword, PID=pid).set(cost) + + return diff --git a/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py b/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py new file mode 100644 index 0000000..167e784 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/process/ProcessMonitorHook.py @@ -0,0 +1,39 @@ +from abc import ABC, abstractmethod +from typing import Any, Optional + + +class ProcessMonitorHook(ABC): + """ + Abstract parent class for any hooks in process_monitor + """ + + @abstractmethod + def init(self): + pass + + @abstractmethod + def update(self, value: Any): + pass + + @abstractmethod + def close(self): + pass + + +class ProcessMetricSnapshot: + """ + Class for providing hooks with a consistent format for a single measurement + for a single process + """ + + def __init__( + self, + pid: int, + value: Any, + keyword: Optional[str] = None, + monitor_name: Optional[str] = None, + ): + self.pid = pid + self.keyword = keyword + self.monitor_name = monitor_name + self.value = value diff --git a/PrometheusExporters/query_cost_exporter/process/__init__.py b/PrometheusExporters/query_cost_exporter/process/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/PrometheusExporters/query_cost_exporter/query_cost.py b/PrometheusExporters/query_cost_exporter/query_cost.py new file mode 100644 index 0000000..627b993 --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/query_cost.py @@ -0,0 +1,136 @@ +""" +Rather than instantiating any of these cost models directly, it is preferred +for the user to use create_model(CostModelOption, *args) to initialize the cost model. + +When implementing a new model, the abstract CostModel() class should be used +as a parent class. 
Once a new model is implemented, it should be added to the +CostModelOption enum and the create_model function. +""" + +from abc import ABC, abstractmethod +from enum import Enum, auto +from typing import Any + + +# flake8: noqa +# Enum for available cost models +class CostModelOption(Enum): + """ + Enumeration of implemented cost models. + """ + + NO_TRANSFORM = auto() + SUM = auto() + ARITHMETIC_AVG = auto() + + +class CostModel(ABC): + """ + Abstract class representing any cost model. All implemented cost models + must be a child class of this abstract class. + """ + + @abstractmethod + def __init__(self): + """ + Any initial setup for models which require it. Usually, these are + models which maintain some sort of state + """ + pass + + @abstractmethod + def compute(self, x: Any) -> Any: + """ + Absract method for updating a cost model (if it has memory). It must + return the output of the model after updating + """ + pass + + +class NoTransform(CostModel): + """ + CostModel which applies no transformation when computing, i.e. calls to + compute simply return the input argument + """ + + def __init__(self): + pass + + def compute(self, x: Any): + return x + + @property + def name(self): + return "NO_TRANSFORM" + + +# NOTE: Assumes scalar inputs (e.g. int and float) +class Sum(CostModel): + """ + Model to represent the running sum of all samples + """ + + def __init__(self): + self.sum = 0 + + def compute(self, x: Any) -> Any: + """ + Returns the sum of x and all previous values + """ + if x is None: + raise TypeError("Input argument cannot be None") + self.sum += x + return self.sum + + @property + def name(self): + return "SUM" + + +# NOTE: Assumes scalar inputs (e.g. 
int and float) +class ArithmeticAverage(CostModel): + """ + Model to represent a running average across all samples + """ + + def __init__(self): + self.average = 0 + self.n = 0 + + def compute(self, x: Any) -> Any: + """ + Computes and returns the new average after including x + + Updates the internal average + """ + if x is None: + raise TypeError("Input argument cannot be None") + + self.n += 1 + self.average = self.average * (self.n - 1) / self.n + x / self.n + return self.average + + @property + def name(self): + return "ARITHMETIC_AVG" + + +def create_model(cost_model_option: CostModelOption, *args): + """ + Given a CostModelOption, initialize and return the corresponding cost model. + *args is to provide a CostModel with additional creation arguments if + the particular model takes additional parameters during creation + """ + if cost_model_option is None: + raise TypeError("cost_model_option cannot be None") + elif not isinstance(cost_model_option, type(CostModelOption.NO_TRANSFORM)): + raise TypeError("First argument, cost_model_option, must be a CostModelOption") + + if cost_model_option == CostModelOption.NO_TRANSFORM: + return NoTransform() + elif cost_model_option == CostModelOption.SUM: + return Sum() + elif cost_model_option == CostModelOption.ARITHMETIC_AVG: + return ArithmeticAverage() + else: + raise ValueError("Given cost model option not implemented.") diff --git a/PrometheusExporters/query_cost_exporter/requirements.txt b/PrometheusExporters/query_cost_exporter/requirements.txt new file mode 100644 index 0000000..022a5ad --- /dev/null +++ b/PrometheusExporters/query_cost_exporter/requirements.txt @@ -0,0 +1,2 @@ +loguru==0.7.3 +prometheus_client==0.22.1 diff --git a/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py b/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py new file mode 100644 index 0000000..766e42a --- /dev/null +++ b/PrometheusExporters/query_latency_exporter/QueryLatencyExporter.py @@ -0,0 +1,175 
@@ +from loguru import logger +from prometheus_client import Gauge, start_http_server + + +class QueryLatencyExporter: + + @staticmethod + def _IP_valid(addr): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if addr is None: + raise TypeError("IP address cannot be None") + elif not isinstance(addr, str): + raise TypeError("IP address must be a string") + elif addr == "localhost": + return + + addr_nums = addr.split(sep=".") + if len(addr_nums) != 4: + raise ValueError("Improperly formatted IPv4 address") + for num_str in addr_nums: + if int(num_str) < 0 or int(num_str) > 255: + raise ValueError("Improperly formatted IPv4 address") + return + + @staticmethod + def _port_valid(port): + """ + Verifies that a given ip address is of the correct type and is a "valid" + IP address for running the exporter. At the moment, this function considers + any properly formatted IP address as valid + """ + if port is None: + raise TypeError("Port cannot be None") + elif not isinstance(port, int): + raise TypeError("Port must be an integer") + elif port < 0 or port > 65535: + raise ValueError("Improperly formatted port") + + return + + def __init__(self, addr: str, port: int): + self.logger = logger.bind(module="query_latency_exporter") + self.port = port + self.addr = addr + + self.http_server = None + self.server_thread = None + + try: + QueryLatencyExporter._IP_valid(self.addr) + QueryLatencyExporter._port_valid(self.port) + except (TypeError, ValueError) as e: + self.logger.error(f"Failed to create QueryLatencyExporter: {str(e)}") + raise e + + self.latencies_metric = Gauge( + "query_latencies", "Query latencies", labelnames=["query_index", "server"] + ) + self.cumulative_latencies_metric = Gauge( + "cumulative_query_latencies", + "Query cumulative latencies", + labelnames=["query_index", "server"], + ) + 
self.logger.info("QueryLatencyExporter object created") + + def __enter__(self): + return self + + def __exit__(self, *args): + self.shutdown() + + def launch(self): + """ + Launches the exporter's http_server and server thread for exporting metrics + to be scraped by Prometheus + """ + if self.addr is None: + self.logger.error("Launch failed: Exporter IP address is None") + raise RuntimeError("Latency exporter failed to launch: exporter IP is None") + elif self.port is None: + self.logger.error("Launch failed: Exporter port is None") + raise RuntimeError( + "Latency exporter failed to launch: exporter port is None" + ) + + self.logger.info(f"Launching latency exporter at {self.addr}: {self.port}") + + try: + self.http_server, self.server_thread = start_http_server( + addr=self.addr, port=self.port + ) + except Exception as e: + self.logger.error(f"Failed to start http server due to exception: {str(e)}") + e.add_note("Latency exporter failed to launch") + raise e + + self.logger.info(f"Exporter successfully started at {self.addr}: {self.port}") + print(f"Exporter running at {self.addr}: {self.port}") + + return + + def shutdown(self): + """ + Cleans up all resources associated with the exporter, mainly the + http_server and corresponding server thread + """ + print("Shutting down latency exporter server and joining server thread...") + + self.logger.info("Shutting down server...") + if self.http_server is not None: + try: + self.http_server.shutdown() + except Exception as e: + self.logger.error(f"Error shutting down http_server: {str(e)}") + e.add_note("Attempt to shutdown exporter http_server failed.") + raise e + self.logger.info("Shut down server successfully") + else: + self.logger.error("Exporter http_server is None") + raise RuntimeError("Exporter http_server is None") + + self.logger.info("Joining server thread...") + if self.server_thread is not None: + try: + self.server_thread.join() + except Exception as e: + self.logger.error(f"Error joining server 
thread: {str(e)}") + e.add_note("Attempt to join exporter's server thread failed.") + raise e + self.logger.info("Joined server thread successfully") + else: + self.logger.error("Exporter server thread is None") + raise RuntimeError("Exporter server thread is None") + + print("Exporter shut down successfully") + return + + def export_repetition(self, repetition_idx: int, result): + """ + Exports a single repetition result for all queries + """ + if not isinstance(repetition_idx, int): + self.logger.error("Given non-integer repetition_idx") + raise TypeError("Repetition index must be an integer") + + self.logger.trace(f"Updating metrics for repetition no.{repetition_idx}") + + if result is None: + self.logger.error("Repetition result is None") + raise TypeError("Repetition result is None") + + for server_name in result: + for query_idx in result[server_name]: + query_result_across_time = result[server_name][query_idx] + query_rep_result = query_result_across_time.query_results[ + repetition_idx + ] + latency = query_rep_result.latency + cumulative_latency = query_rep_result.cumulative_latency + + if latency is not None: + self.latencies_metric.labels( + query_index=str(query_idx), server=server_name + ).set(latency) + + if cumulative_latency is not None: + self.cumulative_latencies_metric.labels( + query_index=str(query_idx), server=server_name + ).set(cumulative_latency) + + return diff --git a/PrometheusExporters/query_latency_exporter/requirements.txt b/PrometheusExporters/query_latency_exporter/requirements.txt new file mode 100644 index 0000000..022a5ad --- /dev/null +++ b/PrometheusExporters/query_latency_exporter/requirements.txt @@ -0,0 +1,2 @@ +loguru==0.7.3 +prometheus_client==0.22.1 diff --git a/QueryEngineRust/.cargo/config.toml b/QueryEngineRust/.cargo/config.toml new file mode 100644 index 0000000..c91c3f3 --- /dev/null +++ b/QueryEngineRust/.cargo/config.toml @@ -0,0 +1,2 @@ +[net] +git-fetch-with-cli = true diff --git a/QueryEngineRust/.gitignore 
b/QueryEngineRust/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/QueryEngineRust/.gitignore @@ -0,0 +1 @@ +target diff --git a/QueryEngineRust/Cargo.toml b/QueryEngineRust/Cargo.toml new file mode 100644 index 0000000..567bcc0 --- /dev/null +++ b/QueryEngineRust/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "query_engine_rust" +version = "0.1.0" +edition = "2021" + +[dependencies] +sketch-core = { path = "../sketch-core" } +form_urlencoded = "1.2" +promql_utilities = { path = "../CommonDependencies/dependencies/rs/promql_utilities" } +sql_utilities = { path = "../CommonDependencies/dependencies/rs/sql_utilities" } +sketch_db_common = { path = "../CommonDependencies/dependencies/rs/sketch_db_common" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" +rmp-serde = "1.1" +tokio = { version = "1.0", features = ["full"] } +axum = "0.7" +rdkafka = "0.34" +rusqlite = { version = "0.31", features = ["bundled"] } +clap = { version = "4.0", features = ["derive"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" +thiserror = "1.0" +anyhow = "1.0" +bincode = "1.3" +dashmap = "5.5" +uuid = { version = "1.0", features = ["v4"] } +chrono = { version = "0.4", features = ["serde"] } +structopt = "0.3" +urlencoding = "2.1" +flate2 = "1.0" +async-trait = "0.1" +promql-parser = "0.5.0" +reqwest = { version = "0.11", features = ["json"] } +xxhash-rust = { version = "0.8", features = ["xxh32", "xxh64"] } +dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs" } +base64 = "0.21" +hex = "0.4" +sqlparser = "0.59.0" +datafusion = "43" +arrow = "53.4.1" +datafusion_summary_library = { path = "../CommonDependencies/dependencies/rs/datafusion_summary_library" } +futures = "0.3" +prost = "0.13" +snap = "1" +regex = "1" +prometheus = "0.13" +lazy_static = "1.4" +zstd = "0.13" + +[dev-dependencies] +tempfile = "3.20.0" + +[features] +#default = 
["lock_profiling", "extra_debugging"] +default = [] +# Enable lock profiling instrumentation +lock_profiling = [] +# Enable extra debugging output +extra_debugging = [] diff --git a/QueryEngineRust/Dockerfile b/QueryEngineRust/Dockerfile new file mode 100644 index 0000000..c43d757 --- /dev/null +++ b/QueryEngineRust/Dockerfile @@ -0,0 +1,68 @@ +# QueryEngine Rust/Dockerfile +# Multi-stage build for Rust application + +FROM rust:1.89 AS builder + +LABEL maintainer="SketchDB Team" +LABEL description="QueryEngine Rust service for SketchDB" + +WORKDIR /code + +# Copy the CommonDependencies directory +COPY CommonDependencies ./CommonDependencies + +# Copy path dependencies of QueryEngineRust +COPY sketch-core ./sketch-core + +COPY Cargo.toml ./ +COPY Cargo.lock ./ +COPY QueryEngineRust/Cargo.toml ./QueryEngineRust/ +# COPY QueryEngineRust/.cargo ./QueryEngineRust/.cargo + +# Create a dummy main.rs to build dependencies +RUN mkdir -p QueryEngineRust/src && echo "fn main() {}" > QueryEngineRust/src/main.rs + +# Build dependencies (this layer will be cached) +# Uses BuildKit secret mount to pass git credentials without baking into a layer +WORKDIR /code/QueryEngineRust +RUN cargo build --release && rm -rf src/ +# RUN --mount=type=secret,id=git_token \ +# if [ -f /run/secrets/git_token ]; then \ +# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ +# fi && \ +# cargo build --release && rm -rf src/ + +# Copy source code +COPY QueryEngineRust/src ./src + +# Build the actual application +RUN touch src/main.rs && cargo build --release +# RUN --mount=type=secret,id=git_token \ +# if [ -f /run/secrets/git_token ]; then \ +# git config --global url."https://x-access-token:$(cat /run/secrets/git_token)@github.com/".insteadOf "https://github.com/"; \ +# fi && \ +# touch src/main.rs && cargo build --release + +# Runtime stage with Ubuntu 24.04 (has newer glibc/libstdc++) +FROM ubuntu:24.04 + +WORKDIR /app + 
+# Install minimal runtime dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + zlib1g \ + && rm -rf /var/lib/apt/lists/* + +# Copy the built binary +COPY --from=builder /code/target/release/query_engine_rust /usr/local/bin/query_engine_rust + +# Expose the HTTP server port +EXPOSE 8088 + +# Note: Running as root to match Python QueryEngine behavior +# This allows writing to mounted volumes without permission issues + +# Use ENTRYPOINT to allow passing command line arguments +ENTRYPOINT ["query_engine_rust"] diff --git a/QueryEngineRust/LICENSE b/QueryEngineRust/LICENSE new file mode 100644 index 0000000..404d657 --- /dev/null +++ b/QueryEngineRust/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 SketchDB + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/QueryEngineRust/docker-compose.yml.j2 b/QueryEngineRust/docker-compose.yml.j2 new file mode 100644 index 0000000..43fd7e1 --- /dev/null +++ b/QueryEngineRust/docker-compose.yml.j2 @@ -0,0 +1,43 @@ +# QueryEngine Rust Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +version: '3.8' + +services: + queryengine-rust: + image: sketchdb-queryengine-rust:latest + container_name: {{ container_name }} + environment: + - RUST_LOG={{ log_level }} + - RUST_BACKTRACE=1 + ports: + - "{{ http_port }}:8088" + network_mode: "host" + volumes: + # Mount output directory for experiment results + - "{{ experiment_output_dir }}:/app/outputs" + # Mount controller output directory for configuration files (read-only) + - "{{ controller_remote_output_dir }}:/app/controller_output:ro" + command: [ + "--kafka-topic", "{{ kafka_topic }}", + "--kafka-broker", "{{ kafka_host }}:9092", + "--input-format", "{{ input_format }}", + "--config", "/app/controller_output/inference_config.yaml", + "--streaming-config", "/app/controller_output/streaming_config.yaml", + "--prometheus-server", "http://{{ prometheus_host }}:{{ prometheus_port }}", + "--prometheus-scrape-interval", "{{ prometheus_scrape_interval }}", + "--delete-existing-db", + "--log-level", "{{ log_level }}", + "--output-dir", "/app/outputs", + "--streaming-engine", "{{ streaming_engine }}", + "--query-language", "{{ query_language }}", + "--lock-strategy", "{{ lock_strategy }}"{% if compress_json %}, + "--decompress-json"{% endif %}{% if profile_query_engine %}, + "--do-profiling"{% endif %}{% if forward_unsupported_queries %}, + "--forward-unsupported-queries"{% endif %}{% if dump_precomputes %}, + "--dump-precomputes"{% endif %} + ] + extra_hosts: + - "kafka:{{ kafka_host }}" + - "prometheus:{{ prometheus_host }}" + restart: no diff --git a/QueryEngineRust/docs/README.md b/QueryEngineRust/docs/README.md new file mode 100644 index 0000000..1fc7ce7 --- /dev/null +++ 
b/QueryEngineRust/docs/README.md @@ -0,0 +1,114 @@ +# QueryEngineRust Developer Documentation + +Welcome to the QueryEngineRust developer documentation! This directory contains guides for extending the system with new components. + +## Architecture Overview + +QueryEngineRust is organized into clear, extensible layers: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Client Applications │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Protocol Servers (HTTP, etc.) │ +│ - Parse protocol-specific requests │ +│ - Route to appropriate adapter │ +│ - Handle protocol-specific endpoints │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Protocol Adapters (Prometheus, etc.) │ +│ - Parse query language (PromQL, SQL, etc.) │ +│ - Format responses for protocol │ +│ - Determine if query is supported │ +└─────────────────────────────────────────────────────────┘ + │ + ┌──────┴──────┐ + ▼ ▼ + ┌─────────────────┐ ┌──────────────────┐ + │ Query Engine │ │ Fallback Client │ + │ - Execute │ │ - Forward │ + │ queries │ │ unsupported │ + │ - Return │ │ queries │ + │ results │ │ │ + └────────┬────────┘ └──────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Store │ + │ - Data storage │ + │ - Sketches │ + └─────────────────┘ + ▲ + │ + ┌────────┴────────┐ + │ Ingest Drivers │ + │ - Kafka, etc. │ + └─────────────────┘ +``` + +## Directory Structure + +``` +src/drivers/ +├── ingest/ # Data ingestion (Kafka, etc.) +├── query/ +│ ├── adapters/ # Protocol adapters (Prometheus HTTP, etc.) +│ ├── fallback/ # Fallback backends (Prometheus, ClickHouse, etc.) +│ └── servers/ # Protocol servers (HTTP, Flight SQL, etc.) 
+``` + +## Extension Guides + +- **[Adding a Protocol Adapter](./adding-protocol-adapter.md)** - Add support for new query protocols (e.g., ClickHouse HTTP API) +- **[Adding a Fallback Backend](./adding-fallback-backend.md)** - Add new fallback query backends (e.g., DuckDB, Elasticsearch) +- **[Adding a Protocol Server](./adding-protocol-server.md)** - Add new protocol servers (e.g., Flight SQL, gRPC) + +## Key Concepts + +### Protocol Adapter +Handles protocol-specific request/response formatting and query parsing. Examples: Prometheus HTTP API, ClickHouse HTTP API. + +### Fallback Backend +External query system to forward unsupported queries to. Examples: Prometheus, ClickHouse, DuckDB. + +### Protocol Server +Handles network communication for a specific protocol. Examples: HTTP server, Flight SQL server. + +## Quick Reference + +### Adding a Protocol Adapter +1. Create `src/drivers/query/adapters/my_adapter.rs` +2. Implement `HttpProtocolAdapter` trait +3. Add to factory in `factory.rs` +4. Update `QueryProtocol` enum + +### Adding a Fallback Backend +1. Create `src/drivers/query/fallback/my_backend.rs` +2. Implement `FallbackClient` trait +3. Export from `fallback/mod.rs` + +### Adding a Protocol Server +1. Create `src/drivers/query/servers/my_server.rs` +2. Implement server logic with appropriate adapter +3. Export from `servers/mod.rs` + +## Testing + +Each component should include: +- Unit tests in the same file +- Integration tests in `src/tests/` +- Example usage in documentation + +## Contributing + +When adding new components: +1. Follow existing naming conventions +2. Add comprehensive documentation +3. Include tests +4. Update this documentation +5. 
Keep backward compatibility diff --git a/QueryEngineRust/docs/adding-fallback-backend.md b/QueryEngineRust/docs/adding-fallback-backend.md new file mode 100644 index 0000000..847a899 --- /dev/null +++ b/QueryEngineRust/docs/adding-fallback-backend.md @@ -0,0 +1,117 @@ +# Adding a Fallback Backend + +Fallback backends allow forwarding unsupported queries to external systems. This guide shows how to add support for a new fallback backend. + +## Overview + +A fallback backend: +- Accepts queries in a specific language (SQL, PromQL, etc.) +- Makes HTTP/gRPC/native calls to external system +- Returns results in a generic format +- Optionally provides runtime/health information + +## Example: Adding DuckDB HTTP Fallback + +### Step 1: Create the Fallback Client + +Create `src/drivers/query/fallback/duckdb.rs`: + +```rust +/// Fallback client for DuckDB HTTP API +pub struct DuckDBHttpFallback { + client: Client, + base_url: String, +} + +impl DuckDBHttpFallback { + pub fn new(base_url: String) -> Self { + Self { + client: Client::new(), + base_url, + } + } +} + +#[derive(Debug, Deserialize)] +struct DuckDBResponse { + success: bool, + data: Option>>, + columns: Option>, + error: Option, +} + +#[async_trait] +impl FallbackClient for DuckDBHttpFallback { + async fn execute_query( + &self, + request: &ParsedQueryRequest, + ) -> Result, StatusCode> { + ... + } + + async fn get_runtime_info(&self) -> Result { + ... 
+ } +} +``` + +### Step 2: Export from Module + +Update `src/drivers/query/fallback/mod.rs`: + +```rust +mod duckdb; +pub use duckdb::DuckDBHttpFallback; +``` + +### Step 3: Use in Configuration + +The fallback client can now be used in adapter configuration: + +```rust +use crate::drivers::query::adapters::AdapterConfig; +use crate::drivers::query::fallback::DuckDBHttpFallback; +use std::sync::Arc; + +// Create adapter config with DuckDB fallback +let fallback = Some(Arc::new( + DuckDBHttpFallback::new("http://localhost:8080".to_string()) +) as Arc); + +let config = AdapterConfig::new( + QueryProtocol::PrometheusHttp, // Protocol for incoming queries + QueryLanguage::sql, // Query language + fallback, // DuckDB fallback +); +``` + +### Step 4: Add Tests + +Add tests in `duckdb.rs`: + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_duckdb_fallback_creation() { + ... + } + + // Mock DuckDB server test would go here +} +``` + +## FallbackClient Trait Methods + +### Required: `execute_query()` +- Accepts a `ParsedQueryRequest` (query string + time) +- Makes external call to backend +- Returns `Json` response +- Should handle all error cases gracefully + +### Optional: `get_runtime_info()` +- Returns health/status information from backend +- Has default implementation (returns empty JSON) +- Override if backend has health endpoint diff --git a/QueryEngineRust/docs/adding-protocol-adapter.md b/QueryEngineRust/docs/adding-protocol-adapter.md new file mode 100644 index 0000000..b2ce68f --- /dev/null +++ b/QueryEngineRust/docs/adding-protocol-adapter.md @@ -0,0 +1,133 @@ +# Adding a Protocol Adapter + +Protocol adapters handle protocol-specific request/response formatting and query language parsing. This guide shows how to add support for a new query protocol. + +## Overview + +A protocol adapter: +- Parses incoming requests (GET/POST parameters, headers, etc.) 
+- Translates queries to internal format +- Formats query results for the protocol +- Defines protocol-specific endpoints + +## Example: Adding ClickHouse HTTP Adapter + +### Step 1: Create the Adapter File + +Create `src/drivers/query/adapters/clickhouse_http.rs`: + +```rust + +/// ClickHouse HTTP protocol adapter +pub struct ClickHouseHttpAdapter { + config: AdapterConfig, +} + +impl ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl QueryRequestAdapter for ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl QueryResponseAdapter for ClickHouseHttpAdapter { + ... +} + +#[async_trait] +impl HttpProtocolAdapter for ClickHouseHttpAdapter { + ... +} +``` + +### Step 2: Add Protocol Enum Variant + +Update `src/data_model/enums.rs` to add the new protocol: + +```rust +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum QueryProtocol { + ... + ClickHouseHttp, // Add this +} +``` + +### Step 3: Export from Module + +Update `src/drivers/query/adapters/mod.rs`: + +```rust +pub mod clickhouse_http; +pub use clickhouse_http::ClickHouseHttpAdapter; +``` + +### Step 4: Add to Factory + +Update `src/drivers/query/adapters/factory.rs`: + +```rust +pub fn create_http_adapter(config: AdapterConfig) -> Arc { + match config.protocol { + ... + QueryProtocol::ClickHouseHttp => { // Add this + Arc::new(ClickHouseHttpAdapter::new(config)) + } + } +} +``` + +### Step 5: Add Convenience Constructor (Optional) + +Update `src/drivers/query/adapters/config.rs`: + +```rust +impl AdapterConfig { + pub fn clickhouse_http(fallback_url: String, forward_unsupported: bool) -> Self { + ... + } +} +``` + +### Step 6: Test the Adapter + +Add tests in `clickhouse_http.rs`: + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_parse_get_request() { + ... 
+ } +} +``` + +## Key Traits to Implement + +### Required: `QueryRequestAdapter` +- `parse_get_request()` - Parse GET requests +- `parse_post_request()` - Parse POST requests +- `get_query_endpoint()` - Return endpoint path + +### Required: `QueryResponseAdapter` +- `format_success_response()` - Format successful query results +- `format_error_response()` - Format errors +- `format_unsupported_query_response()` - Format unsupported query errors + +### Required: `HttpProtocolAdapter` +- `adapter_name()` - Return adapter name for logging +- `get_runtime_info_path()` - Return health/status endpoint path +- `handle_runtime_info()` - Handle health/status requests + +## Common Gotchas + +- Don't implement query execution in the adapter - that's the engine's job +- Don't hard-code URLs or configuration - use `AdapterConfig` +- Handle both GET and POST requests appropriately +- Return protocol-specific error formats +- Use existing types from `traits.rs` (`ParsedQueryRequest`, `QueryExecutionResult`) diff --git a/QueryEngineRust/docs/adding-protocol-server.md b/QueryEngineRust/docs/adding-protocol-server.md new file mode 100644 index 0000000..ded7ed6 --- /dev/null +++ b/QueryEngineRust/docs/adding-protocol-server.md @@ -0,0 +1,105 @@ +# Adding a Protocol Server + +Protocol servers handle network communication for specific protocols. This guide shows how to add a new protocol server (like Flight SQL, gRPC, etc.). + +## Overview + +A protocol server: +- Listens on a network port +- Handles protocol-specific requests +- Uses adapters to process queries +- Returns protocol-specific responses + +## Example: Adding Flight SQL Server + +Flight SQL is Apache Arrow's SQL protocol over gRPC. 
Here's how to add it: + +### Step 1: Create the Server + +Create `src/drivers/query/servers/flight_sql.rs`: + +```rust +#[derive(Debug, Clone)] +pub struct FlightSqlServerConfig { + pub port: u16, + pub adapter_config: AdapterConfig, +} + +pub struct FlightSqlServer { + config: FlightSqlServerConfig, + query_engine: Arc, + store: Arc, +} + +impl FlightSqlServer { + pub fn new( + config: FlightSqlServerConfig, + query_engine: Arc, + store: Arc, + ) -> Self { + Self { + config, + query_engine, + store, + } + } + + pub async fn run(self) -> Result<(), Box> { + ... + } +} +``` + +### Step 3: Export from Module + +Update `src/drivers/query/servers/mod.rs`: + +```rust +pub mod flight_sql; +pub use flight_sql::{FlightSqlServer, FlightSqlServerConfig}; +``` + +### Step 4: Update Main Binary + +Update `src/main.rs` to support choosing the server: + +```rust +#[derive(Parser, Debug)] +struct Args { + // ... existing args ... + + /// Server protocol to use (http, flight_sql) + #[arg(long, default_value = "http")] + server_protocol: String, + + // ... rest of args ... +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + // ... setup engine, store, etc. ... + + match args.server_protocol.as_str() { + "http" => { + let server = HttpServer::new(http_config, engine, store); + server.run().await?; + } + "flight_sql" => { + let flight_config = FlightSqlServerConfig { + port: args.http_port, + adapter_config, + }; + let server = FlightSqlServer::new(flight_config, engine, store); + server.run().await?; + } + _ => { + eprintln!("Unknown server protocol: {}", args.server_protocol); + std::process::exit(1); + } + } + + Ok(()) +} +``` diff --git a/QueryEngineRust/docs/promsketch-integration.md b/QueryEngineRust/docs/promsketch-integration.md new file mode 100644 index 0000000..ad0ba8f --- /dev/null +++ b/QueryEngineRust/docs/promsketch-integration.md @@ -0,0 +1,144 @@ +# PromSketch Integration — Multi-Path Ingestion Architecture + +## 1. 
Overview + +QueryEngine supports two parallel data ingestion paths: + +1. **Precomputed pipeline**: A Kafka topic carrying pre-aggregated sketch buckets is consumed by `KafkaConsumer`, stored in `SimpleMapStore`, and served through the standard query path. +2. **Raw sample pipeline (Prometheus Remote Write)**: A standalone HTTP endpoint (`/api/v1/write`) accepts standard Prometheus remote write requests (Snappy-compressed protobuf). Decoded samples are inserted into `PromSketchStore` (which maintains live EHUniv, EHKLL, and USampling sketch instances per series) and served through the sketch query path. + +When a query arrives, the engine tries the sketch path first, then falls through to the precomputed path, and finally (optionally) to a remote Prometheus server. + +## 2. Data Flow Diagram + +``` +Raw Samples Path (Prometheus Remote Write): + Prometheus / Agent --> POST /api/v1/write --> PrometheusRemoteWriteServer --> PromSketchStore + (Snappy + protobuf) decode & insert + | + sketch_insert() + (EHUniv, EHKLL, USampling) + +Precomputed Path: + Prometheus --> PrecomputeEngine --> Kafka [precomputed] --> KafkaConsumer --> SimpleMapStore + +Query Path: + HTTP Request --> SimpleEngine + |-- (1) handle_sketch_query_promql() --> PromSketchStore.eval_matching() + |-- (2) precomputed pipeline (SimpleMapStore) + +-- (3) fallback --> Prometheus server +``` + +## 3. Query Routing + +When a PromQL query arrives, `SimpleEngine` dispatches it as follows: + +1. **PromSketch path** — `handle_sketch_query_promql()` parses the query (AST first, regex fallback for custom functions). If the function name is in `promsketch_func_map` and the `PromSketchStore` has matching series data, results are returned immediately. +2. **Precomputed path** — If the sketch path returns `None` (function not sketch-backed, no store configured, or no matching series), the query falls through to `SimpleMapStore`. +3. 
**Prometheus fallback** — If `--forward-unsupported-queries` is set and the precomputed path also misses, the query is forwarded to the remote Prometheus server. + +### Sketch-Backed Functions (13 total) + +These functions are routed to `PromSketchStore` first, with fallthrough to precomputed on miss: + +| Function | Sketch Type | Standard PromQL? | Description | +|-------------------------|-------------|-------------------|--------------------------------------------------| +| `entropy_over_time` | EHUniv | No (custom) | Shannon entropy of the sample distribution | +| `distinct_over_time` | EHUniv | No (custom) | Estimated number of distinct values | +| `l1_over_time` | EHUniv | No (custom) | L1 norm of the value vector | +| `l2_over_time` | EHUniv | No (custom) | L2 norm of the value vector | +| `quantile_over_time` | EHKLL | Yes | Approximate quantile (e.g., p50, p99) | +| `min_over_time` | EHKLL | Yes | Minimum value over the range | +| `max_over_time` | EHKLL | Yes | Maximum value over the range | +| `avg_over_time` | USampling | Yes | Average of sampled values | +| `count_over_time` | USampling | Yes | Count of sampled data points | +| `sum_over_time` | USampling | Yes | Sum of sampled values | +| `sum2_over_time` | USampling | No (custom) | Sum of squared values | +| `stddev_over_time` | USampling | Yes | Standard deviation over the range | +| `stdvar_over_time` | USampling | Yes | Variance over the range | + +### Non-Sketch Functions + +These functions always go directly to the precomputed pipeline (not in `promsketch_func_map`): + +| Function | Description | +|-------------|--------------------------------------| +| `rate` | Per-second rate of increase | +| `increase` | Total increase over the range | + +## 4. 
Configuration Reference + +### CLI Arguments + +| Argument | Description | Default | +|-------------------------------------|-------------------------------------------------------------------|-----------------| +| `--enable-prometheus-remote-write` | Enable the Prometheus remote write ingest endpoint | `false` | +| `--prometheus-remote-write-port` | Port for the Prometheus remote write HTTP server | `9090` | +| `--auto-init-sketches` | Auto-initialize all 3 sketch types for every new series | `true` | +| `--promsketch-config` | Path to a sketch configuration YAML file (optional) | (none) | + +### Sketch Config YAML + +All fields are optional; defaults are shown below. + +```yaml +eh_univ: + k: 50 # EH buckets for UnivMon + time_window: 1000000 # milliseconds + +eh_kll: + k: 50 # EH buckets for KLL + kll_k: 256 # KLL accuracy parameter + time_window: 1000000 + +sampling: + sample_rate: 0.2 # fraction of data points to sample + time_window: 1000000 +``` + +## 5. Deployment Checklist + +### Start QueryEngine with remote write enabled: + +```bash +./query_engine \ + --enable-prometheus-remote-write \ + --prometheus-remote-write-port 9090 \ + --promsketch-config promsketch_config.yaml # optional +``` + +### Configure Prometheus (or any remote write sender) to write to the endpoint: + +```yaml +# prometheus.yml +remote_write: + - url: "http://:9090/api/v1/write" +``` + +### Verify ingestion + +Check logs for `"Received N samples"` messages. + +### Verify queries + +```bash +curl 'http://localhost:8088/api/v1/query?query=quantile_over_time(0.5,metric[1m])&time=...' +``` + +### Monitor + +Use the `/metrics` endpoint for Prometheus counters (see section 6). + +## 6. `/metrics` Endpoint + +Exposed at `GET /metrics` in Prometheus exposition format. 
Key metrics: + +| Metric | Type | Description | +|-------------------------------------------------|-----------|-------------------------------------------------------| +| `promsketch_series_total` | Gauge | Number of live series currently tracked | +| `promsketch_samples_ingested_total` | Counter | Total raw samples ingested | +| `promsketch_ingest_errors_total` | Counter | Total ingestion errors (parse failures, etc.) | +| `promsketch_ingest_batch_duration_seconds` | Histogram | Time spent processing each ingestion batch | +| `promsketch_sketch_queries_total{result="hit"}` | Counter | Sketch queries that returned data | +| `promsketch_sketch_queries_total{result="miss"}`| Counter | Sketch queries that fell through (no matching series) | +| `promsketch_sketch_query_duration_seconds` | Histogram | End-to-end latency of sketch query evaluation | diff --git a/QueryEngineRust/examples/promql/inference_config.yaml b/QueryEngineRust/examples/promql/inference_config.yaml new file mode 100644 index 0000000..4aedd01 --- /dev/null +++ b/QueryEngineRust/examples/promql/inference_config.yaml @@ -0,0 +1,13 @@ +metrics: + fake_metric: + - instance + - job + - label_0 + - label_1 +cleanup_policy: + name: read_based +queries: +- aggregations: + - aggregation_id: 1 + read_count_threshold: 1 + query: quantile by (label_0) (0.99, fake_metric) diff --git a/QueryEngineRust/examples/promql/sketch_config.yaml b/QueryEngineRust/examples/promql/sketch_config.yaml new file mode 100644 index 0000000..e5ccc74 --- /dev/null +++ b/QueryEngineRust/examples/promql/sketch_config.yaml @@ -0,0 +1,10 @@ +eh_univ: + k: 50 + time_window: 1000000 +eh_kll: + k: 50 + kll_k: 256 + time_window: 1000000 +sampling: + sample_rate: 0.2 + time_window: 1000000 diff --git a/QueryEngineRust/examples/promql/streaming_config.yaml b/QueryEngineRust/examples/promql/streaming_config.yaml new file mode 100644 index 0000000..1c7fac1 --- /dev/null +++ b/QueryEngineRust/examples/promql/streaming_config.yaml @@ -0,0 +1,21 
@@ +aggregations: +- aggregationId: 1 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [label_0] + rollup: [instance, job, label_1] + aggregated: [] + metric: fake_metric + parameters: + K: 20 + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling + spatialFilter: '' +metrics: + fake_metric: + - instance + - job + - label_0 + - label_1 diff --git a/QueryEngineRust/examples/sql/inference_config.yaml b/QueryEngineRust/examples/sql/inference_config.yaml new file mode 100644 index 0000000..378c6f6 --- /dev/null +++ b/QueryEngineRust/examples/sql/inference_config.yaml @@ -0,0 +1,16 @@ +tables: + - name: metrics_table + time_column: time + metadata_columns: [hostname, datacenter] + value_columns: [cpu_usage, memory_usage] +cleanup_policy: + name: read_based +queries: +- aggregations: + - aggregation_id: 1 + read_count_threshold: 1 + query: | + SELECT datacenter, quantile(0.99)(cpu_usage) as p99 + FROM metrics_table + GROUP BY datacenter + WHERE time BETWEEN DATEADD(s, -11, NOW()) AND DATEADD(s, -10, NOW()) diff --git a/QueryEngineRust/examples/sql/streaming_config.yaml b/QueryEngineRust/examples/sql/streaming_config.yaml new file mode 100644 index 0000000..d36c54c --- /dev/null +++ b/QueryEngineRust/examples/sql/streaming_config.yaml @@ -0,0 +1,21 @@ +tables: + - name: metrics_table + time_column: time + metadata_columns: [hostname, datacenter] + value_columns: [cpu_usage, memory_usage] +aggregations: +- aggregationId: 1 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [datacenter] + rollup: [hostname] + aggregated: [] + table_name: metrics_table + value_column: cpu_usage + parameters: + K: 20 + tumblingWindowSize: 1 + windowSize: 1 + windowType: tumbling + spatialFilter: '' diff --git a/QueryEngineRust/installation/install.sh b/QueryEngineRust/installation/install.sh new file mode 100755 index 0000000..dfce4dc --- /dev/null +++ b/QueryEngineRust/installation/install.sh @@ -0,0 +1,18 @@ 
+#!/bin/bash + +set -e + +THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +PARENT_DIR=$(dirname "$THIS_DIR") + +source "$HOME/.cargo/env" + +echo "Building QueryEngine Rust binary..." +cd "$PARENT_DIR" +cargo build --release + +echo "Building QueryEngine Rust Docker image..." +cd "$(dirname "$PARENT_DIR")" +docker build . -f QueryEngineRust/Dockerfile -t sketchdb-queryengine-rust:latest + +echo "QueryEngine Rust Docker image built successfully: sketchdb-queryengine-rust:latest" diff --git a/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 b/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 new file mode 100644 index 0000000..64be341 --- /dev/null +++ b/QueryEngineRust/query-engine-rust-cli-compose.yml.j2 @@ -0,0 +1,50 @@ +# QueryEngine Rust Docker Compose Template +# This template is rendered with Jinja2 to generate the final docker-compose.yml + +services: + queryengine-rust: + image: sketchdb-queryengine-rust:latest # Need to change image name to 'asap' prefix + container_name: asap-queryengine-rust + hostname: queryengine-rust + networks: + - asap-network + ports: + - "{{ http_port | default('8088') }}:8088" + environment: + - RUST_LOG={{ log_level | default('info') }} + - RUST_BACKTRACE=1 + volumes: + - "{{ experiment_output_dir }}:/app/outputs" + - "{{ controller_remote_output_dir }}:/app/controller_output:ro" + command: [ + "--kafka-topic", "{{ kafka_topic }}", + "--kafka-broker", "kafka:9092", # Uses kafka container service name instead of IP + "--input-format", "{{ input_format }}", + "--config", "/app/controller_output/inference_config.yaml", + "--streaming-config", "/app/controller_output/streaming_config.yaml", + "--prometheus-server", "http://prometheus:9090", + "--prometheus-scrape-interval", "{{ prometheus_scrape_interval }}", + "--delete-existing-db", + "--log-level", "{{ log_level }}", + "--output-dir", "/app/outputs", + "--streaming-engine", "{{ streaming_engine }}", + "--query-language", "{{ query_language }}", + "--lock-strategy", 
"{{ lock_strategy }}"{% if compress_json %}, + "--decompress-json"{% endif %}{% if profile_query_engine %}, + "--do-profiling"{% endif %}{% if forward_unsupported_queries %}, + "--forward-unsupported-queries"{% endif %}{% if dump_precomputes %}, + "--dump-precomputes"{% endif %} + ] + depends_on: + kafka: + condition: service_healthy + kafka-init: + condition: service_completed_successfully # Wait for Kafka topics to be created + prometheus: + condition: service_healthy + controller: + condition: service_completed_successfully # Wait for controller to generate configs + arroyosketch: + condition: service_completed_successfully # Wait for pipeline configuration + restart: no + # Add healthcheck diff --git a/QueryEngineRust/rustfmt.toml b/QueryEngineRust/rustfmt.toml new file mode 100644 index 0000000..88c5c0f --- /dev/null +++ b/QueryEngineRust/rustfmt.toml @@ -0,0 +1,4 @@ +# Optional: customize rustfmt behavior +max_width = 100 +hard_tabs = false +tab_spaces = 4 diff --git a/QueryEngineRust/src/bin/run.sh b/QueryEngineRust/src/bin/run.sh new file mode 100644 index 0000000..57b41a6 --- /dev/null +++ b/QueryEngineRust/src/bin/run.sh @@ -0,0 +1 @@ +cargo run --bin test_offline_precomputes -- --input-file --mode merge --pattern-type temporal --aggregation-type DatasketchesKLL --window-size 90 --iterations 10 --slide-step 5 diff --git a/QueryEngineRust/src/bin/show_logical_plans.rs b/QueryEngineRust/src/bin/show_logical_plans.rs new file mode 100644 index 0000000..f5210cc --- /dev/null +++ b/QueryEngineRust/src/bin/show_logical_plans.rs @@ -0,0 +1,527 @@ +//! Standalone binary that constructs diverse QueryExecutionContext structures, +//! converts each to a DataFusion logical plan, and prints each plan along with +//! the schema of every edge and key internal variables. +//! +//! Covers 4 queries x multiple accumulator configurations = 10 test cases: +//! +//! 1. sum by (host) (data) — spatial sum +//! 2. quantile by (host) (0.5, data) — spatial quantile +//! 3. 
sum_over_time(data[1m]) — temporal sum +//! 4. quantile_over_time(0.5, data[1m]) — temporal quantile +//! +//! data has columns: host, service, region + +use datafusion::logical_expr::LogicalPlan; +use datafusion_summary_library::{PrecomputedSummaryRead, SummaryInfer, SummaryMergeMultiple}; +use promql_utilities::data_model::KeyByLabelNames; +use promql_utilities::query_logics::enums::Statistic; +use query_engine_rust::engines::simple_engine::{ + AggregationIdInfo, QueryExecutionContext, QueryMetadata, StoreQueryParams, StoreQueryPlan, +}; +use std::collections::HashMap; + +// ============================================================================ +// Context builders +// ============================================================================ + +/// Build a QueryExecutionContext with full control over all parameters. +#[allow(clippy::too_many_arguments)] +fn build_context( + metric: &str, + statistic: Statistic, + query_output_labels: Vec<&str>, + grouping_labels: Vec<&str>, + aggregated_labels: Vec<&str>, + agg_type_value: &str, + agg_type_key: &str, + agg_id_value: u64, + agg_id_key: u64, + keys_query: Option, + do_merge: bool, + is_exact_query: bool, + kwargs: HashMap, +) -> QueryExecutionContext { + QueryExecutionContext { + metric: metric.to_string(), + metadata: QueryMetadata { + query_output_labels: KeyByLabelNames { + labels: query_output_labels.into_iter().map(String::from).collect(), + }, + statistic_to_compute: statistic, + query_kwargs: kwargs, + }, + store_plan: StoreQueryPlan { + values_query: StoreQueryParams { + metric: metric.to_string(), + aggregation_id: agg_id_value, + start_timestamp: if do_merge { 1000 } else { 2000 }, + end_timestamp: 2000, + is_exact_query, + }, + keys_query, + }, + agg_info: AggregationIdInfo { + aggregation_id_for_key: agg_id_key, + aggregation_id_for_value: agg_id_value, + aggregation_type_for_key: agg_type_key.to_string(), + aggregation_type_for_value: agg_type_value.to_string(), + }, + do_merge, + 
spatial_filter: String::new(), + query_time: 2000, + grouping_labels: KeyByLabelNames { + labels: grouping_labels.into_iter().map(String::from).collect(), + }, + aggregated_labels: KeyByLabelNames { + labels: aggregated_labels.into_iter().map(String::from).collect(), + }, + } +} + +fn make_keys_query(metric: &str, agg_id: u64) -> StoreQueryParams { + StoreQueryParams { + metric: metric.to_string(), + aggregation_id: agg_id, + start_timestamp: 0, // DeltaSetAggregator reads from beginning of time + end_timestamp: 2000, + is_exact_query: false, // keys are always range queries + } +} + +// ============================================================================ +// Plan printing utilities +// ============================================================================ + +/// Recursively print the plan tree with indentation, showing each node's +/// explain text and output schema. +fn print_plan_tree(plan: &LogicalPlan, indent: usize) { + let prefix = " ".repeat(indent); + let connector = if indent > 0 { "└─► " } else { "" }; + + match plan { + LogicalPlan::Extension(ext) => { + // Print node name and explain text + println!("{prefix}{connector}{}", ext.node.name()); + + // Print detailed properties by downcasting each node + print_node_details(plan, indent + 2); + + // Print output schema + let schema = ext.node.schema(); + print!("{} schema: [", prefix); + for (i, field) in schema.fields().iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{}:{}", field.name(), field.data_type()); + } + println!("]"); + + // Recurse into inputs + let inputs = ext.node.inputs(); + for (i, input) in inputs.iter().enumerate() { + if inputs.len() > 1 { + println!("{} input {}:", prefix, i); + } + print_plan_tree(input, indent + 2); + } + } + _ => { + println!("{prefix}{connector}Unknown: {:?}", plan); + } + } +} + +/// Print detailed properties of a plan node by downcasting. 
+fn print_node_details(plan: &LogicalPlan, indent: usize) { + let prefix = " ".repeat(indent); + if let LogicalPlan::Extension(ext) = plan { + if let Some(infer) = ext.node.as_any().downcast_ref::() { + println!( + "{prefix}operations: {:?}", + infer + .operations + .iter() + .map(|op| format!("{}", op)) + .collect::>() + ); + println!("{prefix}output_names: {:?}", infer.output_names); + println!("{prefix}group_key_columns: {:?}", infer.group_key_columns); + println!("{prefix}has_keys_input: {}", infer.keys_input.is_some()); + } else if let Some(merge) = ext.node.as_any().downcast_ref::() { + println!("{prefix}group_by: {:?}", merge.group_by()); + println!("{prefix}sketch_column: {:?}", merge.sketch_column()); + println!("{prefix}summary_type: {}", merge.summary_type()); + } else if let Some(read) = ext.node.as_any().downcast_ref::() { + println!("{prefix}metric: {:?}", read.metric()); + println!("{prefix}aggregation_id: {}", read.aggregation_id()); + println!( + "{prefix}range: [{}, {}]", + read.start_timestamp(), + read.end_timestamp() + ); + println!("{prefix}is_exact_query: {}", read.is_exact_query()); + println!("{prefix}summary_type: {}", read.summary_type()); + println!("{prefix}output_labels: {:?}", read.output_labels()); + } + } +} + +/// Print key internal variables about a QueryExecutionContext. 
+fn print_context_variables(ctx: &QueryExecutionContext) { + let has_separate_keys = ctx.store_plan.keys_query.is_some() + && ctx.agg_info.aggregation_id_for_key != ctx.agg_info.aggregation_id_for_value; + let has_aggregated_labels = !ctx.aggregated_labels.labels.is_empty(); + + println!(" Internal variables:"); + println!(" has_separate_keys (dual input): {}", has_separate_keys); + println!( + " has_aggregated_labels (multi-population): {}", + has_aggregated_labels + ); + println!(" do_merge (temporal): {}", ctx.do_merge); + println!( + " keys_included: {}", + has_separate_keys || has_aggregated_labels + ); + println!( + " value_agg: {} (id={})", + ctx.agg_info.aggregation_type_for_value, ctx.agg_info.aggregation_id_for_value + ); + println!( + " key_agg: {} (id={})", + ctx.agg_info.aggregation_type_for_key, ctx.agg_info.aggregation_id_for_key + ); + println!( + " query_output_labels: {:?}", + ctx.metadata.query_output_labels.labels + ); + println!(" grouping_labels: {:?}", ctx.grouping_labels.labels); + println!(" aggregated_labels: {:?}", ctx.aggregated_labels.labels); + println!(" statistic: {:?}", ctx.metadata.statistic_to_compute); + if !ctx.metadata.query_kwargs.is_empty() { + println!(" query_kwargs: {:?}", ctx.metadata.query_kwargs); + } +} + +// ============================================================================ +// Test case definitions +// ============================================================================ + +struct TestCase { + title: String, + query: String, + description: String, + context: QueryExecutionContext, +} + +fn build_all_test_cases() -> Vec { + let metric = "data"; + let mut cases = Vec::new(); + + // ======================================================================== + // Query 1: sum by (host) (data) + // ======================================================================== + + // Case 1a: SumAccumulator only + // Simple single-population. Store groups by host, one Sum per host. 
+ cases.push(TestCase { + title: "sum by (host) — SumAccumulator".into(), + query: "sum by (host) (data)".into(), + description: + "Single-population exact sum. Store groups by [host], one scalar sum per group key." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], // query_output_labels + vec!["host"], // grouping_labels (store GROUP BY) + vec![], // aggregated_labels (none) + "SumAccumulator", // value accumulator + "SumAccumulator", // key accumulator (same = single) + 42, + 42, // same agg_id + None, // no keys_query + false, // not temporal + true, // exact (sliding window) + HashMap::new(), + ), + }); + + // Case 1b: MultipleSumAccumulator only (self-keyed) + // The accumulator internally tracks sums for each host value. + // Store doesn't group by host; the accumulator maps host -> sum. + cases.push(TestCase { + title: "sum by (host) — MultipleSumAccumulator (self-keyed)".into(), + query: "sum by (host) (data)".into(), + description: "Self-keyed multi-population. Store groups by [] (no spatial grouping). \ + MultipleSumAccumulator internally maps host -> sum." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], // query_output_labels + vec![], // grouping_labels (no store grouping) + vec!["host"], // aggregated_labels (host tracked internally) + "MultipleSumAccumulator", + "MultipleSumAccumulator", // same type = single agg_id + 42, + 42, + None, + false, + true, + HashMap::new(), + ), + }); + + // Case 1c: CountMinSketch + DeltaSetAggregator (dual-input) + // CountMinSketch estimates frequency per key; DeltaSetAggregator enumerates keys. + cases.push(TestCase { + title: "sum by (host) — CountMinSketch + DeltaSetAggregator (dual-input)".into(), + query: "sum by (host) (data)".into(), + description: "Dual-input plan. CountMinSketch for value estimation per host key, \ + DeltaSetAggregator enumerates which hosts exist." 
+ .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host"], + vec![], // grouping_labels (no store grouping) + vec!["host"], // aggregated_labels + "CountMinSketch", + "DeltaSetAggregator", + 42, + 99, // different agg_ids + Some(make_keys_query(metric, 99)), + false, + true, + HashMap::new(), + ), + }); + + // ======================================================================== + // Query 2: quantile by (host) (0.5, data) + // ======================================================================== + + let mut q_kwargs = HashMap::new(); + q_kwargs.insert("quantile".to_string(), "0.5".to_string()); + + // Case 2a: KLL only + cases.push(TestCase { + title: "quantile by (host) (0.5) — KLL".into(), + query: "quantile by (host) (0.5, data)".into(), + description: + "Single-population quantile. Store groups by [host], one KLL sketch per group key." + .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host"], + vec!["host"], + vec![], + "KLL", + "KLL", + 42, + 42, + None, + false, + true, + q_kwargs.clone(), + ), + }); + + // Case 2b: HydraKLL + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "quantile by (host) (0.5) — HydraKLL + DeltaSetAggregator (dual-input)".into(), + query: "quantile by (host) (0.5, data)".into(), + description: "Dual-input quantile. HydraKLL has per-host KLL sketches internally. \ + DeltaSetAggregator enumerates which hosts exist." 
+ .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host"], + vec![], // no store grouping + vec!["host"], // host tracked internally + "HydraKLL", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + false, + true, + q_kwargs.clone(), + ), + }); + + // ======================================================================== + // Query 3: sum_over_time(data[1m]) + // Temporal — all labels preserved, do_merge=true + // ======================================================================== + + // Case 3a: SumAccumulator only + cases.push(TestCase { + title: "sum_over_time(data[1m]) — SumAccumulator".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, single-population. All labels preserved. \ + do_merge=true to merge tumbling windows across the 1m range." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host", "service", "region"], + vec![], + "SumAccumulator", + "SumAccumulator", + 42, + 42, + None, + true, // temporal merge + false, // tumbling window (range query) + HashMap::new(), + ), + }); + + // Case 3b: MultipleSumAccumulator only (self-keyed) + cases.push(TestCase { + title: "sum_over_time(data[1m]) — MultipleSumAccumulator (self-keyed)".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, self-keyed multi-population. Store groups by [host]. \ + MultipleSumAccumulator internally maps (service, region) -> sum." 
+ .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host"], // store groups by host only + vec!["service", "region"], // rest tracked internally + "MultipleSumAccumulator", + "MultipleSumAccumulator", + 42, + 42, + None, + true, + false, + HashMap::new(), + ), + }); + + // Case 3c: CountMinSketch + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "sum_over_time(data[1m]) — CountMinSketch + DeltaSetAggregator (dual-input)".into(), + query: "sum_over_time(data[1m])".into(), + description: "Temporal sum, dual-input. Store groups by [host]. \ + CountMinSketch estimates per (service, region). \ + DeltaSetAggregator enumerates (service, region) keys." + .into(), + context: build_context( + metric, + Statistic::Sum, + vec!["host", "service", "region"], + vec!["host"], + vec!["service", "region"], + "CountMinSketch", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + true, + false, + HashMap::new(), + ), + }); + + // ======================================================================== + // Query 4: quantile_over_time(0.5, data[1m]) + // Temporal — all labels preserved, do_merge=true + // ======================================================================== + + // Case 4a: KLL only + cases.push(TestCase { + title: "quantile_over_time(0.5, data[1m]) — KLL".into(), + query: "quantile_over_time(0.5, data[1m])".into(), + description: "Temporal quantile, single-population. All labels preserved. \ + One KLL sketch per (host, service, region) group." 
+ .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host", "service", "region"], + vec!["host", "service", "region"], + vec![], + "KLL", + "KLL", + 42, + 42, + None, + true, + false, + q_kwargs.clone(), + ), + }); + + // Case 4b: HydraKLL + DeltaSetAggregator (dual-input) + cases.push(TestCase { + title: "quantile_over_time(0.5, data[1m]) — HydraKLL + DeltaSetAggregator (dual-input)" + .into(), + query: "quantile_over_time(0.5, data[1m])".into(), + description: "Temporal quantile, dual-input. Store groups by [host]. \ + HydraKLL has per-(service, region) KLL sketches. \ + DeltaSetAggregator enumerates (service, region) keys." + .into(), + context: build_context( + metric, + Statistic::Quantile, + vec!["host", "service", "region"], + vec!["host"], + vec!["service", "region"], + "HydraKLL", + "DeltaSetAggregator", + 42, + 99, + Some(make_keys_query(metric, 99)), + true, + false, + q_kwargs.clone(), + ), + }); + + cases +} + +// ============================================================================ +// Main +// ============================================================================ + +fn main() { + let cases = build_all_test_cases(); + + for (i, case) in cases.iter().enumerate() { + println!("╔══════════════════════════════════════════════════════════════════════"); + println!("║ Case {}: {}", i + 1, case.title); + println!("║ Query: {}", case.query); + println!("║ {}", case.description); + println!("╚══════════════════════════════════════════════════════════════════════"); + println!(); + + // Print key internal variables + print_context_variables(&case.context); + println!(); + + // Convert to logical plan + match case.context.to_logical_plan() { + Ok(plan) => { + println!(" Logical Plan Tree:"); + println!(" ──────────────────"); + print_plan_tree(&plan, 2); + } + Err(e) => { + println!(" ERROR converting to logical plan: {}", e); + } + } + + println!(); + println!(); + } +} diff --git 
a/QueryEngineRust/src/bin/test_offline_precomputes.rs b/QueryEngineRust/src/bin/test_offline_precomputes.rs new file mode 100644 index 0000000..fa25822 --- /dev/null +++ b/QueryEngineRust/src/bin/test_offline_precomputes.rs @@ -0,0 +1,916 @@ +// Standard library +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufReader, Read}; +use std::path::PathBuf; + +// External crates +use clap::Parser; +use serde::Deserialize; + +// Internal imports from QueryEngineRust +use promql_utilities::query_logics::enums::{QueryPatternType, Statistic}; +use query_engine_rust::data_model::{AggregateCore, KeyByLabelValues, PrecomputedOutput}; +use query_engine_rust::precompute_operators::*; + +/// CLI Arguments +#[derive(Parser, Debug)] +#[command(name = "test_offline_precomputes")] +#[command(about = "Test offline precomputes for SimpleEngine functionality")] +struct Args { + /// Path to the dumped precomputes file (.msgpack) + #[arg(short, long)] + input_file: PathBuf, + + /// Test mode: "merge", "query", or "both" + #[arg(short, long, default_value = "both")] + mode: String, + + /// Query pattern type for testing: "temporal", "spatial", or "temporal_spatial" + #[arg(short, long, default_value = "temporal")] + pattern_type: String, + + /// Aggregation type for merging (e.g., "Sum", "DatasketchesKLL", "DeltaSetAggregator") + #[arg(short, long, default_value = "Sum")] + aggregation_type: String, + + /// Statistic to query: "sum", "count", "avg", "min", "max", "quantile", etc. 
+ #[arg(short, long, default_value = "sum")] + statistic: String, + + /// Optional quantile parameter (for quantile queries) + #[arg(long)] + quantile: Option, + + /// Maximum number of precomputes to load (for testing) + #[arg(long)] + max_records: Option, + + /// Verbose logging + #[arg(short, long)] + verbose: bool, + + /// Window size for sliding window merges (number of precomputes per window) + #[arg(long)] + window_size: Option, + + /// Number of sliding window iterations (default: 1 if window_size set) + #[arg(long)] + iterations: Option, + + /// Step size for sliding (defaults to window_size for tumbling windows) + #[arg(long)] + slide_step: Option, + + /// Keep only last merged result for query testing (default: true) + #[arg(long, default_value = "true")] + keep_last_only: bool, +} + +/// Represents a single loaded precompute dump from the file +struct LoadedPrecompute { + metadata: PrecomputedOutput, + accumulator: Box, +} + +/// Deserializable version matching the dump format +/// This must match the PrecomputeDump struct in src/utils/precompute_dumper.rs +#[derive(Deserialize, Debug)] +struct PrecomputeDumpRaw { + #[allow(dead_code)] + timestamp: u64, + metadata: PrecomputedOutput, + accumulator_type: String, + accumulator_data_bytes: Vec, +} + +/// Type alias for merged precomputes result +type MergedPrecomputes = HashMap, Box>; + +/// Statistics for analysis +#[derive(Debug, Default)] +struct LoadStatistics { + total_records: usize, + records_by_type: HashMap, + records_by_aggregation_id: HashMap, + time_range: (u64, u64), // (min_start, max_end) +} + +/// Window configuration for sliding window merges +#[derive(Debug, Clone)] +struct WindowConfig { + window_size: usize, + iterations: usize, + slide_step: usize, + keep_last_only: bool, +} + +impl WindowConfig { + fn from_args(args: &Args) -> Option { + args.window_size.map(|window_size| Self { + window_size, + iterations: args.iterations.unwrap_or(1), + slide_step: 
args.slide_step.unwrap_or(window_size), + keep_last_only: args.keep_last_only, + }) + } + + /// Calculate window boundaries for given total precomputes + fn calculate_windows(&self, total: usize) -> Vec<(usize, usize)> { + let mut windows = Vec::new(); + for i in 0..self.iterations { + let start = i * self.slide_step; + if start >= total { + break; + } + let end = std::cmp::min(start + self.window_size, total); + if start < end { + windows.push((start, end)); + } + } + windows + } +} + +/// Statistics for a single window merge operation +#[derive(Debug, Clone)] +struct WindowStats { + precompute_count: usize, + merge_time: std::time::Duration, +} + +/// Statistics for windowed merge operations +#[derive(Debug, Default)] +struct WindowMergeStatistics { + window_stats: HashMap, Vec>, + total_windows: usize, + total_merges: usize, + total_merge_time: std::time::Duration, +} + +impl WindowMergeStatistics { + fn new() -> Self { + Self::default() + } + + fn add_window_stat(&mut self, key: Option, stat: WindowStats) { + self.total_windows += 1; + self.total_merges += stat.precompute_count; + self.total_merge_time += stat.merge_time; + + self.window_stats.entry(key).or_default().push(stat); + } +} + +/// Validate window-related CLI arguments +fn validate_window_args(args: &Args) -> Result<(), Box> { + if let Some(ws) = args.window_size { + if ws == 0 { + return Err("window_size must be greater than 0".into()); + } + if let Some(step) = args.slide_step { + if step == 0 { + return Err("slide_step must be greater than 0".into()); + } + } + } else { + // Ensure window-related args not used without window_size + if args.iterations.is_some() { + return Err("iterations requires window_size".into()); + } + if args.slide_step.is_some() { + return Err("slide_step requires window_size".into()); + } + } + Ok(()) +} + +fn main() -> Result<(), Box> { + // 1. Parse CLI arguments + let args = Args::parse(); + + // 2. Validate window arguments + validate_window_args(&args)?; + + // 3. 
Initialize logging + init_logging(args.verbose); + + // 4. Parse window configuration + let window_config = WindowConfig::from_args(&args); + + // 5. Load precomputes from file + println!("Loading precomputes from: {:?}", args.input_file); + let (precomputes, stats) = load_precomputes_from_file(&args.input_file, args.max_records)?; + + // 6. Display load statistics + print_load_statistics(&stats); + + // 7. Group precomputes by key for testing + let grouped_precomputes = group_precomputes_by_key(precomputes); + + // 8. Run tests based on mode + match args.mode.as_str() { + "merge" => { + println!("\n=== TESTING MERGE FUNCTIONALITY ===\n"); + test_merge_functionality( + &grouped_precomputes, + parse_pattern_type(&args.pattern_type), + &args.aggregation_type, + window_config, + )?; + } + "query" => { + if window_config.is_some() { + println!("Warning: window parameters ignored in 'query' mode"); + } + println!("\n=== TESTING QUERY FUNCTIONALITY ===\n"); + test_query_functionality( + &grouped_precomputes, + parse_statistic(&args.statistic)?, + build_query_kwargs(&args), + )?; + } + "both" => { + println!("\n=== TESTING MERGE FUNCTIONALITY ===\n"); + let merged = test_merge_functionality( + &grouped_precomputes, + parse_pattern_type(&args.pattern_type), + &args.aggregation_type, + window_config, + )?; + + println!("\n=== TESTING QUERY FUNCTIONALITY ===\n"); + test_query_on_merged( + &merged, + parse_statistic(&args.statistic)?, + build_query_kwargs(&args), + )?; + } + _ => { + return Err(format!("Invalid mode: {}", args.mode).into()); + } + } + + println!("\n=== TESTING COMPLETE ==="); + Ok(()) +} + +/// Load precomputes from a MessagePack dump file +/// +/// File format (from precompute_dumper.rs): +/// - 4 bytes: length prefix (u32, little-endian) +/// - N bytes: MessagePack-serialized PrecomputeDumpRaw +/// - Repeat... 
+fn load_precomputes_from_file( + file_path: &PathBuf, + max_records: Option, +) -> Result<(Vec, LoadStatistics), Box> { + let file = File::open(file_path)?; + let mut reader = BufReader::new(file); + let mut precomputes = Vec::new(); + let mut stats = LoadStatistics::default(); + + let mut count = 0; + loop { + // Check if we've reached max_records + if let Some(max) = max_records { + if count >= max { + println!("Reached max_records limit: {}", max); + break; + } + } + + // Read length prefix (4 bytes, little-endian) + let mut length_bytes = [0u8; 4]; + match reader.read_exact(&mut length_bytes) { + Ok(_) => {} + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + // End of file reached + break; + } + Err(e) => return Err(e.into()), + } + + let length = u32::from_le_bytes(length_bytes) as usize; + + // Read the serialized data + let mut data_bytes = vec![0u8; length]; + reader.read_exact(&mut data_bytes)?; + + // Deserialize from MessagePack + let dump: PrecomputeDumpRaw = rmp_serde::from_slice(&data_bytes) + .map_err(|e| format!("Failed to deserialize record {}: {}", count, e))?; + + // Deserialize accumulator from bytes + let accumulator = + deserialize_accumulator(&dump.accumulator_type, &dump.accumulator_data_bytes)?; + + // Update statistics + update_statistics(&mut stats, &dump); + + // Create loaded precompute + precomputes.push(LoadedPrecompute { + metadata: dump.metadata, + accumulator, + }); + + count += 1; + if count % 10000 == 0 { + println!("Loaded {} precomputes...", count); + } + } + + stats.total_records = count; + println!("Total precomputes loaded: {}", count); + + Ok((precomputes, stats)) +} + +/// Deserialize accumulator from bytes based on type +/// Only supports accumulators with deserialize_from_bytes_arroyo method +fn deserialize_accumulator( + accumulator_type: &str, + bytes: &[u8], +) -> Result, Box> { + match accumulator_type { + "SumAccumulator" => Ok(Box::new( + 
sum_accumulator::SumAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "MultipleIncreaseAccumulator" => Ok(Box::new( + multiple_increase_accumulator::MultipleIncreaseAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "CountMinSketchAccumulator" => Ok(Box::new( + count_min_sketch_accumulator::CountMinSketchAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "CountMinSketchWithHeapAccumulator" => Ok(Box::new( + count_min_sketch_with_heap_accumulator::CountMinSketchWithHeapAccumulator::deserialize_from_bytes_arroyo( + bytes, + )?, + )), + "DatasketchesKLLAccumulator" => Ok(Box::new( + datasketches_kll_accumulator::DatasketchesKLLAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + "DeltaSetAggregatorAccumulator" => Ok(Box::new( + delta_set_aggregator_accumulator::DeltaSetAggregatorAccumulator::deserialize_from_bytes_arroyo( + bytes, + )?, + )), + "SetAggregatorAccumulator" => Ok(Box::new( + set_aggregator_accumulator::SetAggregatorAccumulator::deserialize_from_bytes_arroyo(bytes)?, + )), + _ => Err(format!("Unsupported accumulator type: {} (only Arroyo-based accumulators supported)", accumulator_type).into()), + } +} + +/// Group precomputes by their key to prepare for merging +/// +/// This simulates how simple_engine.rs groups precomputes from the store +/// into HashMap, Vec>> +/// +/// Returns: HashMap, Vec>> +fn group_precomputes_by_key( + precomputes: Vec, +) -> HashMap, Vec>> { + let mut grouped: HashMap, Vec>> = + HashMap::new(); + + for precompute in precomputes { + grouped + .entry(precompute.metadata.key.clone()) + .or_default() + .push(precompute.accumulator); + } + + println!( + "Grouped {} precomputes into {} unique keys", + grouped.values().map(|v| v.len()).sum::(), + grouped.len() + ); + + grouped +} + +/// Test merge_precomputed_outputs functionality with optional windowing +/// This replicates the logic from simple_engine.rs:1334-1409 +/// +/// Reference: SimpleEngine::merge_precomputed_outputs +fn 
test_merge_functionality( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, + window_config: Option, +) -> Result> { + println!("Testing merge with pattern type: {:?}", query_pattern_type); + println!("Aggregation type: {}", aggregation_type); + + if let Some(ref config) = window_config { + println!("\n=== WINDOWED MERGE CONFIGURATION ==="); + println!("Window size: {}", config.window_size); + println!("Iterations: {}", config.iterations); + println!("Slide step: {}", config.slide_step); + + test_merge_with_windows( + grouped_precomputes, + query_pattern_type, + aggregation_type, + config, + ) + } else { + println!("Mode: Standard (merge all)"); + test_merge_all(grouped_precomputes, query_pattern_type, aggregation_type) + } +} + +/// Merge all precomputes for each key (standard mode) +fn test_merge_all( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, +) -> Result> { + let mut merged_results = HashMap::new(); + let mut merge_times = Vec::new(); + + for (key, precomputes) in grouped_precomputes.iter() { + if precomputes.is_empty() { + continue; + } + + let start = std::time::Instant::now(); + + let merged = if should_merge(query_pattern_type, aggregation_type) { + merge_accumulators(precomputes)? 
+ } else { + println!(" No merge needed, taking single precompute"); + assert_eq!( + precomputes.len(), + 1, + "Expected exactly 1 precompute for spatial query without DeltaSetAggregator" + ); + precomputes[0].clone() + }; + + let elapsed = start.elapsed(); + merge_times.push(elapsed); + + println!( + " Merge completed in {:.2}ms", + elapsed.as_secs_f64() * 1000.0 + ); + println!(" Result type: {}", merged.get_accumulator_type()); + + merged_results.insert(key.clone(), merged); + } + + // Print statistics + if !merge_times.is_empty() { + let total: std::time::Duration = merge_times.iter().sum(); + let avg = total / merge_times.len() as u32; + println!("\n=== Merge Statistics ==="); + println!("Total merges: {}", merge_times.len()); + println!("Total time: {:.2}ms", total.as_secs_f64() * 1000.0); + println!("Average time: {:.2}ms", avg.as_secs_f64() * 1000.0); + } + + Ok(merged_results) +} + +/// Merge precomputes using sliding windows +fn test_merge_with_windows( + grouped_precomputes: &HashMap, Vec>>, + query_pattern_type: QueryPatternType, + aggregation_type: &str, + config: &WindowConfig, +) -> Result> { + let mut final_results = HashMap::new(); + let mut window_stats = WindowMergeStatistics::new(); + + println!( + "\nProcessing {} keys with sliding window merge", + grouped_precomputes.len() + ); + + for (key, precomputes) in grouped_precomputes.iter() { + if precomputes.is_empty() { + continue; + } + + //println!("\nKey: {:?}", key); + //println!("Total precomputes: {}", precomputes.len()); + + // Calculate window boundaries + let windows = config.calculate_windows(precomputes.len()); + + if windows.is_empty() { + println!(" Warning: No valid windows for this key"); + continue; + } + + //println!(" Windows to process: {}", windows.len()); + + // Process each window + let mut window_results = Vec::new(); + + for (window_idx, (start_idx, end_idx)) in windows.iter().enumerate() { + //println!(" Window {}/{}: merging precomputes [{}..{}] ({} items)", + // 
window_idx + 1, windows.len(), start_idx, end_idx, end_idx - start_idx); + + let window_start = std::time::Instant::now(); + + // Extract window slice + let window_slice = &precomputes[*start_idx..*end_idx]; + + // Perform merge if needed + let merged = if should_merge(query_pattern_type, aggregation_type) { + merge_accumulators(window_slice)? + } else { + // For spatial queries without DeltaSetAggregator + if window_slice.len() != 1 { + println!(" Warning: Expected 1 precompute for spatial query, got {}. Taking first.", window_slice.len()); + } + window_slice[0].clone() + }; + + let window_elapsed = window_start.elapsed(); + + // Record statistics + let stat = WindowStats { + precompute_count: end_idx - start_idx, + merge_time: window_elapsed, + }; + + window_stats.add_window_stat(key.clone(), stat); + + //println!(" Window merge time: {:.2}ms", window_elapsed.as_secs_f64() * 1000.0); + //println!(" Result type: {}", merged.get_accumulator_type()); + + // Store result + if config.keep_last_only { + // Only keep the last window's result + if window_idx == windows.len() - 1 { + window_results.push(merged); + } + } else { + // Keep all window results + window_results.push(merged); + } + } + + // Store final result for this key + if !window_results.is_empty() { + // For query testing, we use the last result + let final_result = window_results.into_iter().last().unwrap(); + final_results.insert(key.clone(), final_result); + } + } + + // Print comprehensive statistics + print_window_merge_statistics(&window_stats); + + Ok(final_results) +} + +/// Print detailed statistics for windowed merge operations +fn print_window_merge_statistics(stats: &WindowMergeStatistics) { + println!("\n=== WINDOWED MERGE STATISTICS ==="); + println!("Total windows processed: {}", stats.total_windows); + println!("Total precomputes merged: {}", stats.total_merges); + println!( + "Total merge time: {:.2}ms", + stats.total_merge_time.as_secs_f64() * 1000.0 + ); + + if stats.total_windows > 0 { 
+ let avg_window_time = stats.total_merge_time / stats.total_windows as u32; + println!( + "Average time per window: {:.2}ms", + avg_window_time.as_secs_f64() * 1000.0 + ); + } + + // Per-key breakdown + println!("\n=== PER-KEY STATISTICS ==="); + for key_stats in stats.window_stats.values() { + //println!("\nKey: {:?}", key); + //println!(" Windows: {}", key_stats.len()); + + let total_precomputes: usize = key_stats.iter().map(|s| s.precompute_count).sum(); + let total_time: std::time::Duration = key_stats.iter().map(|s| s.merge_time).sum(); + + println!(" Total precomputes: {}", total_precomputes); + println!(" Total time: {:.2}ms", total_time.as_secs_f64() * 1000.0); + + if !key_stats.is_empty() { + let avg_time = total_time / key_stats.len() as u32; + println!( + " Average time per window: {:.2}ms", + avg_time.as_secs_f64() * 1000.0 + ); + } + } +} + +/// Determine if merging should happen based on pattern type and aggregation type +/// Mirrors logic from simple_engine.rs:1360-1395 +fn should_merge(pattern_type: QueryPatternType, aggregation_type: &str) -> bool { + match pattern_type { + QueryPatternType::OnlyTemporal | QueryPatternType::OneTemporalOneSpatial => true, + QueryPatternType::OnlySpatial => aggregation_type == "DeltaSetAggregator", + } +} + +/// Merge multiple accumulators +/// This replicates simple_engine.rs:1413-1441 +/// +/// Reference: SimpleEngine::merge_accumulators +fn merge_accumulators( + accumulators: &[Box], +) -> Result, Box> { + if accumulators.is_empty() { + return Err("No accumulators to merge".into()); + } + + if accumulators.len() == 1 { + return Ok(accumulators[0].clone()); + } + + let mut result = accumulators[0].clone(); + + for (i, accumulator) in accumulators[1..].iter().enumerate() { + //println!(" Merging accumulator {} of {}", i + 2, accumulators.len()); + match result.merge_with(accumulator.as_ref()) { + Ok(merged) => { + result = merged; + } + Err(e) => { + eprintln!(" Warning: Failed to merge accumulator {}: {}", i + 2, 
e); + // Continue with current result + } + } + } + + Ok(result) +} + +/// Test query_precompute_for_statistic functionality on merged results +fn test_query_on_merged( + merged_precomputes: &HashMap, Box>, + statistic: Statistic, + query_kwargs: HashMap, +) -> Result<(), Box> { + println!("Testing query with statistic: {:?}", statistic); + println!("Query kwargs: {:?}", query_kwargs); + + let mut query_results = Vec::new(); + + for (idx, (key, precompute)) in merged_precomputes.iter().enumerate() { + println!( + "\n--- Querying key {} of {} ---", + idx + 1, + merged_precomputes.len() + ); + println!("Key: {:?}", key); + println!("Accumulator type: {}", precompute.get_accumulator_type()); + + let start = std::time::Instant::now(); + + let result = + query_precompute_for_statistic(precompute.as_ref(), &statistic, key, &query_kwargs)?; + + let elapsed = start.elapsed(); + + println!(" Query result: {}", result); + println!(" Query time: {:.2}μs", elapsed.as_micros()); + + query_results.push((key.clone(), result)); + } + + // Print summary + println!("\n=== Query Results Summary ==="); + println!("Total results: {}", query_results.len()); + for (key, value) in &query_results { + println!(" {:?} => {}", key, value); + } + + Ok(()) +} + +/// Also test querying functionality on ungrouped precomputes +fn test_query_functionality( + grouped_precomputes: &HashMap, Vec>>, + statistic: Statistic, + query_kwargs: HashMap, +) -> Result<(), Box> { + println!("Testing query on individual (unmerged) precomputes"); + println!("Statistic: {:?}", statistic); + + for (key, precomputes) in grouped_precomputes { + println!( + "\n--- Key: {:?} ({} precomputes) ---", + key, + precomputes.len() + ); + + for (i, precompute) in precomputes.iter().enumerate() { + println!( + " Precompute {}: type = {}", + i, + precompute.get_accumulator_type() + ); + + let result = query_precompute_for_statistic( + precompute.as_ref(), + &statistic, + key, + &query_kwargs, + )?; + + println!(" Result: {}", 
result); + } + } + + Ok(()) +} + +/// Query a precompute for a specific statistic +/// Only supports Arroyo-based accumulators +fn query_precompute_for_statistic( + precompute: &dyn AggregateCore, + statistic: &Statistic, + key: &Option, + query_kwargs: &HashMap, +) -> Result> { + match precompute.get_accumulator_type() { + "SumAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to SumAccumulator")?; + use query_engine_rust::data_model::SingleSubpopulationAggregate; + acc.query(*statistic, None) + .map_err(|e| format!("{}", e).into()) + } + "MultipleIncreaseAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to MultipleIncreaseAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for MultipleIncreaseAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "CountMinSketchAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to CountMinSketchAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for CountMinSketchAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "CountMinSketchWithHeapAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to CountMinSketchWithHeapAccumulator")?; + let key_val = key + .as_ref() + .ok_or("Key required for CountMinSketchWithHeapAccumulator")?; + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "DatasketchesKLLAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to DatasketchesKLLAccumulator")?; 
+ use query_engine_rust::data_model::SingleSubpopulationAggregate; + acc.query(*statistic, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } + "DeltaSetAggregatorAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to DeltaSetAggregatorAccumulator")?; + if let Some(key_val) = key { + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } else { + Ok((acc.added.union(&acc.removed).count()) as f64) + } + } + "SetAggregatorAccumulator" => { + let acc = precompute + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to SetAggregatorAccumulator")?; + if let Some(key_val) = key { + use query_engine_rust::data_model::MultipleSubpopulationAggregate; + acc.query(*statistic, key_val, Some(query_kwargs)) + .map_err(|e| format!("{}", e).into()) + } else { + Ok(acc.added.len() as f64) + } + } + _ => Err(format!( + "Unsupported accumulator type: {}", + precompute.get_accumulator_type() + ) + .into()), + } +} + +/// Initialize logging based on verbosity +fn init_logging(verbose: bool) { + use tracing_subscriber; + + let level = if verbose { + tracing::Level::DEBUG + } else { + tracing::Level::INFO + }; + + tracing_subscriber::fmt().with_max_level(level).init(); +} + +/// Update statistics during loading +fn update_statistics(stats: &mut LoadStatistics, dump: &PrecomputeDumpRaw) { + // Count by type + *stats + .records_by_type + .entry(dump.accumulator_type.clone()) + .or_insert(0) += 1; + + // Count by aggregation_id + *stats + .records_by_aggregation_id + .entry(dump.metadata.aggregation_id) + .or_insert(0) += 1; + + // Track time range + if stats.time_range.0 == 0 || dump.metadata.start_timestamp < stats.time_range.0 { + stats.time_range.0 = dump.metadata.start_timestamp; + } + if dump.metadata.end_timestamp > stats.time_range.1 { + stats.time_range.1 = dump.metadata.end_timestamp; + } +} + +/// Print 
load statistics +fn print_load_statistics(stats: &LoadStatistics) { + println!("\n=== Load Statistics ==="); + println!("Total records: {}", stats.total_records); + + println!("\nRecords by accumulator type:"); + for (acc_type, count) in &stats.records_by_type { + println!(" {}: {}", acc_type, count); + } + + println!("\nRecords by aggregation ID:"); + for (agg_id, count) in &stats.records_by_aggregation_id { + println!(" Aggregation {}: {}", agg_id, count); + } + + println!("\nTime range:"); + println!(" Start: {}", stats.time_range.0); + println!(" End: {}", stats.time_range.1); + println!(" Duration: {} ms", stats.time_range.1 - stats.time_range.0); +} + +/// Parse pattern type string to enum +fn parse_pattern_type(s: &str) -> QueryPatternType { + match s.to_lowercase().as_str() { + "temporal" | "only_temporal" => QueryPatternType::OnlyTemporal, + "spatial" | "only_spatial" => QueryPatternType::OnlySpatial, + "temporal_spatial" | "one_temporal_one_spatial" => QueryPatternType::OneTemporalOneSpatial, + _ => { + eprintln!("Unknown pattern type '{}', defaulting to OnlyTemporal", s); + QueryPatternType::OnlyTemporal + } + } +} + +/// Parse statistic string to enum +fn parse_statistic(s: &str) -> Result> { + s.parse::() + .map_err(|_| format!("Invalid statistic: {}", s).into()) +} + +/// Build query kwargs from CLI args +fn build_query_kwargs(args: &Args) -> HashMap { + let mut kwargs = HashMap::new(); + + if let Some(ref quantile) = args.quantile { + kwargs.insert("quantile".to_string(), quantile.clone()); + } + + kwargs +} diff --git a/QueryEngineRust/src/commenting_out_flink_diff b/QueryEngineRust/src/commenting_out_flink_diff new file mode 100644 index 0000000..50c1763 --- /dev/null +++ b/QueryEngineRust/src/commenting_out_flink_diff @@ -0,0 +1,1032 @@ +diff --git a/src/data_model/enums.rs b/src/data_model/enums.rs +index b04e3bf..c04e9c7 100644 +--- a/src/data_model/enums.rs ++++ b/src/data_model/enums.rs +@@ -6,6 +6,6 @@ pub enum InputFormat { + + 
#[derive(clap::ValueEnum, Clone, Debug)] + pub enum StreamingEngine { +- Flink, ++ // Flink, + Arroyo, + } +diff --git a/src/data_model/precomputed_output.rs b/src/data_model/precomputed_output.rs +index f4ca3dd..b0f749f 100644 +--- a/src/data_model/precomputed_output.rs ++++ b/src/data_model/precomputed_output.rs +@@ -254,61 +254,61 @@ impl PrecomputedOutput { + streaming_engine: &str, + ) -> Result> { + match streaming_engine { +- "flink" => Self::deserialize_from_json_flink(data, streaming_config), ++ // "flink" => Self::deserialize_from_json_flink(data, streaming_config), + "arroyo" => Self::deserialize_from_json_arroyo(data, streaming_config), + _ => Err(format!("Unknown streaming engine: {streaming_engine}").into()), + } + } + +- /// Deserialization for Flink streaming engine +- pub fn deserialize_from_json_flink( +- data: &serde_json::Value, +- streaming_config: &HashMap, +- ) -> Result> { +- let aggregation_id = data +- .get("aggregation_id") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'aggregation_id' field")?; +- +- let start_timestamp = data +- .get("start_timestamp") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'start_timestamp' field")?; +- +- let end_timestamp = data +- .get("end_timestamp") +- .and_then(|v| v.as_u64()) +- .ok_or("Missing or invalid 'end_timestamp' field")?; +- +- let key = if let Some(key_data) = data.get("key") { +- if key_data.is_null() { +- None +- } else { +- Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( +- |e| -> Box { +- format!("Failed to deserialize key: {e}").into() +- }, +- )?) +- } +- } else { +- None +- }; +- +- // Get aggregation type from streaming config lookup +- let config = streaming_config +- .get(&aggregation_id) +- .ok_or_else(|| { +- format!("Aggregation ID {aggregation_id} not found in streaming config") +- })? 
+- .clone(); +- +- Ok(Self { +- start_timestamp, +- end_timestamp, +- key, +- config, +- }) +- } ++ // /// Deserialization for Flink streaming engine ++ // pub fn deserialize_from_json_flink( ++ // data: &serde_json::Value, ++ // streaming_config: &HashMap, ++ // ) -> Result> { ++ // let aggregation_id = data ++ // .get("aggregation_id") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'aggregation_id' field")?; ++ ++ // let start_timestamp = data ++ // .get("start_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'start_timestamp' field")?; ++ ++ // let end_timestamp = data ++ // .get("end_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .ok_or("Missing or invalid 'end_timestamp' field")?; ++ ++ // let key = if let Some(key_data) = data.get("key") { ++ // if key_data.is_null() { ++ // None ++ // } else { ++ // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( ++ // |e| -> Box { ++ // format!("Failed to deserialize key: {e}").into() ++ // }, ++ // )?) ++ // } ++ // } else { ++ // None ++ // }; ++ ++ // // Get aggregation type from streaming config lookup ++ // let config = streaming_config ++ // .get(&aggregation_id) ++ // .ok_or_else(|| { ++ // format!("Aggregation ID {aggregation_id} not found in streaming config") ++ // })? 
++ // .clone(); ++ ++ // Ok(Self { ++ // start_timestamp, ++ // end_timestamp, ++ // key, ++ // config, ++ // }) ++ // } + + /// Deserialization for Arroyo streaming engine + pub fn deserialize_from_json_arroyo( +diff --git a/src/drivers/kafka_consumer.rs b/src/drivers/kafka_consumer.rs +index c2d090f..06f1651 100644 +--- a/src/drivers/kafka_consumer.rs ++++ b/src/drivers/kafka_consumer.rs +@@ -224,105 +224,105 @@ impl KafkaConsumer { + InputFormat::Json => { + // Handle streaming engine specific logic + match self.config.streaming_engine { +- StreamingEngine::Flink => { +- debug!("Received message of length: {}", payload.len()); ++ // StreamingEngine::Flink => { ++ // debug!("Received message of length: {}", payload.len()); + +- let json_data = if self.config.decompress_json { +- // Decompress using gzip +- let mut decoder = GzDecoder::new(payload); +- let mut decompressed = Vec::new(); +- match decoder.read_to_end(&mut decompressed) { +- Ok(_) => { +- debug!( +- "Decompressed JSON message of length: {}", +- decompressed.len() +- ); +- decompressed +- } +- Err(e) => { +- error!("Error decompressing gzip data: {}", e); +- return Err(format!("Gzip decompression error: {e}").into()); +- } +- } +- } else { +- payload.to_vec() +- }; ++ // let json_data = if self.config.decompress_json { ++ // // Decompress using gzip ++ // let mut decoder = GzDecoder::new(payload); ++ // let mut decompressed = Vec::new(); ++ // match decoder.read_to_end(&mut decompressed) { ++ // Ok(_) => { ++ // debug!( ++ // "Decompressed JSON message of length: {}", ++ // decompressed.len() ++ // ); ++ // decompressed ++ // } ++ // Err(e) => { ++ // error!("Error decompressing gzip data: {}", e); ++ // return Err(format!("Gzip decompression error: {e}").into()); ++ // } ++ // } ++ // } else { ++ // payload.to_vec() ++ // }; + +- let json_str = match String::from_utf8(json_data) { +- Ok(s) => s, +- Err(e) => { +- error!("Error converting bytes to UTF-8: {}", e); +- return Err(format!("UTF-8 
conversion error: {e}").into()); +- } +- }; ++ // let json_str = match String::from_utf8(json_data) { ++ // Ok(s) => s, ++ // Err(e) => { ++ // error!("Error converting bytes to UTF-8: {}", e); ++ // return Err(format!("UTF-8 conversion error: {e}").into()); ++ // } ++ // }; + +- let json_parse_start_time = Instant::now(); ++ // let json_parse_start_time = Instant::now(); + +- let json_dict: serde_json::Value = match serde_json::from_str(&json_str) { +- Ok(dict) => { +- let json_parse_duration = json_parse_start_time.elapsed(); +- debug!( +- "JSON parsing took: {:.2}ms", +- json_parse_duration.as_secs_f64() * 1000.0 +- ); +- dict +- } +- Err(e) => { +- error!("Error parsing JSON: {}", e); +- debug!("JSON content: {}", json_str); +- return Err(format!("JSON parsing error: {e}").into()); +- } +- }; ++ // let json_dict: serde_json::Value = match serde_json::from_str(&json_str) { ++ // Ok(dict) => { ++ // let json_parse_duration = json_parse_start_time.elapsed(); ++ // debug!( ++ // "JSON parsing took: {:.2}ms", ++ // json_parse_duration.as_secs_f64() * 1000.0 ++ // ); ++ // dict ++ // } ++ // Err(e) => { ++ // error!("Error parsing JSON: {}", e); ++ // debug!("JSON content: {}", json_str); ++ // return Err(format!("JSON parsing error: {e}").into()); ++ // } ++ // }; + +- debug!( +- "Deserializing JSON message: {}, {}, {}", +- json_dict +- .get("aggregation_id") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0), +- json_dict +- .get("start_timestamp") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0), +- json_dict +- .get("end_timestamp") +- .and_then(|v| v.as_u64()) +- .unwrap_or(0) +- ); ++ // debug!( ++ // "Deserializing JSON message: {}, {}, {}", ++ // json_dict ++ // .get("aggregation_id") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0), ++ // json_dict ++ // .get("start_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0), ++ // json_dict ++ // .get("end_timestamp") ++ // .and_then(|v| v.as_u64()) ++ // .unwrap_or(0) ++ // ); + +- let 
deserialize_start_time = Instant::now(); ++ // let deserialize_start_time = Instant::now(); + +- match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) { +- Ok((output, precompute)) => { +- let deserialize_duration = deserialize_start_time.elapsed(); +- debug!( +- "Deserialization took: {:.2}ms", +- deserialize_duration.as_secs_f64() * 1000.0 +- ); +- debug!( +- "Deserialized item: {}, {}, {}", +- output.config.aggregation_id, +- output.start_timestamp, +- output.end_timestamp +- ); +- debug!("Successfully deserialized Flink JSON message with precompute data"); +- let total_message_duration = message_start_time.elapsed(); +- debug!( +- "Total message processing took: {:.2}ms", +- total_message_duration.as_secs_f64() * 1000.0 +- ); +- Ok(Some((output, precompute))) +- } +- Err(e) => { +- error!( +- "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}", +- e +- ); +- debug!("JSON content: {}", json_str); +- Err(e) +- } +- } +- } ++ // match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) { ++ // Ok((output, precompute)) => { ++ // let deserialize_duration = deserialize_start_time.elapsed(); ++ // debug!( ++ // "Deserialization took: {:.2}ms", ++ // deserialize_duration.as_secs_f64() * 1000.0 ++ // ); ++ // debug!( ++ // "Deserialized item: {}, {}, {}", ++ // output.config.aggregation_id, ++ // output.start_timestamp, ++ // output.end_timestamp ++ // ); ++ // debug!("Successfully deserialized Flink JSON message with precompute data"); ++ // let total_message_duration = message_start_time.elapsed(); ++ // debug!( ++ // "Total message processing took: {:.2}ms", ++ // total_message_duration.as_secs_f64() * 1000.0 ++ // ); ++ // Ok(Some((output, precompute))) ++ // } ++ // Err(e) => { ++ // error!( ++ // "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}", ++ // e ++ // ); ++ // debug!("JSON content: {}", json_str); ++ // Err(e) ++ // } ++ // } ++ // } + StreamingEngine::Arroyo => { + 
// Arroyo messages - gzip decompression is applied at precompute level, not message level + let json_str = match String::from_utf8(payload.to_vec()) { +diff --git a/src/precompute_operators/count_min_sketch_accumulator.rs b/src/precompute_operators/count_min_sketch_accumulator.rs +index bcd98e4..658999e 100644 +--- a/src/precompute_operators/count_min_sketch_accumulator.rs ++++ b/src/precompute_operators/count_min_sketch_accumulator.rs +@@ -14,12 +14,12 @@ use promql_utilities::query_logics::enums::Statistic; + pub struct CountMinSketchAccumulator { + pub row_num: usize, + pub col_num: usize, +- pub sketch: Vec>, ++ pub sketch: Vec>, + } + + impl CountMinSketchAccumulator { + pub fn new(row_num: usize, col_num: usize) -> Self { +- let sketch = vec![vec![0; col_num]; row_num]; ++ let sketch = vec![vec![0.0; col_num]; row_num]; + Self { + row_num, + col_num, +@@ -44,7 +44,7 @@ impl CountMinSketchAccumulator { + for i in 0..self.row_num { + let hash_value = xxh32(key_bytes, i as u32); + let col_index = (hash_value as usize) % self.col_num; +- self.sketch[i][col_index] += value as i32; ++ self.sketch[i][col_index] += value; + } + } + +@@ -61,7 +61,7 @@ impl CountMinSketchAccumulator { + let key_str = key_values.join(";"); + let key_bytes = key_str.as_bytes(); + +- let mut min_value = i32::MAX; ++ let mut min_value = f64::MAX; + + // Query each row and take the minimum + for i in 0..self.row_num { +@@ -70,44 +70,44 @@ impl CountMinSketchAccumulator { + min_value = min_value.min(self.sketch[i][col_index]); + } + +- min_value as f64 ++ min_value + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let row_num = data["row_num"] +- .as_u64() +- .ok_or("Missing or invalid 'row_num' field")? as usize; +- let col_num = data["col_num"] +- .as_u64() +- .ok_or("Missing or invalid 'col_num' field")? 
as usize; +- +- let sketch_data = data["sketch"] +- .as_array() +- .ok_or("Missing or invalid 'sketch' field")?; +- +- let mut sketch = Vec::new(); +- for row in sketch_data { +- let row_array = row.as_array().ok_or("Invalid row in sketch data")?; +- let mut sketch_row = Vec::new(); +- for cell in row_array { +- let value = cell.as_i64().ok_or("Invalid cell value in sketch data")? as i32; +- sketch_row.push(value); +- } +- sketch.push(sketch_row); +- } +- +- Ok(Self { +- row_num, +- col_num, +- sketch, +- }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let row_num = data["row_num"] ++ // .as_f64() ++ // .ok_or("Missing or invalid 'row_num' field")? as usize; ++ // let col_num = data["col_num"] ++ // .as_f64() ++ // .ok_or("Missing or invalid 'col_num' field")? as usize; ++ ++ // let sketch_data = data["sketch"] ++ // .as_array() ++ // .ok_or("Missing or invalid 'sketch' field")?; ++ ++ // let mut sketch = Vec::new(); ++ // for row in sketch_data { ++ // let row_array = row.as_array().ok_or("Invalid row in sketch data")?; ++ // let mut sketch_row = Vec::new(); ++ // for cell in row_array { ++ // let value = cell.as_f64().ok_or("Invalid cell value in sketch data")?; ++ // sketch_row.push(value); ++ // } ++ // sketch.push(sketch_row); ++ // } ++ ++ // Ok(Self { ++ // row_num, ++ // col_num, ++ // sketch, ++ // }) ++ // } + + pub fn deserialize_from_bytes_arroyo( + buffer: &[u8], + ) -> Result> { + // Arroyo uses MessagePack format: [sketch_counters, col_num, row_num] +- let precompute: (Vec>, usize, usize) = rmp_serde::from_slice(buffer) ++ let precompute: (Vec>, usize, usize) = rmp_serde::from_slice(buffer) + .map_err(|e| format!("Failed to deserialize CountMinSketch from MessagePack: {e}"))?; + + let (sketch_counters, col_num, row_num) = precompute; +@@ -118,43 +118,46 @@ impl CountMinSketchAccumulator { + }) + } + +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- if buffer.len() < 8 { +- return Err("Buffer too short for 
row_num and col_num".into()); +- } +- +- let row_num = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; +- let col_num = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; +- +- let expected_size = 8 + (row_num * col_num * 4); +- if buffer.len() < expected_size { +- return Err("Buffer too short for sketch data".into()); +- } +- +- let mut sketch = Vec::new(); +- let mut offset = 8; +- +- for _ in 0..row_num { +- let mut row = Vec::new(); +- for _ in 0..col_num { +- let value = i32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]); +- row.push(value); +- offset += 4; +- } +- sketch.push(row); +- } +- +- Ok(Self { +- row_num, +- col_num, +- sketch, +- }) +- } ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // if buffer.len() < 8 { ++ // return Err("Buffer too short for row_num and col_num".into()); ++ // } ++ ++ // TODO: this logic will need to be checked for i32 -> f64 ++ // Github Issue #11 ++ ++ // let row_num = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; ++ // let col_num = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; ++ ++ // let expected_size = 8 + (row_num * col_num * 4); ++ // if buffer.len() < expected_size { ++ // return Err("Buffer too short for sketch data".into()); ++ // } ++ ++ // let mut sketch = Vec::new(); ++ // let mut offset = 8; ++ ++ // for _ in 0..row_num { ++ // let mut row = Vec::new(); ++ // for _ in 0..col_num { ++ // let value = i32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]); ++ // row.push(value); ++ // offset += 4; ++ // } ++ // sketch.push(row); ++ // } ++ ++ // Ok(Self { ++ // row_num, ++ // col_num, ++ // sketch, ++ // }) ++ // } + } + + impl SerializableToSink for CountMinSketchAccumulator { +@@ -319,7 +322,7 @@ mod tests { + // Check all values are initialized to 0 
+ for row in &cms.sketch { + for &value in row { +- assert_eq!(value, 0); ++ assert_eq!(value, 0.0); + } + } + } +@@ -356,17 +359,17 @@ mod tests { + let mut cms2 = CountMinSketchAccumulator::new(2, 3); + + // Set some values +- cms1.sketch[0][0] = 5; +- cms1.sketch[1][2] = 10; ++ cms1.sketch[0][0] = 5.0; ++ cms1.sketch[1][2] = 10.0; + +- cms2.sketch[0][0] = 3; +- cms2.sketch[0][1] = 7; ++ cms2.sketch[0][0] = 3.0; ++ cms2.sketch[0][1] = 7.0; + + let merged = CountMinSketchAccumulator::merge_accumulators(vec![cms1, cms2]).unwrap(); + +- assert_eq!(merged.sketch[0][0], 8); // 5 + 3 +- assert_eq!(merged.sketch[0][1], 7); // 0 + 7 +- assert_eq!(merged.sketch[1][2], 10); // 10 + 0 ++ assert_eq!(merged.sketch[0][0], 8.0); // 5 + 3 ++ assert_eq!(merged.sketch[0][1], 7.0); // 0 + 7 ++ assert_eq!(merged.sketch[1][2], 10.0); // 10 + 0 + } + + #[test] +@@ -378,30 +381,30 @@ mod tests { + assert!(result.is_err()); + } + +- #[test] +- fn test_count_min_sketch_serialization() { +- let mut cms = CountMinSketchAccumulator::new(2, 3); +- cms.sketch[0][1] = 42; +- cms.sketch[1][2] = 100; +- +- // Test JSON serialization +- let json_value = cms.serialize_to_json(); +- let deserialized = CountMinSketchAccumulator::deserialize_from_json(&json_value).unwrap(); +- +- assert_eq!(deserialized.row_num, 2); +- assert_eq!(deserialized.col_num, 3); +- assert_eq!(deserialized.sketch[0][1], 42); +- assert_eq!(deserialized.sketch[1][2], 100); +- +- // Test binary serialization +- let bytes = cms.serialize_to_bytes(); +- let deserialized_bytes = CountMinSketchAccumulator::deserialize_from_bytes(&bytes).unwrap(); +- +- assert_eq!(deserialized_bytes.row_num, 2); +- assert_eq!(deserialized_bytes.col_num, 3); +- assert_eq!(deserialized_bytes.sketch[0][1], 42); +- assert_eq!(deserialized_bytes.sketch[1][2], 100); +- } ++ // #[test] ++ // fn test_count_min_sketch_serialization() { ++ // let mut cms = CountMinSketchAccumulator::new(2, 3); ++ // cms.sketch[0][1] = 42.0; ++ // cms.sketch[1][2] = 100.0; ++ 
++ // // Test JSON serialization ++ // let json_value = cms.serialize_to_json(); ++ // let deserialized = CountMinSketchAccumulator::deserialize_from_json(&json_value).unwrap(); ++ ++ // assert_eq!(deserialized.row_num, 2); ++ // assert_eq!(deserialized.col_num, 3); ++ // assert_eq!(deserialized.sketch[0][1], 42.0); ++ // assert_eq!(deserialized.sketch[1][2], 100.0); ++ ++ // // Test binary serialization ++ // let bytes = cms.serialize_to_bytes(); ++ // let deserialized_bytes = CountMinSketchAccumulator::deserialize_from_bytes(&bytes).unwrap(); ++ ++ // assert_eq!(deserialized_bytes.row_num, 2); ++ // assert_eq!(deserialized_bytes.col_num, 3); ++ // assert_eq!(deserialized_bytes.sketch[0][1], 42.0); ++ // assert_eq!(deserialized_bytes.sketch[1][2], 100.0); ++ // } + + #[test] + fn test_count_min_sketch_as_aggregate_core() { +diff --git a/src/precompute_operators/datasketches_kll_accumulator.rs b/src/precompute_operators/datasketches_kll_accumulator.rs +index c72b700..3635f8e 100644 +--- a/src/precompute_operators/datasketches_kll_accumulator.rs ++++ b/src/precompute_operators/datasketches_kll_accumulator.rs +@@ -68,62 +68,62 @@ impl DatasketchesKLLAccumulator { + sorted_values[index] + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let max_capacity = data["max_capacity"].as_u64().unwrap_or(1000) as usize; +- +- let values = if let Some(values_array) = data["values"].as_array() { +- values_array +- .iter() +- .map(|v| v.as_f64().unwrap_or(0.0)) +- .collect() +- } else { +- Vec::new() +- }; +- +- Ok(Self { +- values, +- max_capacity, +- }) +- } +- +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- if buffer.len() < 8 { +- return Err("Buffer too short for max_capacity and values_count".into()); +- } +- +- let max_capacity = +- u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; +- let values_count = +- u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; +- +- let expected_size = 8 + 
(values_count * 8); +- if buffer.len() < expected_size { +- return Err("Buffer too short for values data".into()); +- } +- +- let mut values = Vec::new(); +- let mut offset = 8; +- +- for _ in 0..values_count { +- let value = f64::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- buffer[offset + 4], +- buffer[offset + 5], +- buffer[offset + 6], +- buffer[offset + 7], +- ]); +- values.push(value); +- offset += 8; +- } +- +- Ok(Self { +- values, +- max_capacity, +- }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let max_capacity = data["max_capacity"].as_u64().unwrap_or(1000) as usize; ++ ++ // let values = if let Some(values_array) = data["values"].as_array() { ++ // values_array ++ // .iter() ++ // .map(|v| v.as_f64().unwrap_or(0.0)) ++ // .collect() ++ // } else { ++ // Vec::new() ++ // }; ++ ++ // Ok(Self { ++ // values, ++ // max_capacity, ++ // }) ++ // } ++ ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // if buffer.len() < 8 { ++ // return Err("Buffer too short for max_capacity and values_count".into()); ++ // } ++ ++ // let max_capacity = ++ // u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]) as usize; ++ // let values_count = ++ // u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]) as usize; ++ ++ // let expected_size = 8 + (values_count * 8); ++ // if buffer.len() < expected_size { ++ // return Err("Buffer too short for values data".into()); ++ // } ++ ++ // let mut values = Vec::new(); ++ // let mut offset = 8; ++ ++ // for _ in 0..values_count { ++ // let value = f64::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // buffer[offset + 4], ++ // buffer[offset + 5], ++ // buffer[offset + 6], ++ // buffer[offset + 7], ++ // ]); ++ // values.push(value); ++ // offset += 8; ++ // } ++ ++ // Ok(Self { ++ // values, ++ // max_capacity, ++ // }) ++ // } + + pub 
fn deserialize_from_bytes_arroyo( + buffer: &[u8], +diff --git a/src/precompute_operators/delta_set_aggregator_accumulator.rs b/src/precompute_operators/delta_set_aggregator_accumulator.rs +index 2248748..46a853b 100644 +--- a/src/precompute_operators/delta_set_aggregator_accumulator.rs ++++ b/src/precompute_operators/delta_set_aggregator_accumulator.rs +@@ -43,114 +43,114 @@ impl DeltaSetAggregatorAccumulator { + self.removed.insert(key); + } + +- pub fn deserialize_from_json(data: &Value) -> Result> { +- let mut added = HashSet::new(); +- let mut removed = HashSet::new(); +- +- if let Some(added_array) = data["added"].as_array() { +- for item in added_array { +- // Handle nested structure with "values" key +- let key_data = if let Some(values) = item.get("values") { +- values +- } else { +- item +- }; +- let key = KeyByLabelValues::deserialize_from_json(key_data)?; +- added.insert(key); +- } +- } +- +- if let Some(removed_array) = data["removed"].as_array() { +- for item in removed_array { +- // Handle nested structure with "values" key +- let key_data = if let Some(values) = item.get("values") { +- values +- } else { +- item +- }; +- let key = KeyByLabelValues::deserialize_from_json(key_data)?; +- removed.insert(key); +- } +- } +- +- Ok(Self { added, removed }) +- } +- +- pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { +- let mut offset = 0; +- let mut added = HashSet::new(); +- let mut removed = HashSet::new(); +- +- // Read added set +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for added set size".into()); +- } +- let added_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- for _ in 0..added_size { +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for added item size".into()); +- } +- let item_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) 
as usize; +- offset += 4; +- +- if offset + item_size > buffer.len() { +- return Err("Buffer too short for added item data".into()); +- } +- let key = +- KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; +- offset += item_size; +- +- added.insert(key); +- } +- +- // Read removed set +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for removed set size".into()); +- } +- let removed_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- for _ in 0..removed_size { +- if offset + 4 > buffer.len() { +- return Err("Buffer too short for removed item size".into()); +- } +- let item_size = u32::from_le_bytes([ +- buffer[offset], +- buffer[offset + 1], +- buffer[offset + 2], +- buffer[offset + 3], +- ]) as usize; +- offset += 4; +- +- if offset + item_size > buffer.len() { +- return Err("Buffer too short for removed item data".into()); +- } +- let key = +- KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; +- offset += item_size; +- +- removed.insert(key); +- } +- +- Ok(Self { added, removed }) +- } ++ // pub fn deserialize_from_json(data: &Value) -> Result> { ++ // let mut added = HashSet::new(); ++ // let mut removed = HashSet::new(); ++ ++ // if let Some(added_array) = data["added"].as_array() { ++ // for item in added_array { ++ // // Handle nested structure with "values" key ++ // let key_data = if let Some(values) = item.get("values") { ++ // values ++ // } else { ++ // item ++ // }; ++ // let key = KeyByLabelValues::deserialize_from_json(key_data)?; ++ // added.insert(key); ++ // } ++ // } ++ ++ // if let Some(removed_array) = data["removed"].as_array() { ++ // for item in removed_array { ++ // // Handle nested structure with "values" key ++ // let key_data = if let Some(values) = item.get("values") { ++ // values ++ // } else { ++ // item ++ // }; ++ // let key = 
KeyByLabelValues::deserialize_from_json(key_data)?; ++ // removed.insert(key); ++ // } ++ // } ++ ++ // Ok(Self { added, removed }) ++ // } ++ ++ // pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { ++ // let mut offset = 0; ++ // let mut added = HashSet::new(); ++ // let mut removed = HashSet::new(); ++ ++ // // Read added set ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for added set size".into()); ++ // } ++ // let added_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // for _ in 0..added_size { ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for added item size".into()); ++ // } ++ // let item_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // if offset + item_size > buffer.len() { ++ // return Err("Buffer too short for added item data".into()); ++ // } ++ // let key = ++ // KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; ++ // offset += item_size; ++ ++ // added.insert(key); ++ // } ++ ++ // // Read removed set ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for removed set size".into()); ++ // } ++ // let removed_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // for _ in 0..removed_size { ++ // if offset + 4 > buffer.len() { ++ // return Err("Buffer too short for removed item size".into()); ++ // } ++ // let item_size = u32::from_le_bytes([ ++ // buffer[offset], ++ // buffer[offset + 1], ++ // buffer[offset + 2], ++ // buffer[offset + 3], ++ // ]) as usize; ++ // offset += 4; ++ ++ // if offset + item_size > buffer.len() { ++ // return Err("Buffer too short for removed item 
data".into()); ++ // } ++ // let key = ++ // KeyByLabelValues::deserialize_from_bytes(&buffer[offset..offset + item_size])?; ++ // offset += item_size; ++ ++ // removed.insert(key); ++ // } ++ ++ // Ok(Self { added, removed }) ++ // } + + pub fn deserialize_from_bytes_arroyo( + buffer: &[u8], +@@ -412,36 +412,35 @@ mod tests { + } + + #[test] +- fn test_delta_set_aggregator_serialization() { +- let mut acc = DeltaSetAggregatorAccumulator::new(); +- +- let key1 = create_test_key("web"); +- let key2 = create_test_key("api"); +- +- acc.add_key(key1.clone()); +- acc.remove_key(key2.clone()); +- +- // Test JSON serialization +- let json_value = acc.serialize_to_json(); +- let deserialized = +- DeltaSetAggregatorAccumulator::deserialize_from_json(&json_value).unwrap(); +- +- assert_eq!(deserialized.added.len(), 1); +- assert_eq!(deserialized.removed.len(), 1); +- assert!(deserialized.added.contains(&key1)); +- assert!(deserialized.removed.contains(&key2)); +- +- // Test binary serialization +- let bytes = acc.serialize_to_bytes(); +- let deserialized_bytes = +- DeltaSetAggregatorAccumulator::deserialize_from_bytes(&bytes).unwrap(); +- +- assert_eq!(deserialized_bytes.added.len(), 1); +- assert_eq!(deserialized_bytes.removed.len(), 1); +- assert!(deserialized_bytes.added.contains(&key1)); +- assert!(deserialized_bytes.removed.contains(&key2)); +- } +- ++ // fn test_delta_set_aggregator_serialization() { ++ // let mut acc = DeltaSetAggregatorAccumulator::new(); ++ ++ // let key1 = create_test_key("web"); ++ // let key2 = create_test_key("api"); ++ ++ // acc.add_key(key1.clone()); ++ // acc.remove_key(key2.clone()); ++ ++ // // Test JSON serialization ++ // let json_value = acc.serialize_to_json(); ++ // let deserialized = ++ // DeltaSetAggregatorAccumulator::deserialize_from_json(&json_value).unwrap(); ++ ++ // assert_eq!(deserialized.added.len(), 1); ++ // assert_eq!(deserialized.removed.len(), 1); ++ // assert!(deserialized.added.contains(&key1)); ++ // 
assert!(deserialized.removed.contains(&key2)); ++ ++ // // Test binary serialization ++ // let bytes = acc.serialize_to_bytes(); ++ // let deserialized_bytes = ++ // DeltaSetAggregatorAccumulator::deserialize_from_bytes(&bytes).unwrap(); ++ ++ // assert_eq!(deserialized_bytes.added.len(), 1); ++ // assert_eq!(deserialized_bytes.removed.len(), 1); ++ // assert!(deserialized_bytes.added.contains(&key1)); ++ // assert!(deserialized_bytes.removed.contains(&key2)); ++ // } + #[test] + fn test_delta_set_aggregator_query() { + let acc = DeltaSetAggregatorAccumulator::new(); diff --git a/QueryEngineRust/src/data_model/aggregation_config.rs b/QueryEngineRust/src/data_model/aggregation_config.rs new file mode 100644 index 0000000..3480c45 --- /dev/null +++ b/QueryEngineRust/src/data_model/aggregation_config.rs @@ -0,0 +1 @@ +pub use sketch_db_common::aggregation_config::*; diff --git a/QueryEngineRust/src/data_model/aggregation_reference.rs b/QueryEngineRust/src/data_model/aggregation_reference.rs new file mode 100644 index 0000000..ccd18c5 --- /dev/null +++ b/QueryEngineRust/src/data_model/aggregation_reference.rs @@ -0,0 +1,33 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregationReference { + pub aggregation_id: u64, + /// For circular_buffer policy: keep this many most recent aggregates + #[serde(skip_serializing_if = "Option::is_none")] + pub num_aggregates_to_retain: Option, + /// For read_based policy: remove aggregate after this many reads + #[serde(skip_serializing_if = "Option::is_none")] + pub read_count_threshold: Option, +} + +impl AggregationReference { + pub fn new(aggregation_id: u64, num_aggregates_to_retain: Option) -> Self { + Self { + aggregation_id, + num_aggregates_to_retain, + read_count_threshold: None, + } + } + + pub fn with_read_count_threshold( + aggregation_id: u64, + read_count_threshold: Option, + ) -> Self { + Self { + aggregation_id, + num_aggregates_to_retain: None, + 
read_count_threshold, + } + } +} diff --git a/QueryEngineRust/src/data_model/enums.rs b/QueryEngineRust/src/data_model/enums.rs new file mode 100644 index 0000000..92c0d13 --- /dev/null +++ b/QueryEngineRust/src/data_model/enums.rs @@ -0,0 +1,32 @@ +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum InputFormat { + Json, + Byte, +} + +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum StreamingEngine { + Flink, + Arroyo, +} + +pub use sketch_db_common::enums::{CleanupPolicy, QueryLanguage}; + +#[derive(clap::ValueEnum, Clone, Debug, PartialEq)] +pub enum QueryProtocol { + #[value(alias = "PROMETHEUS_HTTP")] + PrometheusHttp, + #[value(alias = "CLICKHOUSE_HTTP")] + ClickHouseHttp, + #[value(alias = "ELASTIC_HTTP")] + ElasticHttp, + // Future: DuckDbHttp, etc. +} + +#[derive(clap::ValueEnum, Clone, Debug, Copy, PartialEq)] +pub enum LockStrategy { + #[value(name = "global")] + Global, + #[value(name = "per-key")] + PerKey, +} diff --git a/QueryEngineRust/src/data_model/inference_config.rs b/QueryEngineRust/src/data_model/inference_config.rs new file mode 100644 index 0000000..0626fef --- /dev/null +++ b/QueryEngineRust/src/data_model/inference_config.rs @@ -0,0 +1,249 @@ +use anyhow::Result; +use serde_yaml::Value; +use std::collections::HashSet; +use std::fs::File; +use std::io::BufReader; + +use crate::data_model::{CleanupPolicy, PromQLSchema, QueryConfig, QueryLanguage}; +use promql_utilities::data_model::KeyByLabelNames; +use sql_utilities::sqlhelper::{SQLSchema, Table}; + +/// Schema configuration that can be either PromQL or SQL format +#[derive(Debug, Clone)] +pub enum SchemaConfig { + PromQL(PromQLSchema), + SQL(SQLSchema), + ElasticQueryDSL, + ElasticSQL, +} + +#[derive(Debug, Clone)] +pub struct InferenceConfig { + pub schema: SchemaConfig, + pub query_configs: Vec, + pub cleanup_policy: CleanupPolicy, +} + +impl InferenceConfig { + pub fn new(query_language: QueryLanguage, cleanup_policy: CleanupPolicy) -> Self { + let schema = match query_language { + 
QueryLanguage::promql => SchemaConfig::PromQL(PromQLSchema::new()), + QueryLanguage::sql => SchemaConfig::SQL(SQLSchema::new(Vec::new())), + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, // Placeholder for QueryDSL + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, + }; + Self { + schema, + query_configs: Vec::new(), + cleanup_policy, + } + } + + pub fn from_yaml_file(yaml_file: &str, query_language: QueryLanguage) -> Result { + let file = File::open(yaml_file)?; + let reader = BufReader::new(file); + let data: Value = serde_yaml::from_reader(reader)?; + + Self::from_yaml_data(&data, query_language) + } + + pub fn from_yaml_data(data: &Value, query_language: QueryLanguage) -> Result { + let schema = match query_language { + QueryLanguage::promql => { + let promql_schema = Self::parse_promql_schema(data)?; + SchemaConfig::PromQL(promql_schema) + } + QueryLanguage::sql => { + let sql_schema = Self::parse_sql_schema(data)?; + SchemaConfig::SQL(sql_schema) + } + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, + }; + + let cleanup_policy = Self::parse_cleanup_policy(data)?; + let query_configs = Self::parse_query_configs(data, cleanup_policy)?; + + Ok(Self { + schema, + query_configs, + cleanup_policy, + }) + } + + /// Parse PromQL schema from YAML data (metrics: key) + fn parse_promql_schema(data: &Value) -> Result { + let mut promql_schema = PromQLSchema::new(); + if let Some(metrics) = data.get("metrics") { + if let Some(metrics_map) = metrics.as_mapping() { + for (metric_name_val, labels_val) in metrics_map { + if let (Some(metric_name), Some(labels_seq)) = + (metric_name_val.as_str(), labels_val.as_sequence()) + { + let labels: Vec = labels_seq + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + let key_by_label_names = KeyByLabelNames::new(labels); + promql_schema = + promql_schema.add_metric(metric_name.to_string(), 
key_by_label_names); + } + } + } + } + Ok(promql_schema) + } + + /// Parse SQL schema from YAML data (tables: key at top level, matching ArroyoSketch format) + fn parse_sql_schema(data: &Value) -> Result { + let tables_data = data + .get("tables") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| { + anyhow::anyhow!("Missing or invalid tables field for SQL query language") + })?; + + let mut tables = Vec::new(); + for table_data in tables_data { + let name = table_data + .get("name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing name field in table"))? + .to_string(); + + let time_column = table_data + .get("time_column") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing time_column field in table {}", name))? + .to_string(); + + let value_columns: HashSet = table_data + .get("value_columns") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| anyhow::anyhow!("Missing value_columns field in table {}", name))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + + let metadata_columns: HashSet = table_data + .get("metadata_columns") + .and_then(|v| v.as_sequence()) + .ok_or_else(|| anyhow::anyhow!("Missing metadata_columns field in table {}", name))? + .iter() + .filter_map(|v| v.as_str()) + .map(|s| s.to_string()) + .collect(); + + tables.push(Table::new( + name, + time_column, + value_columns, + metadata_columns, + )); + } + + Ok(SQLSchema::new(tables)) + } + + /// Parse cleanup policy from YAML data. Errors if not specified. + fn parse_cleanup_policy(data: &Value) -> Result { + let cleanup_policy_data = data.get("cleanup_policy").ok_or_else(|| { + anyhow::anyhow!( + "Missing cleanup_policy section in inference_config.yaml. \ + Must specify cleanup_policy.name as one of: circular_buffer, read_based, no_cleanup" + ) + })?; + + let name = cleanup_policy_data + .get("name") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + anyhow::anyhow!( + "Missing cleanup_policy.name in inference_config.yaml. 
\ + Must be one of: circular_buffer, read_based, no_cleanup" + ) + })?; + + match name { + "circular_buffer" => Ok(CleanupPolicy::CircularBuffer), + "read_based" => Ok(CleanupPolicy::ReadBased), + "no_cleanup" => Ok(CleanupPolicy::NoCleanup), + _ => Err(anyhow::anyhow!( + "Invalid cleanup policy: '{}'. Valid options: circular_buffer, read_based, no_cleanup", + name + )), + } + } + + fn parse_query_configs( + data: &Value, + cleanup_policy: CleanupPolicy, + ) -> Result> { + // Handle queries field -> query_configs + let query_configs = if let Some(queries) = data.get("queries").and_then(|v| v.as_sequence()) + { + let mut configs = Vec::new(); + for query_data in queries { + let query = query_data + .get("query") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing query field"))? + .to_string(); + + // Parse aggregations if present + let aggregations = if let Some(aggregations_data) = + query_data.get("aggregations").and_then(|v| v.as_sequence()) + { + let mut agg_refs = Vec::new(); + for agg_data in aggregations_data { + let aggregation_id = agg_data + .get("aggregation_id") + .and_then(|v| v.as_u64()) + .ok_or_else(|| { + anyhow::anyhow!("Missing aggregation_id in aggregation") + })?; + + // Parse the appropriate parameter based on cleanup policy + let agg_ref = match cleanup_policy { + CleanupPolicy::CircularBuffer => { + let num_aggregates_to_retain = agg_data + .get("num_aggregates_to_retain") + .and_then(|v| v.as_u64()); + crate::data_model::AggregationReference::new( + aggregation_id, + num_aggregates_to_retain, + ) + } + CleanupPolicy::ReadBased => { + let read_count_threshold = agg_data + .get("read_count_threshold") + .and_then(|v| v.as_u64()); + crate::data_model::AggregationReference::with_read_count_threshold( + aggregation_id, + read_count_threshold, + ) + } + CleanupPolicy::NoCleanup => { + // No cleanup parameters needed + crate::data_model::AggregationReference::new(aggregation_id, None) + } + }; + agg_refs.push(agg_ref); + } + 
agg_refs + } else { + Vec::new() + }; + + let config = QueryConfig::new(query).with_aggregations(aggregations); + configs.push(config); + } + configs + } else { + Vec::new() + }; + Ok(query_configs) + } +} diff --git a/QueryEngineRust/src/data_model/key_by_label_values.rs b/QueryEngineRust/src/data_model/key_by_label_values.rs new file mode 100644 index 0000000..9b282bf --- /dev/null +++ b/QueryEngineRust/src/data_model/key_by_label_values.rs @@ -0,0 +1,163 @@ +use serde::{Deserialize, Serialize}; +// use std::collections::HashMap; +use std::hash::{Hash, Hasher}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct KeyByLabelValues { + // pub labels: HashMap, + pub labels: Vec, +} + +impl KeyByLabelValues { + pub fn new() -> Self { + Self { labels: Vec::new() } + } + + pub fn new_with_labels(labels: Vec) -> Self { + Self { labels } + } + + pub fn insert(&mut self, value: String) { + self.labels.push(value); + } + + pub fn get(&self, index: usize) -> Option<&String> { + self.labels.get(index) + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::to_value(&self.labels).unwrap_or(serde_json::Value::Null) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let labels: Vec = serde_json::from_value(data.clone())?; + Ok(Self { labels }) + } + + pub fn serialize_to_bytes(&self) -> Vec { + bincode::serialize(&self.labels).unwrap_or_default() + } + + pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { + let labels: Vec = bincode::deserialize(buffer)?; + Ok(Self { labels }) + } + + /// Encode labels as a semicolon-joined string — the canonical key format used + /// for all sketch hashing (CountMinSketch, HydraKLL, SetAggregator, DeltaSet). + pub fn to_semicolon_str(&self) -> String { + self.labels.join(";") + } + + /// Decode a semicolon-joined string back into a KeyByLabelValues. 
+ pub fn from_semicolon_str(s: &str) -> Self { + Self { + labels: s.split(';').map(|s| s.to_string()).collect(), + } + } + + pub fn is_empty(&self) -> bool { + self.labels.is_empty() + } + + pub fn len(&self) -> usize { + self.labels.len() + } +} + +impl Hash for KeyByLabelValues { + fn hash(&self, state: &mut H) { + // Create a sorted vector of key-value pairs for consistent hashing + let mut sorted_pairs: Vec<_> = self.labels.iter().collect(); + sorted_pairs.sort(); + + for value in sorted_pairs { + value.hash(state); + } + } +} + +impl Default for KeyByLabelValues { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Display for KeyByLabelValues { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{")?; + let mut first = true; + for value in &self.labels { + if !first { + write!(f, ", ")?; + } + write!(f, "{value}")?; + first = false; + } + write!(f, "}}") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_key_by_label_values() { + let mut key = KeyByLabelValues::new(); + key.insert("localhost:8080".to_string()); + key.insert("prometheus".to_string()); + + assert_eq!(key.len(), 2); + assert_eq!(key.get(0), Some(&"localhost:8080".to_string())); + assert_eq!(key.get(1), Some(&"prometheus".to_string())); + } + + #[test] + fn test_serialization() { + let mut key = KeyByLabelValues::new(); + key.insert("test".to_string()); + + let json = key.serialize_to_json(); + let deserialized = KeyByLabelValues::deserialize_from_json(&json).unwrap(); + assert_eq!(key, deserialized); + } + + #[test] + fn test_byte_serialization() { + let mut key = KeyByLabelValues::new(); + key.insert("test".to_string()); + + let bytes = key.serialize_to_bytes(); + let deserialized = KeyByLabelValues::deserialize_from_bytes(&bytes).unwrap(); + assert_eq!(key, deserialized); + } + + #[test] + fn test_semicolon_roundtrip() { + let key = KeyByLabelValues::new_with_labels(vec!["web".to_string(), "prod".to_string()]); + 
assert_eq!(key.to_semicolon_str(), "web;prod"); + let roundtripped = KeyByLabelValues::from_semicolon_str("web;prod"); + assert_eq!(roundtripped, key); + } + + #[test] + fn test_hash_consistency() { + let mut key1 = KeyByLabelValues::new(); + key1.insert("a".to_string()); + key1.insert("b".to_string()); + + let mut key2 = KeyByLabelValues::new(); + key2.insert("b".to_string()); + key2.insert("a".to_string()); + + // Should hash to the same value regardless of insertion order + let mut hasher1 = std::collections::hash_map::DefaultHasher::new(); + let mut hasher2 = std::collections::hash_map::DefaultHasher::new(); + + key1.hash(&mut hasher1); + key2.hash(&mut hasher2); + + assert_eq!(hasher1.finish(), hasher2.finish()); + } +} diff --git a/QueryEngineRust/src/data_model/measurement.rs b/QueryEngineRust/src/data_model/measurement.rs new file mode 100644 index 0000000..0fe1abc --- /dev/null +++ b/QueryEngineRust/src/data_model/measurement.rs @@ -0,0 +1,94 @@ +use serde::{Deserialize, Serialize}; +use std::ops::Add; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Measurement { + pub value: f64, +} + +impl Measurement { + pub fn new(value: f64) -> Self { + Self { value } + } + + pub fn serialize_to_bytes(&self) -> Vec { + self.value.to_le_bytes().to_vec() + } + + pub fn serialize_to_json(&self) -> serde_json::Value { + serde_json::json!({ + "value": self.value + }) + } + + pub fn deserialize_from_json(data: &serde_json::Value) -> Result { + let value = data["value"].as_f64().ok_or_else(|| { + serde_json::Error::io(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Missing or invalid 'value' field", + )) + })?; + Ok(Self::new(value)) + } + + pub fn deserialize_from_bytes(buffer: &[u8]) -> Result> { + if buffer.len() < 8 { + return Err("Buffer too short for f64".into()); + } + let value = f64::from_le_bytes([ + buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7], + ]); + Ok(Self::new(value)) + } +} + 
+impl Add for Measurement { + type Output = Measurement; + + fn add(self, other: Measurement) -> Measurement { + Measurement::new(self.value + other.value) + } +} + +impl Add for &Measurement { + type Output = Measurement; + + fn add(self, other: &Measurement) -> Measurement { + Measurement::new(self.value + other.value) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_measurement_creation() { + let measurement = Measurement::new(42.5); + assert_eq!(measurement.value, 42.5); + } + + #[test] + fn test_measurement_addition() { + let m1 = Measurement::new(10.0); + let m2 = Measurement::new(20.0); + let result = m1 + m2; + assert_eq!(result.value, 30.0); + } + + #[test] + fn test_serialization() { + let measurement = Measurement::new(42.5); + let json = measurement.serialize_to_json(); + let deserialized = Measurement::deserialize_from_json(&json).unwrap(); + assert_eq!(measurement, deserialized); + } + + #[test] + fn test_byte_serialization() { + let measurement = Measurement::new(42.5); + let bytes = measurement.serialize_to_bytes(); + let deserialized = Measurement::deserialize_from_bytes(&bytes).unwrap(); + assert_eq!(measurement, deserialized); + } +} diff --git a/QueryEngineRust/src/data_model/mod.rs b/QueryEngineRust/src/data_model/mod.rs new file mode 100644 index 0000000..ce8a6d6 --- /dev/null +++ b/QueryEngineRust/src/data_model/mod.rs @@ -0,0 +1,23 @@ +pub mod aggregation_config; +pub mod aggregation_reference; +pub mod enums; +pub mod inference_config; +pub mod key_by_label_values; +pub mod measurement; +pub mod precomputed_output; +pub mod promql_schema; +pub mod query_config; +pub mod streaming_config; +pub mod traits; + +pub use aggregation_config::*; +pub use aggregation_reference::*; +pub use enums::*; +pub use inference_config::*; +pub use key_by_label_values::*; +pub use measurement::*; +pub use precomputed_output::*; +pub use promql_schema::*; +pub use query_config::*; +pub use streaming_config::*; +pub use traits::*; diff 
--git a/QueryEngineRust/src/data_model/precomputed_output.rs b/QueryEngineRust/src/data_model/precomputed_output.rs new file mode 100644 index 0000000..c5ebab5 --- /dev/null +++ b/QueryEngineRust/src/data_model/precomputed_output.rs @@ -0,0 +1,690 @@ +use chrono::DateTime; +use flate2::read::GzDecoder; +use serde::{Deserialize, Serialize}; +use std::io::Read as _; +use tracing::error; + +use crate::data_model::traits::SerializableToSink; +use crate::data_model::{KeyByLabelValues, StreamingConfig}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PrecomputedOutput { + pub start_timestamp: u64, + pub end_timestamp: u64, + pub key: Option, + pub aggregation_id: u64, + // pub config: AggregationConfig, + // Note: precompute will be handled separately as it's a trait object +} + +impl PrecomputedOutput { + pub fn new( + start_timestamp: u64, + end_timestamp: u64, + key: Option, + aggregation_id: u64, + // TODO: we should remove AggregationConfig from here. Configs should only be accessed from the StreamingConfig read in main.rs + // config: AggregationConfig, + ) -> Self { + Self { + start_timestamp, + end_timestamp, + key, + aggregation_id, + // config, + } + } + + pub fn get_freshness_debug_string(&self) -> String { + // Match Python implementation more closely + let current_time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let freshness = current_time.saturating_sub(self.end_timestamp); + format!( + "end_timestamp: {}, current_time: {}, freshness: {}", + self.end_timestamp, current_time, freshness + ) + } + + // /// Serialize PrecomputedOutput with precompute data to match Python JSON format + // pub fn serialize_to_json_with_precompute( + // &self, + // precompute: &dyn crate::data_model::AggregateCore, + // ) -> serde_json::Value { + // serde_json::json!({ + // // "config": self.config.serialize_to_json(), + // "start_timestamp": self.start_timestamp, + // "end_timestamp": 
self.end_timestamp, + // "key": self.key.as_ref().map(|k| k.serialize_to_json()), + // "precompute": precompute.serialize_to_json() + // }) + // } + + /// Deserialize from bytes using Python-compatible format + pub fn deserialize_from_bytes_with_precompute( + _data: &[u8], + ) -> Result<(Self, Vec), Box> { + error!("Not implemented: deserialize_from_bytes_with_precompute"); + Err(("Not implemented: deserialize_from_bytes_with_precompute").into()) + } + + // /// Simple deserialization from bytes (compatibility method for Kafka consumer) + // /// This doesn't include precompute data and is primarily for compatibility + // pub fn deserialize_from_bytes( + // data: &[u8], + // ) -> Result> { + // // Try to deserialize as JSON first (common case) + // if let Ok(json_str) = String::from_utf8(data.to_vec()) { + // if let Ok(json_value) = serde_json::from_str::(&json_str) { + // return Self::deserialize_from_json(&json_value); + // } + // } + + // // If JSON fails, try binary format + // let (output, _precompute_bytes) = Self::deserialize_from_bytes_with_precompute(data) + // .map_err(|e| -> Box { + // format!("Failed to deserialize from bytes: {e}").into() + // })?; + // Ok(output) + // } + + // /// Legacy deserialization method for backward compatibility + // pub fn deserialize_from_json( + // data: &serde_json::Value, + // ) -> Result> { + // // Extract required fields + // let config_data = data.get("config").ok_or("Missing 'config' field in JSON")?; + // // Use custom deserialization for the config + // let config = AggregationConfig::deserialize_from_json(config_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize config: {e}").into() + // }, + // )?; + + // let start_timestamp = data + // .get("start_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'start_timestamp' field")?; + + // let end_timestamp = data + // .get("end_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'end_timestamp' field")?; + + 
// let key = if let Some(key_data) = data.get("key") { + // if key_data.is_null() { + // None + // } else { + // // Use the custom deserialize_from_json method which expects the direct HashMap format + // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize key: {e}").into() + // }, + // )?) + // } + // } else { + // None + // }; + + // // For now, we create a PrecomputedOutput without precompute data + // // In a full implementation, we would deserialize the precompute field as well + // Ok(Self { + // start_timestamp, + // end_timestamp, + // key, + // config, + // }) + // } + + // /// Deserialization for Flink streaming engine + // pub fn deserialize_from_json_flink( + // data: &serde_json::Value, + // streaming_config: &HashMap, + // ) -> Result> { + // let aggregation_id = data + // .get("aggregation_id") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'aggregation_id' field")?; + + // let start_timestamp = data + // .get("start_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'start_timestamp' field")?; + + // let end_timestamp = data + // .get("end_timestamp") + // .and_then(|v| v.as_u64()) + // .ok_or("Missing or invalid 'end_timestamp' field")?; + + // let key = if let Some(key_data) = data.get("key") { + // if key_data.is_null() { + // None + // } else { + // Some(KeyByLabelValues::deserialize_from_json(key_data).map_err( + // |e| -> Box { + // format!("Failed to deserialize key: {e}").into() + // }, + // )?) + // } + // } else { + // None + // }; + + // // Get aggregation type from streaming config lookup + // let config = streaming_config + // .get(&aggregation_id) + // .ok_or_else(|| { + // format!("Aggregation ID {aggregation_id} not found in streaming config") + // })? 
+ // .clone(); + + // Ok(Self { + // start_timestamp, + // end_timestamp, + // key, + // config, + // }) + // } + + /// Deserialization for Arroyo streaming engine + pub fn deserialize_from_json_arroyo( + data: &serde_json::Value, + // streaming_config: &HashMap, + streaming_config: &StreamingConfig, + ) -> Result< + (Self, Box), + Box, + > { + let aggregation_id = data + .get("aggregation_id") + .and_then(|v| v.as_u64()) + .ok_or("Missing or invalid 'aggregation_id' field")?; + + // Parse window timestamps from Arroyo format + let window = data + .get("window") + .ok_or("Missing 'window' field in Arroyo data")?; + let start_str = window + .get("start") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'start' field in window")?; + let end_str = window + .get("end") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'end' field in window")?; + + // Parse timestamps with Z suffix - convert to milliseconds + let start_timestamp = (DateTime::parse_from_rfc3339(&format!("{start_str}Z")) + .map_err(|e| format!("Failed to parse start timestamp: {e}"))? + .timestamp() as u64) + * 1000; + let end_timestamp = (DateTime::parse_from_rfc3339(&format!("{end_str}Z")) + .map_err(|e| format!("Failed to parse end timestamp: {e}"))? + .timestamp() as u64) + * 1000; + + // Parse key from semicolon-separated format - always create KeyByLabelValues (even if empty) + let key_str = data.get("key").and_then(|v| v.as_str()).unwrap_or(""); + let labels: Vec = key_str.split(';').map(|s| s.to_string()).collect(); + // let key = Some(KeyByLabelValues::new_with_labels( + // labels + // .into_iter() + // .enumerate() + // .map(|(i, v)| (format!("label_{i}"), v)) + // .collect(), + // )); + let key = Some(KeyByLabelValues::new_with_labels(labels)); + + // Get aggregation type from streaming config lookup + let config = streaming_config + .get_aggregation_config(aggregation_id) + .ok_or_else(|| { + format!("Aggregation ID {aggregation_id} not found in streaming config") + })? 
+ .clone(); + + let precomputed_output = Self { + start_timestamp, + end_timestamp, + key, + aggregation_id, + }; + + // data["precompute"] has been compressed using the following logic + // fn gzip_compress(data: &[u8]) -> Option> { + // let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + // encoder.write_all(&data).ok()?; + // encoder.finish().ok() + // } + + // Extract and decompress precompute data + // Equivalent python code: + // precompute_bytes = bytes.fromhex(data["precompute"]) + // precompute_bytes = gzip.decompress(precompute_bytes) + let precompute_hex = data + .get("precompute") + .and_then(|v| v.as_str()) + .ok_or("Missing or invalid 'precompute' field")?; + + // NOTE: Check if hex decoding is actually needed - might depend on Arroyo's JSON serialization + let compressed_bytes = hex::decode(precompute_hex) + .map_err(|e| format!("Failed to decode hex precompute data: {e}"))?; + + // Decompress gzip data + + let mut decoder = GzDecoder::new(&compressed_bytes[..]); + let mut precompute_bytes = Vec::new(); + decoder + .read_to_end(&mut precompute_bytes) + .map_err(|e| format!("Failed to decompress precompute data: {e}"))?; + + let precompute = Self::create_precompute_from_bytes( + &config.aggregation_type, + Vec::as_slice(&precompute_bytes), + "arroyo", + )?; + + Ok((precomputed_output, precompute)) + } + + // /// Deserialize from JSON and extract precompute data following Python implementation + // /// This is the public method that should be used by Kafka consumer + // pub fn deserialize_from_json_with_precompute( + // data: &serde_json::Value, + // ) -> Result< + // (Self, Box), + // Box, + // > { + // debug!("Deserializing PrecomputedOutput with precompute from JSON: {data}"); + // // First get the metadata + // let precomputed_output = Self::deserialize_from_json(data)?; + // debug!( + // "Deserialized PrecomputedOutput metadata: {:?}", + // precomputed_output + // ); + + // // Then deserialize the precompute data based on 
aggregation type + // let precompute_data = data + // .get("precompute") + // .ok_or("Missing 'precompute' field in JSON")?; + // let precompute = Self::create_precompute_from_json( + // &precomputed_output.config.aggregation_type, + // precompute_data, + // )?; + + // Ok((precomputed_output, precompute)) + // } + + // /// Deserialize from bytes and extract precompute data following Python implementation + // /// This is the public method that should be used by Kafka consumer + // pub fn deserialize_from_bytes_with_precompute_and_type( + // data: &[u8], + // aggregation_type: &str, + // ) -> Result< + // (Self, Box), + // Box, + // > { + // // First get the metadata and precompute bytes + // let (precomputed_output, precompute_bytes) = Self::deserialize_from_bytes_with_precompute( + // data, + // ) + // .map_err(|e| -> Box { + // format!("Failed to deserialize from bytes: {e}").into() + // })?; + + // // Then create the accumulator from the precompute bytes + // let precompute = + // Self::create_precompute_from_bytes(aggregation_type, &precompute_bytes, "flink")?; + + // Ok((precomputed_output, precompute)) + // } + + // /// Factory method to create precompute accumulator from JSON data + // fn create_precompute_from_json( + // precompute_type: &str, + // data: &serde_json::Value, + // ) -> Result, Box> + // { + // use crate::precompute_operators::*; + + // match precompute_type { + // "Sum" | "sum" => { + // let accumulator = SumAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize SumAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MinMax" => { + // let accumulator = MinMaxAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MinMaxAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "Increase" => { + // let accumulator = IncreaseAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize IncreaseAccumulator: {e}"))?; + // 
Ok(Box::new(accumulator)) + // } + // "MultipleSum" => { + // let accumulator = MultipleSumAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MultipleSumAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MultipleMinMax" => { + // // Extract sub_type from data + // let _sub_type = data + // .get("sub_type") + // .and_then(|v| v.as_str()) + // .unwrap_or("min") + // .to_string(); + // let accumulator = MultipleMinMaxAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize MultipleMinMaxAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "MultipleIncrease" => { + // let accumulator = MultipleIncreaseAccumulator::deserialize_from_json(data) + // .map_err(|e| { + // format!("Failed to deserialize MultipleIncreaseAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // "CountMinSketch" => { + // let accumulator = CountMinSketchAccumulator::deserialize_from_json(data) + // .map_err(|e| format!("Failed to deserialize CountMinSketchAccumulator: {e}"))?; + // Ok(Box::new(accumulator)) + // } + // "DatasketchesKLL" => { + // let accumulator = + // DatasketchesKLLAccumulator::deserialize_from_json(data).map_err(|e| { + // format!("Failed to deserialize DatasketchesKLLAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // "DeltaSetAggregator" => { + // let accumulator = DeltaSetAggregatorAccumulator::deserialize_from_json(data) + // .map_err(|e| { + // format!("Failed to deserialize DeltaSetAggregatorAccumulator: {e}") + // })?; + // Ok(Box::new(accumulator)) + // } + // _ => Err(format!("Unknown precompute type: {precompute_type}").into()), + // } + // } + + /// Factory method to create precompute accumulator from bytes + fn create_precompute_from_bytes( + precompute_type: &str, + buffer: &[u8], + streaming_engine: &str, + ) -> Result, Box> + { + use crate::precompute_operators::*; + + // TODO: add arroyo methods in each operator + // TODO: remove 
flink methods + + match precompute_type { + "Sum" | "sum" => { + let accumulator = if streaming_engine == "flink" { + SumAccumulator::deserialize_from_bytes(buffer) + } else { + SumAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize SumAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MinMax" => { + let accumulator = MinMaxAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize MinMaxAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "Increase" => { + let accumulator = IncreaseAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize IncreaseAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MultipleSum" => { + let accumulator = MultipleSumAccumulator::deserialize_from_bytes(buffer) + .map_err(|e| format!("Failed to deserialize MultipleSumAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "MultipleMinMax" => { + let accumulator = + MultipleMinMaxAccumulator::deserialize_from_bytes(buffer, "min".to_string()) + .map_err(|e| { + format!("Failed to deserialize MultipleMinMaxAccumulator: {e}") + })?; + Ok(Box::new(accumulator)) + } + "MultipleIncrease" => { + let accumulator = if streaming_engine == "flink" { + MultipleIncreaseAccumulator::deserialize_from_bytes(buffer) + } else { + MultipleIncreaseAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize MultipleIncreaseAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "CountMinSketch" => { + let accumulator = if streaming_engine == "flink" { + CountMinSketchAccumulator::deserialize_from_bytes(buffer) + } else { + CountMinSketchAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize CountMinSketchAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "CountMinSketchWithHeap" => { + let accumulator = if streaming_engine == "flink" { + 
CountMinSketchWithHeapAccumulator::deserialize_from_bytes(buffer) + } else { + CountMinSketchWithHeapAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| { + format!("Failed to deserialize CountMinSketchWithHeapAccumulator: {e}") + })?; + Ok(Box::new(accumulator)) + } + "DatasketchesKLL" => { + let accumulator = if streaming_engine == "flink" { + DatasketchesKLLAccumulator::deserialize_from_bytes(buffer) + } else { + DatasketchesKLLAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize DatasketchesKLLAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "HydraKLL" => { + let accumulator = if streaming_engine == "flink" { + return Err("HydraKLL not supported for Flink".into()); + } else { + HydraKllSketchAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize HydraKllSketchAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + "DeltaSetAggregator" => { + let accumulator = if streaming_engine == "flink" { + DeltaSetAggregatorAccumulator::deserialize_from_bytes(buffer) + } else { + DeltaSetAggregatorAccumulator::deserialize_from_bytes_arroyo(buffer) + } + .map_err(|e| format!("Failed to deserialize DeltaSetAggregatorAccumulator: {e}"))?; + Ok(Box::new(accumulator)) + } + _ => Err(format!("Unknown precompute type: {precompute_type}").into()), + } + } +} + +impl SerializableToSink for PrecomputedOutput { + fn serialize_to_json(&self) -> serde_json::Value { + // Default implementation without precompute data for backward compatibility + serde_json::json!({ + // "config": self.config.serialize_to_json(), + "start_timestamp": self.start_timestamp, + "end_timestamp": self.end_timestamp, + "key": self.key.as_ref().map(|k| k.serialize_to_json()) + }) + } + + fn serialize_to_bytes(&self) -> Vec { + // Default implementation without precompute data for backward compatibility + serde_json::to_vec(self).unwrap_or_else(|_| Vec::new()) + } +} + +// #[cfg(test)] +// mod 
tests { +// use super::*; + +// #[test] +// fn test_aggregation_config_creation() { +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string(), "job".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// assert_eq!(config.aggregation_id, 1); +// assert_eq!(config.metric, "cpu_usage"); +// assert_eq!(config.aggregation_type, "sum"); +// assert_eq!(config.tumbling_window_size, 10); +// } + +// #[test] +// fn test_query_config_builder() { +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let aggregation = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let query_config = QueryConfig::new("sum_over_time(cpu_usage[5m])".to_string()) +// .add_aggregation(aggregation); + +// assert_eq!(query_config.query, "sum_over_time(cpu_usage[5m])"); +// assert_eq!(query_config.aggregations.len(), 1); +// } + +// #[test] +// fn test_precomputed_output_json_serialization_with_precompute() { +// // Test Issue 9: PrecomputedOutput JSON serialization alignment with Python behavior +// use crate::precompute_operators::SumAccumulator; +// use std::collections::BTreeMap; + +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let mut key_labels = BTreeMap::new(); +// key_labels.insert("instance".to_string(), "server1".to_string()); +// let key = 
Some(KeyByLabelValues::new_with_labels(key_labels)); + +// let precomputed_output = PrecomputedOutput::new( +// 1000, // start_timestamp +// 2000, // end_timestamp +// key.clone(), +// config.clone(), +// ); + +// let accumulator = SumAccumulator::with_sum(42.5); + +// // Test JSON serialization with precompute data (matching Python format) +// let json_with_precompute = +// precomputed_output.serialize_to_json_with_precompute(&accumulator); + +// // Verify the JSON structure matches Python implementation +// assert!(json_with_precompute["config"].is_object()); +// assert_eq!(json_with_precompute["start_timestamp"], 1000); +// assert_eq!(json_with_precompute["end_timestamp"], 2000); +// assert!(json_with_precompute["key"].is_object()); +// assert!(json_with_precompute["precompute"].is_object()); + +// // Verify precompute data is included (this is the key difference from default serialization) +// assert_eq!(json_with_precompute["precompute"]["sum"], 42.5); + +// // Test default JSON serialization without precompute data +// let json_default = precomputed_output.serialize_to_json(); + +// // Verify default serialization does NOT include precompute data +// assert!( +// json_default["precompute"].is_null() +// || !json_default.as_object().unwrap().contains_key("precompute") +// ); +// assert_eq!(json_default["start_timestamp"], 1000); +// assert_eq!(json_default["end_timestamp"], 2000); +// } + +// #[test] +// fn test_precomputed_output_byte_serialization_with_precompute() { +// // Test Issue 9: PrecomputedOutput byte serialization alignment with Python behavior +// use crate::precompute_operators::SumAccumulator; + +// let labels = KeyByLabelNames::from_names(vec!["instance".to_string()]); +// let empty_labels = KeyByLabelNames::new(vec![]); +// let config = AggregationConfig::new( +// 1, +// "cpu_usage".to_string(), +// labels, +// empty_labels.clone(), +// empty_labels, +// "".to_string(), +// "sum".to_string(), +// 10, +// ); + +// let precomputed_output = 
PrecomputedOutput::new( +// 1000, // start_timestamp +// 2000, // end_timestamp +// None, // key +// config, +// ); + +// let accumulator = SumAccumulator::with_sum(42.5); + +// // Test byte serialization with precompute data (matching Python format) +// let bytes_with_precompute = +// precomputed_output.serialize_to_bytes_with_precompute(&accumulator); + +// // Test round-trip: serialize then deserialize +// let (deserialized_output, precompute_bytes) = +// PrecomputedOutput::deserialize_from_bytes_with_precompute(&bytes_with_precompute) +// .unwrap(); + +// // Verify round-trip works correctly +// assert_eq!(deserialized_output.start_timestamp, 1000); +// assert_eq!(deserialized_output.end_timestamp, 2000); +// assert!(deserialized_output.key.is_none()); +// assert_eq!(deserialized_output.config.aggregation_id, 1); +// assert_eq!(deserialized_output.config.metric, "cpu_usage"); + +// // Verify precompute data can be deserialized back to SumAccumulator +// let deserialized_accumulator = +// SumAccumulator::deserialize_from_bytes(&precompute_bytes).unwrap(); +// assert_eq!(deserialized_accumulator.sum, 42.5); +// } +// } diff --git a/QueryEngineRust/src/data_model/promql_schema.rs b/QueryEngineRust/src/data_model/promql_schema.rs new file mode 100644 index 0000000..1cd3859 --- /dev/null +++ b/QueryEngineRust/src/data_model/promql_schema.rs @@ -0,0 +1,32 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use promql_utilities::data_model::KeyByLabelNames; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PromQLSchema { + pub config: HashMap, +} + +impl PromQLSchema { + pub fn new() -> Self { + Self { + config: HashMap::new(), + } + } + + pub fn add_metric(mut self, metric: String, labels: KeyByLabelNames) -> Self { + self.config.insert(metric, labels); + self + } + + pub fn get_labels(&self, metric: &str) -> Option<&KeyByLabelNames> { + self.config.get(metric) + } +} + +impl Default for PromQLSchema { + fn default() -> Self { + 
Self::new() + } +} diff --git a/QueryEngineRust/src/data_model/query_config.rs b/QueryEngineRust/src/data_model/query_config.rs new file mode 100644 index 0000000..cb05328 --- /dev/null +++ b/QueryEngineRust/src/data_model/query_config.rs @@ -0,0 +1,28 @@ +use serde::{Deserialize, Serialize}; + +use crate::data_model::AggregationReference; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryConfig { + pub query: String, + pub aggregations: Vec, +} + +impl QueryConfig { + pub fn new(query: String) -> Self { + Self { + query, + aggregations: Vec::new(), + } + } + + pub fn add_aggregation(mut self, aggregation: AggregationReference) -> Self { + self.aggregations.push(aggregation); + self + } + + pub fn with_aggregations(mut self, aggregations: Vec) -> Self { + self.aggregations = aggregations; + self + } +} diff --git a/QueryEngineRust/src/data_model/streaming_config.rs b/QueryEngineRust/src/data_model/streaming_config.rs new file mode 100644 index 0000000..746a632 --- /dev/null +++ b/QueryEngineRust/src/data_model/streaming_config.rs @@ -0,0 +1,114 @@ +use anyhow::Result; +use core::panic; +use serde::{Deserialize, Serialize}; +use serde_yaml::Value; +use std::collections::HashMap; +use std::fs::File; +use std::io::BufReader; +use std::ops::Index; + +use crate::data_model::aggregation_config::AggregationConfig; +use crate::data_model::enums::QueryLanguage; +use crate::data_model::inference_config::{InferenceConfig, SchemaConfig}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StreamingConfig { + pub aggregation_configs: HashMap, +} + +impl StreamingConfig { + pub fn new(aggregation_configs: HashMap) -> Self { + Self { + aggregation_configs, + } + } + + pub fn get_aggregation_config(&self, aggregation_id: u64) -> Option<&AggregationConfig> { + self.aggregation_configs.get(&aggregation_id) + } + + pub fn get_all_aggregation_configs(&self) -> &HashMap { + &self.aggregation_configs + } + + pub fn contains(&self, aggregation_id: u64) -> 
bool { + self.aggregation_configs.contains_key(&aggregation_id) + } + + pub fn from_yaml_file(yaml_file: &str) -> Result { + let file = File::open(yaml_file)?; + let reader = BufReader::new(file); + let data: Value = serde_yaml::from_reader(reader)?; + + Self::from_yaml_data(&data, None) + } + + pub fn from_yaml_data( + data: &Value, + inference_config: Option<&InferenceConfig>, + ) -> Result { + let mut retention_map: HashMap = HashMap::new(); + let mut read_count_threshold_map: HashMap = HashMap::new(); + + if let Some(inference_config) = inference_config { + for query_config in &inference_config.query_configs { + for aggregation in &query_config.aggregations { + let aggregation_id = aggregation.aggregation_id; + if let Some(num_aggregates) = aggregation.num_aggregates_to_retain { + // OLD: Keep last value only (for backwards compatibility) + retention_map.insert(aggregation_id, num_aggregates); + + // NEW: Sum up num_aggregates_to_retain across all queries + *read_count_threshold_map.entry(aggregation_id).or_insert(0) += + num_aggregates; + } + } + } + } + + // Derive query_language from inference_config schema + let query_language = inference_config + .map(|ic| match &ic.schema { + SchemaConfig::PromQL(_) => QueryLanguage::promql, + SchemaConfig::SQL(_) => QueryLanguage::sql, + SchemaConfig::ElasticQueryDSL => QueryLanguage::elastic_querydsl, + SchemaConfig::ElasticSQL => QueryLanguage::elastic_sql, + }) + .unwrap_or(QueryLanguage::promql); // Default to promql if no inference_config + + let mut aggregation_configs: HashMap = HashMap::new(); + + if let Some(aggregations) = data.get("aggregations").and_then(|v| v.as_sequence()) { + for aggregation_data in aggregations { + if let Some(aggregation_id) = aggregation_data.get("aggregationId") { + let aggregation_id_u64 = aggregation_id.as_u64().or_else(|| panic!()).unwrap(); + let num_aggregates_to_retain = retention_map.get(&aggregation_id_u64); + let read_count_threshold = 
read_count_threshold_map.get(&aggregation_id_u64); + let config = AggregationConfig::from_yaml_data( + aggregation_data, + num_aggregates_to_retain.copied(), + read_count_threshold.copied(), + query_language, + )?; + aggregation_configs.insert(aggregation_id_u64, config); + } + } + } + + Ok(Self::new(aggregation_configs)) + } +} + +impl Index for StreamingConfig { + type Output = AggregationConfig; + + fn index(&self, aggregation_id: u64) -> &Self::Output { + &self.aggregation_configs[&aggregation_id] + } +} + +impl Default for StreamingConfig { + fn default() -> Self { + Self::new(HashMap::new()) + } +} diff --git a/QueryEngineRust/src/data_model/traits.rs b/QueryEngineRust/src/data_model/traits.rs new file mode 100644 index 0000000..83183d6 --- /dev/null +++ b/QueryEngineRust/src/data_model/traits.rs @@ -0,0 +1,122 @@ +use crate::data_model::KeyByLabelValues; +use serde_json::Value; +use std::collections::HashMap; + +use promql_utilities::query_logics::enums::Statistic; + +pub use sketch_db_common::traits::SerializableToSink; + +/// Core trait for all aggregates containing shared functionality +/// This trait provides common operations like serialization, cloning, and type identification +pub trait AggregateCore: SerializableToSink + Send + Sync { + /// Clone this accumulator into a boxed trait object + fn clone_boxed_core(&self) -> Box; + + /// Get the type name of this accumulator + fn type_name(&self) -> &'static str; + + /// Downcast to Any for type checking + fn as_any(&self) -> &dyn std::any::Any; + + /// Merge this accumulator with another accumulator of the same type + /// Returns a new merged accumulator, leaving the original unchanged + fn merge_with( + &self, + other: &dyn AggregateCore, + ) -> Result, Box>; + + /// Get the accumulator type identifier for merge compatibility checking + fn get_accumulator_type(&self) -> &'static str; + + /// Get all keys stored in this accumulator + fn get_keys(&self) -> Option>; +} + +/// Trait for accumulators that 
support a single subpopulation +/// These accumulators store a single aggregate value (e.g., Sum, Increase) +pub trait SingleSubpopulationAggregate: AggregateCore { + /// Query the accumulator for a specific statistic + fn query( + &self, + statistic: Statistic, + query_kwargs: Option<&HashMap>, + ) -> Result>; + + /// Clone this accumulator into a boxed trait object + fn clone_boxed(&self) -> Box; +} + +/// Trait for accumulators that support multiple subpopulations identified by keys +/// These accumulators store separate values for different label combinations +pub trait MultipleSubpopulationAggregate: AggregateCore { + /// Query the accumulator for a specific statistic and key + fn query( + &self, + statistic: Statistic, + key: &KeyByLabelValues, + query_kwargs: Option<&HashMap>, + ) -> Result>; + + /// Clone this accumulator into a boxed trait object + fn clone_boxed(&self) -> Box; +} + +/// Factory traits for creating and merging accumulators (object-safe) +pub trait SingleSubpopulationAggregateFactory { + fn merge_accumulators( + &self, + accumulators: Vec>, + ) -> Result, Box>; + fn create_default(&self) -> Box; +} + +pub trait MultipleSubpopulationAggregateFactory { + fn merge_accumulators( + &self, + accumulators: Vec>, + ) -> Result, Box>; + fn create_default(&self) -> Box; +} + +/// Trait for merging multiple accumulators of the same type +pub trait MergeableAccumulator { + fn merge_accumulators( + accumulators: Vec, + ) -> Result> + where + T: Sized; +} + +// Implement Clone for the new trait objects +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed_core() + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed() + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_boxed() + } +} + +/// Factory trait for creating accumulators from serialized data +pub trait AccumulatorFactory { + fn create_from_json( + accumulator_type: &str, + data: &Value, + ) -> Result, Box>; + fn create_from_bytes( + 
accumulator_type: &str, + buffer: &[u8], + ) -> Result, Box>; +} + +#[cfg(test)] +mod tests {} diff --git a/QueryEngineRust/src/drivers/ingest/kafka.rs b/QueryEngineRust/src/drivers/ingest/kafka.rs new file mode 100644 index 0000000..e2b9f03 --- /dev/null +++ b/QueryEngineRust/src/drivers/ingest/kafka.rs @@ -0,0 +1,444 @@ +use rdkafka::config::ClientConfig; +use rdkafka::consumer::{Consumer, StreamConsumer}; +use rdkafka::Message; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tracing::{debug, error, info, warn}; + +use crate::data_model::enums::{InputFormat, StreamingEngine}; +use crate::data_model::traits::SerializableToSink; +use crate::data_model::PrecomputedOutput; +use crate::data_model::StreamingConfig; +use crate::stores::Store; +use crate::utils::PrecomputeDumper; + +#[derive(Debug, Clone)] +pub struct KafkaConsumerConfig { + pub broker: String, + pub topic: String, + pub group_id: String, + pub auto_offset_reset: String, + pub input_format: InputFormat, + pub decompress_json: bool, + pub batch_size: usize, + pub poll_timeout_ms: u64, + pub streaming_engine: StreamingEngine, + pub dump_precomputes: bool, + pub dump_output_dir: Option, +} + +pub struct KafkaConsumer { + config: KafkaConsumerConfig, + store: Arc, + consumer: StreamConsumer, + streaming_config: Arc, + previous_consume_time: Option, + precompute_dumper: Option, +} + +impl KafkaConsumer { + pub fn new( + config: KafkaConsumerConfig, + store: Arc, + streaming_config: Arc, + ) -> Result> { + let consumer: StreamConsumer = ClientConfig::new() + .set("bootstrap.servers", &config.broker) + .set("group.id", &config.group_id) + .set("auto.offset.reset", &config.auto_offset_reset) + .set("enable.partition.eof", "false") + .set("session.timeout.ms", "6000") + .set("enable.auto.commit", "true") + .create()?; + + // Subscribe to the topic + consumer.subscribe(&[&config.topic])?; + + // Initialize precompute dumper if enabled + let precompute_dumper = if config.dump_precomputes { + match 
&config.dump_output_dir {
+                Some(output_dir) => match PrecomputeDumper::new(output_dir) {
+                    Ok(dumper) => {
+                        info!("Precompute dumping enabled to: {}", dumper.get_file_path());
+                        Some(dumper)
+                    }
+                    Err(e) => {
+                        // Dumping is best-effort: a failed dumper is logged and disabled,
+                        // it does not abort consumer construction.
+                        error!("Failed to create precompute dumper: {}", e);
+                        info!("Continuing without precompute dumping");
+                        None
+                    }
+                },
+                None => {
+                    warn!("Precompute dumping requested but no output directory provided");
+                    None
+                }
+            }
+        } else {
+            None
+        };
+
+        Ok(Self {
+            config,
+            store,
+            consumer,
+            streaming_config,
+            previous_consume_time: None,
+            precompute_dumper,
+        })
+    }
+
+    // NOTE(review): several generic parameters in this file appear truncated in this
+    // dump (e.g. `Box>`, `downcast_ref::()`), most likely `Box<dyn std::error::Error>`
+    // and similar — confirm against the original source in VCS before building.
+
+    /// Main consume loop: awaits Kafka messages (with a per-poll timeout),
+    /// deserializes each into a (precomputed output, accumulator) pair via
+    /// `process_message`, and flushes accumulated pairs to the store in
+    /// batches of `config.batch_size`. A poll timeout flushes whatever has
+    /// accumulated. Only returns on a fatal Kafka or store error; otherwise
+    /// it loops forever.
+    pub async fn run(&mut self) -> Result<(), Box> {
+        info!(
+            "Starting Kafka consumer for topic: {} on broker: {}",
+            self.config.topic, self.config.broker
+        );
+
+        let mut batch = Vec::new();
+
+        loop {
+            // Collect messages into batches like Python implementation
+            let timeout_duration = Duration::from_millis(self.config.poll_timeout_ms);
+
+            // StreamConsumer uses recv() for async message reception
+            match tokio::time::timeout(timeout_duration, self.consumer.recv()).await {
+                Ok(Ok(message)) => {
+                    // Add timing debug similar to Python
+                    let current_consume_time = Instant::now();
+                    if let Some(previous_time) = self.previous_consume_time {
+                        let elapsed = current_consume_time.duration_since(previous_time);
+                        debug!(
+                            "Time since last consume: {:.2} seconds",
+                            elapsed.as_secs_f64()
+                        );
+                    }
+                    self.previous_consume_time = Some(current_consume_time);
+                    // Process single message and add to batch
+                    match self.process_message(&message) {
+                        Ok(Some((precomputed_output, precompute_accumulator))) => {
+                            // Check if this is an empty DeltaSetAggregator and skip it
+                            // NOTE(review): the turbofish type parameter below was lost in
+                            // this dump; the log message suggests it was
+                            // DeltaSetAggregatorAccumulator — confirm against VCS.
+                            if let Some(delta_acc) = precompute_accumulator
+                                .as_any()
+                                .downcast_ref::()
+                            {
+                                if delta_acc.is_empty() {
+                                    debug!("Skipping empty DeltaSetAggregatorAccumulator");
+                                    continue;
+                                }
+                            }
+
+                            // Dump precompute if enabled (best-effort: failures are logged,
+                            // the message is still batched and stored).
+                            if let Some(ref mut dumper) = self.precompute_dumper {
+                                if let Err(e) = dumper.dump_precompute(
+                                    &precomputed_output,
+                                    precompute_accumulator.as_ref(),
+                                ) {
+                                    error!("Failed to dump precompute: {}", e);
+                                }
+                            }
+
+                            // Store both the metadata and the real accumulator data
+                            batch.push((precomputed_output, precompute_accumulator));
+                        }
+                        Ok(None) => {
+                            debug!("Message processed but no precomputed output produced");
+                        }
+                        Err(e) => {
+                            // Per-message errors are non-fatal: log and keep consuming.
+                            error!("Error processing message: {e}");
+                            continue; // Skip this message and continue
+                        }
+                    }
+
+                    // Process batch when we reach batch_size or periodically
+                    if batch.len() >= self.config.batch_size {
+                        self.process_batch(&mut batch).await?;
+                    }
+                }
+                Ok(Err(kafka_err)) => {
+                    // PartitionEOF is informational (end of partition reached), not an error.
+                    if kafka_err.rdkafka_error_code()
+                        == Some(rdkafka::types::RDKafkaErrorCode::PartitionEOF)
+                    {
+                        debug!("Reached end of partition");
+                        continue;
+                    } else {
+                        error!("Kafka error: {kafka_err}");
+                        return Err(Box::new(kafka_err));
+                    }
+                }
+                Err(_) => {
+                    // Timeout occurred - process any accumulated batch
+                    if !batch.is_empty() {
+                        debug!(
+                            "Poll timeout, processing accumulated batch of {} items",
+                            batch.len()
+                        );
+                        self.process_batch(&mut batch).await?;
+                    } else {
+                        debug!("Poll timeout, no messages to process");
+                    }
+                }
+            }
+        }
+    }
+
+    /// Inserts the accumulated (output, accumulator) pairs into the store as a
+    /// single batch, logs insert/total timings at debug level, and clears
+    /// `batch` on success. An empty batch is a no-op; a store error is
+    /// propagated to the caller (the batch is left intact in that case).
+    async fn process_batch(
+        &self,
+        batch: &mut Vec<(PrecomputedOutput, Box)>,
+    ) -> Result<(), Box> {
+        if batch.is_empty() {
+            return Ok(());
+        }
+
+        let batch_start_time = Instant::now();
+        debug!("Processing batch of {} messages", batch.len());
+
+        // Batch insert with real precompute data like Python implementation
+        // NOTE(review): `to_vec()` clones the entire batch for the insert call —
+        // consider draining instead if the store API allows.
+        let store_insert_start_time = Instant::now();
+        match self.store.insert_precomputed_output_batch(batch.to_vec()) {
+            Ok(_) => {
+                let store_insert_duration = store_insert_start_time.elapsed();
+                debug!(
+                    "Store batch insert took: {:.2}ms",
+                    store_insert_duration.as_secs_f64() * 1000.0
+                );
+                debug!("{}", batch[0].0.get_freshness_debug_string());
+                for (item, _) in batch.iter() {
+                    debug!(
+                        "Received message: {} with aggregation_id: {}",
+                        serde_json::to_string(&item.serialize_to_json())
+                            .unwrap_or_else(|_| "failed to serialize".to_string()),
+                        item.aggregation_id
+                    );
+                }
+            }
+            Err(e) => {
+                error!("Error inserting precomputed output batch: {}", e);
+                return Err(e);
+            }
+        }
+
+        batch.clear();
+        let total_batch_duration = batch_start_time.elapsed();
+        debug!(
+            "Total batch processing took: {:.2}ms",
+            total_batch_duration.as_secs_f64() * 1000.0
+        );
+        Ok(())
+    }
+
+    /// Decodes a single Kafka message according to `config.input_format` and
+    /// `config.streaming_engine`. Returns `Ok(None)` for messages with no
+    /// payload. Only JSON + Arroyo is currently implemented; the Byte and
+    /// JSON + Flink paths are stubbed out (commented code retained for
+    /// reference) and return an error.
+    #[allow(clippy::type_complexity)]
+    fn process_message(
+        &self,
+        message: &rdkafka::message::BorrowedMessage<'_>,
+    ) -> Result<
+        Option<(PrecomputedOutput, Box)>,
+        Box,
+    > {
+        let message_start_time = Instant::now();
+        let payload = match message.payload() {
+            Some(payload) => payload,
+            None => {
+                warn!("Received message with no payload");
+                return Ok(None);
+            }
+        };
+
+        match self.config.input_format {
+            InputFormat::Byte => {
+                // For binary format, we need to first extract metadata to get aggregation_type
+                // Then use it to create the proper accumulator
+                // let (metadata, _precompute_bytes) =
+                //     match PrecomputedOutput::deserialize_from_bytes_with_precompute(payload) {
+                //         Ok(result) => result,
+                //         Err(e) => {
+                //             error!("Error deserializing binary message metadata: {}", e);
+                //             return Err(format!("Binary deserialization error: {e}").into());
+                //         }
+                //     };
+
+                // // Now deserialize with the correct accumulator type
+                // match PrecomputedOutput::deserialize_from_bytes_with_precompute_and_type(
+                //     payload,
+                //     &metadata.config.aggregation_type,
+                // ) {
+                //     Ok((output, precompute)) => {
+                //         debug!("Successfully deserialized binary message with precompute data");
+                //         Ok(Some((output, precompute)))
+                //     }
+                //     Err(e) => {
+                //         error!("Error deserializing binary message with precompute: {}", e);
+                //         Err(e)
+                //     }
+                // }
+                error!("Binary input format with precompute not implemented");
+                Err("Binary input format with precompute not implemented".into())
+            }
+            InputFormat::Json => {
+                // Handle streaming engine specific logic
+                match self.config.streaming_engine {
+                    StreamingEngine::Flink => {
+                        // debug!("Received message of length: {}", payload.len());
+
+                        // let json_data = if self.config.decompress_json {
+                        //     // Decompress using gzip
+                        //     let mut decoder = GzDecoder::new(payload);
+                        //     let mut decompressed = Vec::new();
+                        //     match decoder.read_to_end(&mut decompressed) {
+                        //         Ok(_) => {
+                        //             debug!(
+                        //                 "Decompressed JSON message of length: {}",
+                        //                 decompressed.len()
+                        //             );
+                        //             decompressed
+                        //         }
+                        //         Err(e) => {
+                        //             error!("Error decompressing gzip data: {}", e);
+                        //             return Err(format!("Gzip decompression error: {e}").into());
+                        //         }
+                        //     }
+                        // } else {
+                        //     payload.to_vec()
+                        // };
+
+                        // let json_str = match String::from_utf8(json_data) {
+                        //     Ok(s) => s,
+                        //     Err(e) => {
+                        //         error!("Error converting bytes to UTF-8: {}", e);
+                        //         return Err(format!("UTF-8 conversion error: {e}").into());
+                        //     }
+                        // };
+
+                        // let json_parse_start_time = Instant::now();
+
+                        // let json_dict: serde_json::Value = match serde_json::from_str(&json_str) {
+                        //     Ok(dict) => {
+                        //         let json_parse_duration = json_parse_start_time.elapsed();
+                        //         debug!(
+                        //             "JSON parsing took: {:.2}ms",
+                        //             json_parse_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         dict
+                        //     }
+                        //     Err(e) => {
+                        //         error!("Error parsing JSON: {}", e);
+                        //         debug!("JSON content: {}", json_str);
+                        //         return Err(format!("JSON parsing error: {e}").into());
+                        //     }
+                        // };
+
+                        // debug!(
+                        //     "Deserializing JSON message: {}, {}, {}",
+                        //     json_dict
+                        //         .get("aggregation_id")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0),
+                        //     json_dict
+                        //         .get("start_timestamp")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0),
+                        //     json_dict
+                        //         .get("end_timestamp")
+                        //         .and_then(|v| v.as_u64())
+                        //         .unwrap_or(0)
+                        // );
+
+                        // let deserialize_start_time = Instant::now();
+
+                        // match PrecomputedOutput::deserialize_from_json_with_precompute(&json_dict) {
+                        //     Ok((output, precompute)) => {
+                        //         let deserialize_duration = deserialize_start_time.elapsed();
+                        //         debug!(
+                        //             "Deserialization took: {:.2}ms",
+                        //             deserialize_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         debug!(
+                        //             "Deserialized item: {}, {}, {}",
+                        //             output.config.aggregation_id,
+                        //             output.start_timestamp,
+                        //             output.end_timestamp
+                        //         );
+                        //         debug!("Successfully deserialized Flink JSON message with precompute data");
+                        //         let total_message_duration = message_start_time.elapsed();
+                        //         debug!(
+                        //             "Total message processing took: {:.2}ms",
+                        //             total_message_duration.as_secs_f64() * 1000.0
+                        //         );
+                        //         Ok(Some((output, precompute)))
+                        //     }
+                        //     Err(e) => {
+                        //         error!(
+                        //             "Error deserializing Flink PrecomputedOutput from JSON with precompute: {}",
+                        //             e
+                        //         );
+                        //         debug!("JSON content: {}", json_str);
+                        //         Err(e)
+                        //     }
+                        // }
+                        error!("Flink input format with precompute not implemented");
+                        Err("Flink input format with precompute not implemented".into())
+                    }
+                    StreamingEngine::Arroyo => {
+                        // Arroyo messages - gzip decompression is applied at precompute level, not message level
+                        let json_str = match String::from_utf8(payload.to_vec()) {
+                            Ok(s) => s,
+                            Err(e) => {
+                                error!("Error converting bytes to UTF-8: {}", e);
+                                return Err(format!("UTF-8 conversion error: {e}").into());
+                            }
+                        };
+
+                        let json_dict: serde_json::Value = match serde_json::from_str(&json_str) {
+                            Ok(dict) => dict,
+                            Err(e) => {
+                                error!("Error parsing Arroyo JSON: {}", e);
+                                debug!("JSON content: {}", json_str);
+                                return Err(format!("JSON parsing error: {e}").into());
+                            }
+                        };
+
+                        let deserialize_start_time = Instant::now();
+                        match PrecomputedOutput::deserialize_from_json_arroyo(
+                            &json_dict,
+                            &self.streaming_config,
+                        ) {
+                            Ok((output, precompute)) => {
+                                let deserialize_duration = deserialize_start_time.elapsed();
+                                debug!(
+                                    "Arroyo deserialization took: {:.2}ms",
+                                    deserialize_duration.as_secs_f64() * 1000.0
+                                );
+                                debug!("Successfully deserialized Arroyo JSON message with precompute data");
+                                let total_message_duration = message_start_time.elapsed();
+                                debug!(
+                                    "Total Arroyo message processing took: {:.2}ms",
+                                    total_message_duration.as_secs_f64() * 1000.0
+                                );
+                                Ok(Some((output, precompute)))
+                            }
+                            Err(e) => {
+                                error!(
+                                    "Error deserializing Arroyo PrecomputedOutput from JSON with precompute: {e}"
+                                );
+                                debug!("JSON content: {}", json_str);
+                                Err(e)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Flushes the optional precompute dumper (errors are logged, not
+    /// propagated); the Kafka consumer itself is released when dropped.
+    pub async fn stop(&mut self) -> Result<(), Box> {
+        info!("Stopping Kafka consumer");
+
+        // Flush precompute dumper if it exists
+        if let Some(ref mut dumper) = self.precompute_dumper {
+            if let Err(e) = dumper.flush() {
+                error!("Failed to flush precompute dumper on stop: {}", e);
+            }
+        }
+
+        // The consumer will be dropped automatically
+        Ok(())
+    }
+}
diff --git a/QueryEngineRust/src/drivers/ingest/mod.rs b/QueryEngineRust/src/drivers/ingest/mod.rs
new file mode 100644
index 0000000..f300788
--- /dev/null
+++ b/QueryEngineRust/src/drivers/ingest/mod.rs
@@ -0,0 +1,9 @@
+pub mod kafka;
+pub mod prometheus_remote_write;
+pub mod victoriametrics_remote_write;
+
+pub use kafka::{KafkaConsumer, KafkaConsumerConfig};
+// pub use prometheus_remote_write::{PrometheusRemoteWriteConfig, PrometheusRemoteWriteServer};
+// pub use victoriametrics_remote_write::{
+//     VictoriaMetricsRemoteWriteConfig, VictoriaMetricsRemoteWriteServer,
+// };
diff --git a/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs b/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs
new file mode 100644
index 0000000..428c9de
--- /dev/null
+++ b/QueryEngineRust/src/drivers/ingest/prometheus_remote_write.rs
@@ -0,0 +1,492 @@
+// use axum::{body::Bytes, extract::State, http::StatusCode, routing::post, Router};
+// use prost::Message;
+// use std::sync::Arc;
+// use tokio::net::TcpListener;
+// use tracing::{debug, error, info, warn};
+
+// // use crate::stores::promsketch_store::metrics as ps_metrics;
+// // use crate::stores::promsketch_store::PromSketchStore;
+
+// // ---------------------------------------------------------------------------
+// // Protobuf message types
(Prometheus remote write wire format) +// // --------------------------------------------------------------------------- +// // These mirror the upstream proto definitions in prometheus/prompb but are +// // defined inline via prost derive macros so we don't need a .proto file or +// // build script. + +// #[derive(Clone, PartialEq, Message)] +// pub struct WriteRequest { +// #[prost(message, repeated, tag = "1")] +// pub timeseries: Vec, +// } + +// #[derive(Clone, PartialEq, Message)] +// pub struct TimeSeries { +// #[prost(message, repeated, tag = "1")] +// pub labels: Vec