diff --git a/.github/workflows/accuracy_performance.yml b/.github/workflows/accuracy_performance.yml new file mode 100644 index 0000000..79eff1e --- /dev/null +++ b/.github/workflows/accuracy_performance.yml @@ -0,0 +1,162 @@ +name: PR Evaluation + +# NOTE: GitHub-hosted runners are noisy. Latency numbers are indicative only. +# For precise benchmarks, register a self-hosted runner once asap-tools infra +# is decoupled from Cloudlab. See PDF eval guide Phase 3. + +on: + pull_request: + branches: + - main + paths: + - 'asap-query-engine/**' + - 'asap-planner-rs/**' + - 'asap-summary-ingest/**' + - 'asap-quickstart/**' + - '.github/workflows/accuracy_performance.yml' + - 'benchmarks/**' + workflow_dispatch: + +permissions: + contents: read + packages: write + pull-requests: write + +jobs: + # --------------------------------------------------------------------------- + # Job 1: build images once from branch code and push with a SHA-based tag. + # All downstream jobs pull these images instead of rebuilding. + # --------------------------------------------------------------------------- + build: + name: Build CI images + runs-on: ubuntu-latest + outputs: + image-tag: ${{ steps.tag.outputs.value }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Compute image tag + id: tag + run: echo "value=sha-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + - name: Build and push asap-planner-rs + uses: docker/build-push-action@v6 + with: + context: . 
+ file: asap-planner-rs/Dockerfile + push: true + tags: ghcr.io/projectasap/asap-planner-rs:${{ steps.tag.outputs.value }} + cache-from: type=registry,ref=ghcr.io/projectasap/asap-planner-rs:buildcache + cache-to: type=registry,ref=ghcr.io/projectasap/asap-planner-rs:buildcache,mode=max + + - name: Build and push asap-summary-ingest + uses: docker/build-push-action@v6 + with: + context: asap-summary-ingest + file: asap-summary-ingest/Dockerfile + push: true + tags: ghcr.io/projectasap/asap-summary-ingest:${{ steps.tag.outputs.value }} + + - name: Build and push asap-query-engine + uses: docker/build-push-action@v6 + with: + context: . + file: asap-query-engine/Dockerfile + push: true + tags: ghcr.io/projectasap/asap-query-engine:${{ steps.tag.outputs.value }} + cache-from: type=registry,ref=ghcr.io/projectasap/asap-query-engine:buildcache + cache-to: type=registry,ref=ghcr.io/projectasap/asap-query-engine:buildcache,mode=max + + # --------------------------------------------------------------------------- + # Job 2: pull the images built above, deploy the full stack, and evaluate. 
+ # --------------------------------------------------------------------------- + eval: + name: Full-stack PR evaluation + needs: build + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + ASAP_IMAGE_TAG: ${{ needs.build.outputs.image-tag }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull and start full stack + run: | + docker compose \ + -f asap-quickstart/docker-compose.yml \ + -f benchmarks/docker-compose.yml \ + up -d + + - name: Show running containers + run: | + docker compose \ + -f asap-quickstart/docker-compose.yml \ + -f benchmarks/docker-compose.yml \ + ps + + - name: Wait for all services to be healthy + run: bash benchmarks/scripts/wait_for_stack.sh + + - name: Wait for pipeline and data ingestion + run: bash benchmarks/scripts/ingest_wait.sh + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: pip install requests + + - name: Run baseline queries (Prometheus) + run: python benchmarks/scripts/run_baseline.py + + - name: Run ASAP queries (query engine) + run: python benchmarks/scripts/run_asap.py + + - name: Compare results and evaluate + run: python benchmarks/scripts/compare.py + + - name: Upload evaluation reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: eval-reports-${{ github.run_id }} + path: benchmarks/reports/ + + - name: Print docker logs on failure + if: failure() + run: | + docker compose \ + -f asap-quickstart/docker-compose.yml \ + -f benchmarks/docker-compose.yml \ + logs --no-color + + - name: Teardown stack + if: always() + run: | + docker compose \ + -f asap-quickstart/docker-compose.yml \ + -f benchmarks/docker-compose.yml \ + down -v diff --git a/.github/workflows/correctness.yml b/.github/workflows/correctness.yml 
new file mode 100644 index 0000000..7ba0178 --- /dev/null +++ b/.github/workflows/correctness.yml @@ -0,0 +1,168 @@ +name: Correctness Tests + +# Verifies cross-language parity between Python and Rust implementations of +# PromQL pattern matching and sketch serialisation. Ephemeral GitHub Actions +# VMs are well-suited for these deterministic correctness checks. + +on: + push: + branches: [ main ] + paths: + - 'asap-common/tests/**' + - 'asap-common/dependencies/**' + - 'asap-common/sketch-core/**' + - '.github/workflows/correctness.yml' + pull_request: + branches: [ main ] + paths: + - 'asap-common/tests/**' + - 'asap-common/dependencies/**' + - 'asap-common/sketch-core/**' + - '.github/workflows/correctness.yml' + workflow_dispatch: + +jobs: + # ── 1. Cross-language token matching (Python vs Rust) ────────────────────── + cross-language-token-matching: + name: Cross-language PromQL token matching + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + if [ -f asap-common/dependencies/py/requirements.txt ]; then + pip install -r asap-common/dependencies/py/requirements.txt + fi + pip install -e asap-common/dependencies/py/promql_utilities/ + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install protoc + run: | + sudo apt-get update -qq + sudo apt-get install -y protobuf-compiler + + - name: Run sccache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-correctness-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }} + + - name: Run master test runner (Python + Rust + comparison) + working-directory: asap-common/tests/compare_matched_tokens + run: python utilities/master_test_runner.py + env: + RUSTC_WRAPPER: sccache + + - 
name: Upload test artefacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: cross-language-token-results + path: | + asap-common/tests/compare_matched_tokens/python_tests/python_test_results.json + asap-common/tests/compare_matched_tokens/rust_tests/rust_test_results.json + asap-common/tests/compare_matched_tokens/comparison_tests/comparison_report.json + asap-common/tests/compare_matched_tokens/test_summary.json + if-no-files-found: warn + + # ── 2. Cross-language serialised pattern comparison ──────────────────────── + cross-language-pattern-serialisation: + name: Cross-language pattern serialisation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + if [ -f asap-common/dependencies/py/requirements.txt ]; then + pip install -r asap-common/dependencies/py/requirements.txt + fi + pip install -e asap-common/dependencies/py/promql_utilities/ + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install protoc + run: | + sudo apt-get update -qq + sudo apt-get install -y protobuf-compiler + + - name: Run sccache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-correctness-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }} + + - name: Generate Python patterns + working-directory: asap-common/tests/compare_patterns + run: python python_generate_patterns.py + + - name: Build and run Rust pattern generator + working-directory: asap-common/tests/compare_patterns + run: cargo run --release + env: + RUSTC_WRAPPER: sccache + + - name: Compare serialised patterns + working-directory: asap-common/tests/compare_patterns + run: python compare_serialized_patterns.py + + # ── 3. 
Rust pattern-matching unit tests ──────────────────────────────────── + rust-pattern-matching: + name: Rust pattern matching unit tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install protoc + run: | + sudo apt-get update -qq + sudo apt-get install -y protobuf-compiler + + - name: Run sccache + uses: mozilla-actions/sccache-action@v0.0.4 + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-correctness-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }} + + - name: Run Rust pattern matching tests + working-directory: asap-common/tests/rust_pattern_matching + run: cargo test --release + env: + RUSTC_WRAPPER: sccache diff --git a/asap-common/.gitignore b/asap-common/.gitignore index 9e7080c..102b6ea 100644 --- a/asap-common/.gitignore +++ b/asap-common/.gitignore @@ -8,4 +8,5 @@ dependencies/py/promql_utilities/promql_utilities.egg-info/ dependencies/rs/**/target/ tests/**/*.json +!tests/**/test_data/*.json tests/**/target/ diff --git a/asap-common/tests/compare_matched_tokens/rust_tests/Cargo.toml b/asap-common/tests/compare_matched_tokens/rust_tests/Cargo.toml index e38e483..c4155dc 100644 --- a/asap-common/tests/compare_matched_tokens/rust_tests/Cargo.toml +++ b/asap-common/tests/compare_matched_tokens/rust_tests/Cargo.toml @@ -1,3 +1,5 @@ +[workspace] + [package] name = "promql_cross_lang_tests" version = "0.1.0" diff --git a/asap-common/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs b/asap-common/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs index fcc210f..6b0fef9 100644 --- a/asap-common/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs +++ b/asap-common/tests/compare_matched_tokens/rust_tests/src/pattern_tests.rs @@ -19,22 +19,11 @@ impl PatternTester { // Rate pattern PromQLPattern::new( Self::build_rate_pattern(), - vec![ - 
"metric".to_string(), - "function".to_string(), - "range_vector".to_string(), - ], // Some("ONLY_TEMPORAL".to_string()), ), // Quantile over time pattern PromQLPattern::new( Self::build_quantile_over_time_pattern(), - vec![ - "metric".to_string(), - "function".to_string(), - "range_vector".to_string(), - "function_args".to_string(), - ], // Some("ONLY_TEMPORAL".to_string()), ), ]; @@ -44,34 +33,31 @@ impl PatternTester { // Sum aggregation pattern PromQLPattern::new( Self::build_sum_pattern(), - vec!["metric".to_string(), "aggregation".to_string()], // Some("ONLY_SPATIAL".to_string()), ), // Simple metric pattern PromQLPattern::new( Self::build_metric_pattern(), - vec!["metric".to_string()], // Some("ONLY_SPATIAL".to_string()), ), ]; // ONE_TEMPORAL_ONE_SPATIAL patterns let combined_patterns = vec![ - // Sum of rate pattern + // Aggregation of single-arg temporal functions PromQLPattern::new( Self::build_one_temporal_one_spatial_pattern(), - vec![ - "metric".to_string(), - "function".to_string(), - "aggregation".to_string(), - "range_vector".to_string(), - ], + // Some("ONE_TEMPORAL_ONE_SPATIAL".to_string()), + ), + // Aggregation of quantile_over_time (2-arg) + PromQLPattern::new( + Self::build_combined_quantile_pattern(), // Some("ONE_TEMPORAL_ONE_SPATIAL".to_string()), ), ]; // Insert in order from simple to complex to avoid panics - patterns.insert("ONLY_VECTOR".to_string(), spatial_patterns.clone()); + // ONLY_VECTOR is derived via disambiguation in test_query, not a separate entry patterns.insert("ONLY_SPATIAL".to_string(), spatial_patterns); patterns.insert("ONLY_TEMPORAL".to_string(), temporal_patterns); patterns.insert("ONE_TEMPORAL_ONE_SPATIAL".to_string(), combined_patterns); @@ -280,7 +266,20 @@ impl PatternTester { let args: Vec>> = vec![ms]; - PromQLPatternBuilder::function(vec!["rate", "increase"], args, Some("function"), None) + PromQLPatternBuilder::function( + vec![ + "rate", + "increase", + "avg_over_time", + "sum_over_time", + "count_over_time", 
+ "min_over_time", + "max_over_time", + ], + args, + Some("function"), + None, + ) } fn build_quantile_over_time_pattern() -> Option> { @@ -327,7 +326,6 @@ impl PatternTester { let func = PromQLPatternBuilder::function( vec![ - "quantile_over_time", "sum_over_time", "count_over_time", "avg_over_time", @@ -351,6 +349,30 @@ impl PatternTester { ) } + fn build_combined_quantile_pattern() -> Option> { + let num = PromQLPatternBuilder::number(None, None); + let ms = PromQLPatternBuilder::matrix_selector( + PromQLPatternBuilder::metric(None, None, None, Some("metric")), + None, + Some("range_vector"), + ); + let func_args: Vec>> = vec![num, ms]; + let func = PromQLPatternBuilder::function( + vec!["quantile_over_time"], + func_args, + Some("function"), + None, + ); + PromQLPatternBuilder::aggregation( + vec!["sum", "count", "avg", "quantile", "min", "max"], + func, + None, + None, + None, + Some("aggregation"), + ) + } + fn build_sum_rate_pattern() -> Option> { let ms = PromQLPatternBuilder::matrix_selector( PromQLPatternBuilder::metric(None, None, None, Some("metric")), diff --git a/asap-common/tests/compare_matched_tokens/test_data/promql_queries.json b/asap-common/tests/compare_matched_tokens/test_data/promql_queries.json new file mode 100644 index 0000000..0d03af4 --- /dev/null +++ b/asap-common/tests/compare_matched_tokens/test_data/promql_queries.json @@ -0,0 +1,228 @@ +{ + "test_cases": [ + { + "id": "temporal_rate_basic", + "description": "Basic rate function over a time range", + "query": "rate(http_requests_total{job=\"api\"}[5m])", + "expected_pattern_type": "ONLY_TEMPORAL", + "expected_tokens": { + "metric": { + "name": "http_requests_total", + "labels": {"job": "api"}, + "at_modifier": null + }, + "function": { + "name": "rate" + }, + "range_vector": { + "range": "5m" + } + } + }, + { + "id": "temporal_increase_basic", + "description": "Increase function over a time range", + "query": "increase(http_requests_total[1h])", + "expected_pattern_type": 
"ONLY_TEMPORAL", + "expected_tokens": { + "metric": { + "name": "http_requests_total", + "labels": {}, + "at_modifier": null + }, + "function": { + "name": "increase" + }, + "range_vector": { + "range": "1h" + } + } + }, + { + "id": "temporal_quantile_over_time", + "description": "Quantile over time function", + "query": "quantile_over_time(0.95, cpu_usage{instance=\"host1\"}[10m])", + "expected_pattern_type": "ONLY_TEMPORAL", + "expected_tokens": { + "metric": { + "name": "cpu_usage", + "labels": {"instance": "host1"}, + "at_modifier": null + }, + "function": { + "name": "quantile_over_time" + }, + "range_vector": { + "range": "10m" + } + } + }, + { + "id": "temporal_avg_over_time", + "description": "Average over time function", + "query": "avg_over_time(memory_bytes[30m])", + "expected_pattern_type": "ONLY_TEMPORAL", + "expected_tokens": { + "metric": { + "name": "memory_bytes", + "labels": {}, + "at_modifier": null + }, + "function": { + "name": "avg_over_time" + }, + "range_vector": { + "range": "30m" + } + } + }, + { + "id": "spatial_sum_aggregation", + "description": "Sum aggregation across all series", + "query": "sum(http_requests_total{job=\"api\"})", + "expected_pattern_type": "ONLY_SPATIAL", + "expected_tokens": { + "metric": { + "name": "http_requests_total", + "labels": {"job": "api"}, + "at_modifier": null + }, + "aggregation": { + "op": "sum", + "modifier": null + } + } + }, + { + "id": "spatial_avg_aggregation", + "description": "Average aggregation by label", + "query": "avg by (instance) (cpu_usage)", + "expected_pattern_type": "ONLY_SPATIAL", + "expected_tokens": { + "metric": { + "name": "cpu_usage", + "labels": {}, + "at_modifier": null + }, + "aggregation": { + "op": "avg", + "modifier": null + } + } + }, + { + "id": "spatial_count_aggregation", + "description": "Count aggregation", + "query": "count(up{job=\"node\"})", + "expected_pattern_type": "ONLY_SPATIAL", + "expected_tokens": { + "metric": { + "name": "up", + "labels": {"job": "node"}, 
+ "at_modifier": null + }, + "aggregation": { + "op": "count", + "modifier": null + } + } + }, + { + "id": "combined_sum_of_rate", + "description": "Sum aggregation of rate (temporal + spatial)", + "query": "sum(rate(http_requests_total{job=\"api\"}[5m]))", + "expected_pattern_type": "ONE_TEMPORAL_ONE_SPATIAL", + "expected_tokens": { + "metric": { + "name": "http_requests_total", + "labels": {"job": "api"}, + "at_modifier": null + }, + "function": { + "name": "rate" + }, + "aggregation": { + "op": "sum", + "modifier": null + }, + "range_vector": { + "range": "5m" + } + } + }, + { + "id": "combined_avg_of_quantile_over_time", + "description": "Avg aggregation of quantile_over_time (temporal + spatial)", + "query": "avg(quantile_over_time(0.99, response_time_seconds[15m]))", + "expected_pattern_type": "ONE_TEMPORAL_ONE_SPATIAL", + "expected_tokens": { + "metric": { + "name": "response_time_seconds", + "labels": {}, + "at_modifier": null + }, + "function": { + "name": "quantile_over_time" + }, + "aggregation": { + "op": "avg", + "modifier": null + }, + "range_vector": { + "range": "15m" + } + } + }, + { + "id": "combined_sum_of_avg_over_time", + "description": "Sum aggregation of avg_over_time", + "query": "sum by (job) (avg_over_time(memory_bytes{env=\"prod\"}[1h]))", + "expected_pattern_type": "ONE_TEMPORAL_ONE_SPATIAL", + "expected_tokens": { + "metric": { + "name": "memory_bytes", + "labels": {"env": "prod"}, + "at_modifier": null + }, + "function": { + "name": "avg_over_time" + }, + "aggregation": { + "op": "sum", + "modifier": null + }, + "range_vector": { + "range": "1h" + } + } + } + ], + "pattern_builder_tests": [ + { + "id": "builder_metric_no_labels", + "description": "Build a simple metric selector with no labels", + "builder_call": "metric", + "parameters": { + "collect_as": "metric" + }, + "expected_pattern": { + "type": "metric", + "collect_as": "metric" + } + }, + { + "id": "builder_function_rate", + "description": "Build a rate function pattern", + 
"builder_call": "function", + "parameters": { + "names": ["rate", "increase"], + "collect_as": "function" + }, + "expected_pattern": { + "type": "function", + "names": ["rate", "increase"], + "collect_as": "function" + } + } + ] +} diff --git a/asap-common/tests/compare_patterns/Cargo.toml b/asap-common/tests/compare_patterns/Cargo.toml index 8d04a6f..3f94a69 100644 --- a/asap-common/tests/compare_patterns/Cargo.toml +++ b/asap-common/tests/compare_patterns/Cargo.toml @@ -1,3 +1,5 @@ +[workspace] + [package] name = "compare_patterns_runner" version = "0.1.0" diff --git a/asap-common/tests/compare_patterns/src/main.rs b/asap-common/tests/compare_patterns/src/main.rs index 6c429f1..d75fa8b 100644 --- a/asap-common/tests/compare_patterns/src/main.rs +++ b/asap-common/tests/compare_patterns/src/main.rs @@ -25,7 +25,6 @@ fn main() { let pattern_1 = PromQLPatternBuilder::function(vec!["rate", "increase"], func_args1, Some("function"), None); let pattern1 = PromQLPattern::new( pattern_1, - vec!["metric".to_string(), "function".to_string(), "range_vector".to_string()], ); if let Some(ast) = pattern1.ast_pattern { only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); @@ -44,7 +43,6 @@ fn main() { let pattern_2 = PromQLPatternBuilder::function(vec!["quantile_over_time"], func_args2, Some("function"), Some("function_args")); let pattern2 = PromQLPattern::new( pattern_2, - vec!["metric".to_string(), "function".to_string(), "range_vector".to_string(), "function_args".to_string()], ); if let Some(ast) = pattern2.ast_pattern { only_temporal_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); @@ -66,7 +64,6 @@ fn main() { ); let pattern3 = PromQLPattern::new( pattern_3, - vec!["metric".to_string(), "aggregation".to_string()], ); if let Some(ast) = pattern3.ast_pattern { only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); @@ -76,7 +73,6 @@ fn main() { let pattern_4 = PromQLPatternBuilder::metric(None, None, 
None, Some("metric")); let pattern4 = PromQLPattern::new( pattern_4, - vec!["metric".to_string()], ); if let Some(ast) = pattern4.ast_pattern { only_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); @@ -108,7 +104,6 @@ fn main() { ); let pattern5 = PromQLPattern::new( pattern_5, - vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "function_args".to_string(), "aggregation".to_string()], ); if let Some(ast) = pattern5.ast_pattern { one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); @@ -137,7 +132,6 @@ fn main() { ); let pattern6 = PromQLPattern::new( pattern_6, - vec!["metric".to_string(), "range_vector".to_string(), "function".to_string(), "aggregation".to_string()], ); if let Some(ast) = pattern6.ast_pattern { one_temporal_one_spatial_patterns.push(serde_json::Value::Object(ast.into_iter().collect())); diff --git a/asap-common/tests/rust_pattern_matching/Cargo.toml b/asap-common/tests/rust_pattern_matching/Cargo.toml index 14b2980..571641e 100644 --- a/asap-common/tests/rust_pattern_matching/Cargo.toml +++ b/asap-common/tests/rust_pattern_matching/Cargo.toml @@ -1,3 +1,5 @@ +[workspace] + [package] name = "promql_cross_lang_tests" version = "0.1.0" diff --git a/asap-common/tests/rust_pattern_matching/src/main.rs b/asap-common/tests/rust_pattern_matching/src/main.rs index 3a540af..4a29352 100644 --- a/asap-common/tests/rust_pattern_matching/src/main.rs +++ b/asap-common/tests/rust_pattern_matching/src/main.rs @@ -10,11 +10,6 @@ fn temporal_pattern( ) -> PromQLPattern { PromQLPattern::new( blocks[pattern_type].clone(), - vec![ - "metric".to_string(), - "function".to_string(), - "range_vector".to_string(), - ], ) } @@ -24,7 +19,6 @@ fn spatial_pattern( ) -> PromQLPattern { PromQLPattern::new( blocks[pattern_type].clone(), - vec!["metric".to_string(), "aggregation".to_string()], ) } @@ -39,12 +33,6 @@ fn spatial_of_temporal_pattern(temporal_block: &Option>) ); 
PromQLPattern::new( pattern, - vec![ - "metric".to_string(), - "function".to_string(), - "range_vector".to_string(), - "aggregation".to_string(), - ], ) } diff --git a/asap-summary-ingest/Dockerfile b/asap-summary-ingest/Dockerfile index 8432840..7a79036 100644 --- a/asap-summary-ingest/Dockerfile +++ b/asap-summary-ingest/Dockerfile @@ -1,4 +1,4 @@ -FROM sketchdb-base:latest +FROM ghcr.io/projectasap/asap-base:latest LABEL maintainer="SketchDB Team" LABEL description="ArroyoSketch pipeline configuration service" diff --git a/benchmarks/docker-compose.yml b/benchmarks/docker-compose.yml new file mode 100644 index 0000000..fea9921 --- /dev/null +++ b/benchmarks/docker-compose.yml @@ -0,0 +1,20 @@ +# CI image override: replaces quickstart's pinned release images with images +# built from the current branch. Intended for use as a Compose override: +# +# ASAP_IMAGE_TAG=sha- docker compose \ +# --project-directory . \ +# -f asap-quickstart/docker-compose.yml \ +# -f benchmarks/docker-compose.yml \ +# up -d +# +# ASAP_IMAGE_TAG is set automatically by the 'build' job in accuracy_performance.yml. 
+ +services: + asap-planner-rs: + image: ghcr.io/projectasap/asap-planner-rs:${ASAP_IMAGE_TAG} + + asap-summary-ingest: + image: ghcr.io/projectasap/asap-summary-ingest:${ASAP_IMAGE_TAG} + + queryengine: + image: ghcr.io/projectasap/asap-query-engine:${ASAP_IMAGE_TAG} diff --git a/benchmarks/golden/.gitkeep b/benchmarks/golden/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/queries/promql_suite.json b/benchmarks/queries/promql_suite.json new file mode 100644 index 0000000..52b525e --- /dev/null +++ b/benchmarks/queries/promql_suite.json @@ -0,0 +1,18 @@ +{ + "queries": [ + {"id": "avg_all", "expr": "avg(sensor_reading)", "asap_native": false}, + {"id": "sum_all", "expr": "sum(sensor_reading)", "asap_native": false}, + {"id": "max_all", "expr": "max(sensor_reading)", "asap_native": false}, + {"id": "min_all", "expr": "min(sensor_reading)", "asap_native": false}, + {"id": "q50_all", "expr": "quantile(0.50, sensor_reading)", "asap_native": true}, + {"id": "q90_all", "expr": "quantile(0.90, sensor_reading)", "asap_native": true}, + {"id": "q95_all", "expr": "quantile(0.95, sensor_reading)", "asap_native": true}, + {"id": "q99_all", "expr": "quantile(0.99, sensor_reading)", "asap_native": true}, + {"id": "q95_by_pattern", "expr": "quantile by (pattern) (0.95, sensor_reading)", "asap_native": true}, + {"id": "q99_by_pattern", "expr": "quantile by (pattern) (0.99, sensor_reading)", "asap_native": true}, + {"id": "q50_by_pattern", "expr": "quantile by (pattern) (0.50, sensor_reading)", "asap_native": true}, + {"id": "avg_by_pattern", "expr": "avg by (pattern) (sensor_reading)", "asap_native": false}, + {"id": "sum_by_region", "expr": "sum by (region) (sensor_reading)", "asap_native": false}, + {"id": "max_by_service", "expr": "max by (service) (sensor_reading)", "asap_native": false} + ] +} diff --git a/benchmarks/reports/.gitkeep b/benchmarks/reports/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git 
a/benchmarks/scripts/compare.py b/benchmarks/scripts/compare.py new file mode 100644 index 0000000..2f901d9 --- /dev/null +++ b/benchmarks/scripts/compare.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +"""compare.py — compares baseline (Prometheus) vs ASAP query engine results. + +Applies pass/fail policy: + - FAIL if any query returned an error in ASAP (query failure) + - WARN if any ASAP-native query has relative error > max_error (default 5%) + - WARN (not FAIL) on >10% p95 latency regression (GH runner noise) + +Generates benchmarks/reports/eval_report.md and prints it to stdout. +Exits 1 if any PASS/FAIL check failed. + +Usage: + python benchmarks/scripts/compare.py \ + [--baseline FILE] \ + [--asap FILE] \ + [--output FILE] \ + [--max-error FLOAT] +""" + +import argparse +import json +import os +import sys +from datetime import datetime, timezone + +DEFAULT_BASELINE = os.path.join( + os.path.dirname(__file__), "..", "reports", "baseline_results.json" +) +DEFAULT_ASAP = os.path.join( + os.path.dirname(__file__), "..", "reports", "asap_results.json" +) +DEFAULT_OUTPUT = os.path.join( + os.path.dirname(__file__), "..", "reports", "eval_report.md" +) +DEFAULT_MAX_ERROR = 0.05 # 5 % +LATENCY_REGRESSION_THRESHOLD = 0.10 # 10 % — warn only + + +# ── helpers ────────────────────────────────────────────────────────────────── + +def percentile(values: list[float], pct: float) -> float: + """Compute percentile (0-100) from a list of floats using linear interpolation.""" + if not values: + return float("nan") + sorted_vals = sorted(values) + n = len(sorted_vals) + if n == 1: + return sorted_vals[0] + idx = (pct / 100.0) * (n - 1) + lo = int(idx) + hi = lo + 1 + if hi >= n: + return sorted_vals[-1] + frac = idx - lo + return sorted_vals[lo] * (1 - frac) + sorted_vals[hi] * frac + + +def valid_latencies(latencies: list) -> list[float]: + """Strip None entries (failed HTTP requests) from a latency list.""" + return [x for x in latencies if x is not None] + + +def 
label_key(metric: dict) -> str: + """Canonical sort key for a Prometheus metric label set.""" + return json.dumps(metric, sort_keys=True) + + +def extract_scalar_value(result: list) -> float | None: + """Return a single float from a scalar/single-entry vector result.""" + if not result: + return None + entry = result[0] + try: + return float(entry["value"][1]) + except (KeyError, IndexError, ValueError, TypeError): + return None + + +def compute_relative_error(baseline_val: float, asap_val: float) -> float: + """Relative error: |asap - baseline| / max(|baseline|, 1e-9).""" + denom = max(abs(baseline_val), 1e-9) + return abs(asap_val - baseline_val) / denom + + +def compare_results( + baseline_result: list, asap_result: list +) -> tuple[float | None, str]: + """ + Compare two Prometheus vector result arrays. + + Returns (max_relative_error, note). + For grouped results, matches entries by label set. + """ + if not baseline_result and not asap_result: + return 0.0, "both empty" + if not baseline_result: + return None, "baseline empty" + if not asap_result: + return None, "asap empty" + + # Build label-key → float maps + baseline_map: dict[str, float] = {} + for entry in baseline_result: + key = label_key(entry.get("metric", {})) + try: + baseline_map[key] = float(entry["value"][1]) + except (KeyError, IndexError, ValueError, TypeError): + pass + + asap_map: dict[str, float] = {} + for entry in asap_result: + key = label_key(entry.get("metric", {})) + try: + asap_map[key] = float(entry["value"][1]) + except (KeyError, IndexError, ValueError, TypeError): + pass + + # Single-value result (no labels / collapsed) + if len(baseline_map) == 1 and len(asap_map) == 1: + bv = next(iter(baseline_map.values())) + av = next(iter(asap_map.values())) + return compute_relative_error(bv, av), "" + + # Multi-value: match by label set + max_err = 0.0 + missing = [] + for key, bv in baseline_map.items(): + if key not in asap_map: + missing.append(key) + continue + err = 
compute_relative_error(bv, asap_map[key]) + max_err = max(max_err, err) + + note = f"{len(missing)} label set(s) missing from ASAP" if missing else "" + return max_err, note + + +# ── main ───────────────────────────────────────────────────────────────────── + +def main() -> None: + parser = argparse.ArgumentParser(description="Compare baseline vs ASAP results") + parser.add_argument("--baseline", default=DEFAULT_BASELINE, help="Baseline JSON file") + parser.add_argument("--asap", default=DEFAULT_ASAP, help="ASAP results JSON file") + parser.add_argument("--output", default=DEFAULT_OUTPUT, help="Output Markdown file") + parser.add_argument( + "--max-error", + type=float, + default=DEFAULT_MAX_ERROR, + help="Max relative error for ASAP-native queries (default: 0.05 = 5%%)", + ) + args = parser.parse_args() + + with open(args.baseline) as f: + baseline_data = json.load(f) + with open(args.asap) as f: + asap_data = json.load(f) + + baseline_results = baseline_data["results"] + asap_results = asap_data["results"] + + # Collect all query IDs (union) + all_ids = sorted(set(list(baseline_results.keys()) + list(asap_results.keys()))) + + failures: list[str] = [] + warnings: list[str] = [] + rows: list[dict] = [] + + for qid in all_ids: + b = baseline_results.get(qid, {}) + a = asap_results.get(qid, {}) + + asap_native = a.get("asap_native", False) + asap_status = a.get("status", "missing") + asap_error = a.get("error") + + # Latency p95 + b_lats = valid_latencies(b.get("latencies_ms", [])) + a_lats = valid_latencies(a.get("latencies_ms", [])) + b_p95 = percentile(b_lats, 95) if b_lats else float("nan") + a_p95 = percentile(a_lats, 95) if a_lats else float("nan") + + # Relative error + rel_error: float | None = None + note = "" + if asap_status == "success" and b.get("status") == "success": + rel_error, note = compare_results( + b.get("data", []), a.get("data", []) + ) + + # Pass/fail determination + row_status = "PASS" + + if asap_status == "error" or asap_status == 
"missing": + row_status = "FAIL" + reason = asap_error or "query not present in ASAP results" + failures.append(f"{qid}: ASAP query failed — {reason}") + + elif asap_native and rel_error is not None and rel_error > args.max_error: + row_status = "WARN" + warnings.append( + f"{qid}: relative error {rel_error:.4f} > threshold {args.max_error:.4f} — informational only" + ) + + # Latency regression — warn only + if ( + not (b_p95 != b_p95) # not NaN + and not (a_p95 != a_p95) + and b_p95 > 0 + ): + latency_regression = (a_p95 - b_p95) / b_p95 + if latency_regression > LATENCY_REGRESSION_THRESHOLD: + warnings.append( + f"{qid}: p95 latency regression {latency_regression:.1%} " + f"(baseline={b_p95:.1f}ms, asap={a_p95:.1f}ms) — " + "GH runner noise expected; informational only" + ) + + rows.append( + { + "id": qid, + "asap_native": asap_native, + "b_p95": b_p95, + "a_p95": a_p95, + "rel_error": rel_error, + "status": row_status, + "note": note, + } + ) + + overall = "PASS" if not failures else "FAIL" + + # ── Build Markdown report ──────────────────────────────────────────────── + now = datetime.now(timezone.utc).isoformat() + lines: list[str] = [] + lines.append("# ASAP PR Evaluation Report") + lines.append("") + lines.append(f"**Generated:** {now}") + lines.append( + f"**Overall verdict:** {'✅ PASS' if overall == 'PASS' else '❌ FAIL'}" + ) + lines.append(f"**Max error threshold:** {args.max_error:.2%}") + lines.append("") + + # Summary table + lines.append( + "| Query | ASAP Native | Baseline p95 (ms) | ASAP p95 (ms) | Rel Error | Status |" + ) + lines.append("|-------|:-----------:|:-----------------:|:-------------:|:---------:|:------:|") + for row in rows: + native_mark = "yes" if row["asap_native"] else "no" + b_p95_s = f"{row['b_p95']:.1f}" if row["b_p95"] == row["b_p95"] else "n/a" + a_p95_s = f"{row['a_p95']:.1f}" if row["a_p95"] == row["a_p95"] else "n/a" + if row["rel_error"] is None: + rel_err_s = "n/a" + else: + rel_err_s = f"{row['rel_error']:.4f}" + 
status_s = {"PASS": "✅ PASS", "WARN": "⚠️ WARN"}.get(row["status"], "❌ FAIL")
+        note_s = f" ({row['note']})" if row["note"] else ""
+        lines.append(
+            f"| {row['id']}{note_s} | {native_mark} | {b_p95_s} | {a_p95_s} "
+            f"| {rel_err_s} | {status_s} |"
+        )
+
+    lines.append("")
+
+    if failures:
+        lines.append("## Failures")
+        lines.append("")
+        for f_msg in failures:
+            lines.append(f"- {f_msg}")
+        lines.append("")
+
+    if warnings:
+        lines.append("## Warnings (informational)")
+        lines.append("")
+        lines.append(
+            "> **Note:** GitHub-hosted runners exhibit significant timing noise. "
+            "Latency numbers are indicative only. For precise benchmarks, register "
+            "a self-hosted runner once asap-tools infra is decoupled from Cloudlab "
+            "(see PDF eval guide Phase 3)."
+        )
+        lines.append("")
+        for w in warnings:
+            lines.append(f"- {w}")
+        lines.append("")
+
+    lines.append("---")
+    lines.append(
+        "_This report was generated automatically by `benchmarks/scripts/compare.py`._"
+    )
+
+    report = "\n".join(lines)
+    print(report)
+
+    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
+    with open(args.output, "w") as f:
+        f.write(report + "\n")
+    print(f"\n[compare] Report saved to {args.output}", file=sys.stderr)
+
+    if overall == "FAIL":
+        print(
+            f"\n[compare] Evaluation FAILED. {len(failures)} check(s) failed:",
+            file=sys.stderr,
+        )
+        for msg in failures:
+            print(f"  - {msg}", file=sys.stderr)
+        sys.exit(1)
+    else:
+        print("\n[compare] Evaluation PASSED.", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/scripts/ingest_wait.sh b/benchmarks/scripts/ingest_wait.sh
new file mode 100644
index 0000000..8b12d3a
--- /dev/null
+++ b/benchmarks/scripts/ingest_wait.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ingest_wait.sh — waits for the asap-demo Arroyo pipeline to reach RUNNING
+# state, then sleeps to allow sketches to accumulate before verifying that the
+# query engine has ingested data.
+ +ARROYO_URL="http://localhost:5115/api/v1/pipelines" +QE_URL="http://localhost:8088/api/v1/query" +PIPELINE_NAME="asap-demo" +MAX_PIPELINE_WAIT=600 # seconds — Arroyo must compile Rust UDFs; allow extra time +ACCUMULATE_SLEEP=90 # seconds after pipeline is running +SLEEP=5 + +# ── 1. Wait for asap-demo pipeline to reach RUNNING ───────────────────────── +echo "[ingest_wait] Waiting for Arroyo pipeline '${PIPELINE_NAME}' to reach RUNNING state ..." +elapsed=0 +while true; do + state=$(curl -sf --max-time 10 "${ARROYO_URL}" 2>/dev/null \ + | python3 -c " +import sys, json +try: + data = json.load(sys.stdin) + # 'data' key may be null when no pipelines exist; use 'or []' to handle that + pipelines = data if isinstance(data, list) else (data.get('data') or []) + for p in pipelines: + name = str(p.get('name') or p.get('id') or '') + # Normalise hyphens/underscores so 'asap-demo' matches 'asap_demo' + if '${PIPELINE_NAME}'.replace('-', '_') in name.replace('-', '_'): + state = p.get('state') + stop = p.get('stop', '') + # Arroyo signals a running pipeline via state=null and stop='none' + if state is None and stop == 'none': + print('Running') + elif state is not None: + print(str(state)) + else: + print('stopped') + break + else: + # No matching pipeline found yet — print nothing so caller retries + pass +except Exception: + pass +" 2>/dev/null || true) + + if [ "${state}" = "Running" ] || [ "${state}" = "RUNNING" ] || [ "${state}" = "running" ]; then + echo "[ingest_wait] Pipeline '${PIPELINE_NAME}' is RUNNING (${elapsed}s elapsed)" + break + fi + + if [ "${elapsed}" -ge "${MAX_PIPELINE_WAIT}" ]; then + echo "[ingest_wait] ERROR: Pipeline '${PIPELINE_NAME}' did not reach RUNNING within ${MAX_PIPELINE_WAIT}s (last state: '${state:-unknown}')" >&2 + # Dump pipeline list for diagnosis + echo "[ingest_wait] Current Arroyo pipeline list:" >&2 + curl -sf --max-time 10 "${ARROYO_URL}" 2>/dev/null | python3 -m json.tool 2>/dev/null >&2 || true + exit 1 + fi + + echo 
"[ingest_wait] Pipeline state: '${state:-not found}' — retrying in ${SLEEP}s (${elapsed}s elapsed) ..." + sleep "${SLEEP}" + elapsed=$(( elapsed + SLEEP )) +done + +# ── 2. Allow sketches to accumulate ───────────────────────────────────────── +echo "[ingest_wait] Pipeline running. Sleeping ${ACCUMULATE_SLEEP}s for sketches to accumulate ..." +sleep "${ACCUMULATE_SLEEP}" + +# ── 3. Verify query engine has data ───────────────────────────────────────── +echo "[ingest_wait] Verifying query engine has data ..." +response=$(curl -sf --max-time 10 \ + "${QE_URL}?query=avg%28sensor_reading%29" 2>/dev/null || true) + +if [ -z "${response}" ]; then + echo "[ingest_wait] ERROR: Query engine returned empty response." >&2 + exit 1 +fi + +result_count=$(echo "${response}" | python3 -c " +import sys, json +data = json.load(sys.stdin) +result = data.get('data', {}).get('result', []) +print(len(result)) +" 2>/dev/null || echo "0") + +if [ "${result_count}" -eq 0 ]; then + echo "[ingest_wait] ERROR: Query engine has no data yet (result array is empty)." >&2 + exit 1 +fi + +echo "[ingest_wait] Query engine has data (${result_count} result entries). Ready for benchmarking." diff --git a/benchmarks/scripts/run_asap.py b/benchmarks/scripts/run_asap.py new file mode 100644 index 0000000..10747cb --- /dev/null +++ b/benchmarks/scripts/run_asap.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +"""run_asap.py — queries ASAPQuery engine for each entry in promql_suite.json. + +Runs each query 3 times to gather latency samples, then writes results to +benchmarks/reports/asap_results.json. 
+ +Usage: + python benchmarks/scripts/run_asap.py \ + [--asap-url URL] \ + [--output FILE] +""" + +import argparse +import json +import os +import time +import urllib.parse +from datetime import datetime, timezone + +import requests + +SUITE_PATH = os.path.join( + os.path.dirname(__file__), "..", "queries", "promql_suite.json" +) +DEFAULT_OUTPUT = os.path.join( + os.path.dirname(__file__), "..", "reports", "asap_results.json" +) +RUNS_PER_QUERY = 3 + + +def query_asap(base_url: str, expr: str, ts: float) -> tuple[dict, float]: + """Issue a single instant query to the ASAP query engine, return (parsed_json, latency_ms).""" + encoded = urllib.parse.quote(expr, safe="") + url = f"{base_url}/api/v1/query?query={encoded}&time={ts}" + t0 = time.monotonic() + resp = requests.get(url, timeout=30) + latency_ms = (time.monotonic() - t0) * 1000.0 + resp.raise_for_status() + return resp.json(), latency_ms + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run ASAP query engine benchmark queries") + parser.add_argument( + "--asap-url", + default="http://localhost:8088", + help="ASAP query engine base URL (default: http://localhost:8088)", + ) + parser.add_argument( + "--output", + default=DEFAULT_OUTPUT, + help="Output JSON file path", + ) + args = parser.parse_args() + + with open(SUITE_PATH) as f: + suite = json.load(f) + + results: dict[str, dict] = {} + now = time.time() + + for q in suite["queries"]: + qid = q["id"] + expr = q["expr"] + asap_native = q.get("asap_native", False) + latencies = [] + last_data = [] + last_error = None + last_status = "success" + + print(f"[asap] Running query '{qid}' (native={asap_native}): {expr}") + for run in range(1, RUNS_PER_QUERY + 1): + try: + payload, lat = query_asap(args.asap_url, expr, now) + latencies.append(lat) + if payload.get("status") == "success": + last_data = payload.get("data", {}).get("result", []) + last_status = "success" + last_error = None + else: + last_status = "error" + last_error = 
payload.get("error", "unknown error") + last_data = [] + print(f" run {run}/{RUNS_PER_QUERY}: {lat:.1f} ms status={last_status}") + except Exception as exc: # noqa: BLE001 + last_status = "error" + last_error = str(exc) + last_data = [] + latencies.append(None) + print(f" run {run}/{RUNS_PER_QUERY}: ERROR — {exc}") + + results[qid] = { + "status": last_status, + "asap_native": asap_native, + "latencies_ms": latencies, + "data": last_data, + "error": last_error, + } + + output = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "asap_url": args.asap_url, + "results": results, + } + + os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True) + with open(args.output, "w") as f: + json.dump(output, f, indent=2) + print(f"\n[asap] Results saved to {args.output}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/scripts/run_baseline.py b/benchmarks/scripts/run_baseline.py new file mode 100644 index 0000000..6e09e41 --- /dev/null +++ b/benchmarks/scripts/run_baseline.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +"""run_baseline.py — queries Prometheus for each entry in promql_suite.json. + +Runs each query 3 times to gather latency samples, then writes results to +benchmarks/reports/baseline_results.json. 
+ +Usage: + python benchmarks/scripts/run_baseline.py \ + [--prometheus-url URL] \ + [--output FILE] +""" + +import argparse +import json +import os +import time +import urllib.parse +from datetime import datetime, timezone + +import requests + +SUITE_PATH = os.path.join( + os.path.dirname(__file__), "..", "queries", "promql_suite.json" +) +DEFAULT_OUTPUT = os.path.join( + os.path.dirname(__file__), "..", "reports", "baseline_results.json" +) +RUNS_PER_QUERY = 3 + + +def query_prometheus(base_url: str, expr: str, ts: float) -> tuple[dict, float]: + """Issue a single instant query, return (parsed_json, latency_ms).""" + encoded = urllib.parse.quote(expr, safe="") + url = f"{base_url}/api/v1/query?query={encoded}&time={ts}" + t0 = time.monotonic() + resp = requests.get(url, timeout=30) + latency_ms = (time.monotonic() - t0) * 1000.0 + resp.raise_for_status() + return resp.json(), latency_ms + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run baseline Prometheus queries") + parser.add_argument( + "--prometheus-url", + default="http://localhost:9090", + help="Prometheus base URL (default: http://localhost:9090)", + ) + parser.add_argument( + "--output", + default=DEFAULT_OUTPUT, + help="Output JSON file path", + ) + args = parser.parse_args() + + with open(SUITE_PATH) as f: + suite = json.load(f) + + results: dict[str, dict] = {} + now = time.time() + + for q in suite["queries"]: + qid = q["id"] + expr = q["expr"] + latencies = [] + last_data = [] + last_error = None + last_status = "success" + + print(f"[baseline] Running query '{qid}': {expr}") + for run in range(1, RUNS_PER_QUERY + 1): + try: + payload, lat = query_prometheus(args.prometheus_url, expr, now) + latencies.append(lat) + if payload.get("status") == "success": + last_data = payload.get("data", {}).get("result", []) + last_status = "success" + last_error = None + else: + last_status = "error" + last_error = payload.get("error", "unknown error") + last_data = [] + print(f" run 
{run}/{RUNS_PER_QUERY}: {lat:.1f} ms status={last_status}") + except Exception as exc: # noqa: BLE001 + last_status = "error" + last_error = str(exc) + last_data = [] + latencies.append(None) + print(f" run {run}/{RUNS_PER_QUERY}: ERROR — {exc}") + + results[qid] = { + "status": last_status, + "latencies_ms": latencies, + "data": last_data, + "error": last_error, + } + + output = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "prometheus_url": args.prometheus_url, + "results": results, + } + + os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True) + with open(args.output, "w") as f: + json.dump(output, f, indent=2) + print(f"\n[baseline] Results saved to {args.output}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/scripts/wait_for_stack.sh b/benchmarks/scripts/wait_for_stack.sh new file mode 100644 index 0000000..672aa6d --- /dev/null +++ b/benchmarks/scripts/wait_for_stack.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +# wait_for_stack.sh — polls each service until healthy or times out. +# Used in CI after `docker compose up -d --build`. + +MAX_WAIT=180 # seconds per service +SLEEP=5 + +wait_for_url() { + local name="$1" + local url="$2" + local elapsed=0 + + echo "[wait_for_stack] Waiting for ${name} at ${url} ..." + while true; do + if curl -sf --max-time 5 "${url}" > /dev/null 2>&1; then + echo "[wait_for_stack] ${name} is healthy (${elapsed}s elapsed)" + return 0 + fi + if [ "${elapsed}" -ge "${MAX_WAIT}" ]; then + echo "[wait_for_stack] ERROR: ${name} did not become healthy within ${MAX_WAIT}s" >&2 + return 1 + fi + sleep "${SLEEP}" + elapsed=$(( elapsed + SLEEP )) + done +} + +wait_for_url "Prometheus" "http://localhost:9090/-/healthy" +wait_for_url "Arroyo API" "http://localhost:5115/api/v1/pipelines" +wait_for_url "QueryEngine" "http://localhost:8088/api/v1/query?query=vector(1)" + +echo "[wait_for_stack] All services are healthy."