ProjectASAP · milindsrivastava1997 · Apr 2, 2026 · Mar 28, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/.github/workflows/accuracy_performance.yml b/.github/workflows/accuracy_performance.yml
@@ -0,0 +1,163 @@
+# Builds CI images from the branch, deploys the full stack, and compares the
+# query engine's answers against a Prometheus baseline.
+name: PR Evaluation
+
+# NOTE: GitHub-hosted runners are noisy. Latency numbers are indicative only.
+# For precise benchmarks, register a self-hosted runner once asap-tools infra
+# is decoupled from Cloudlab. See PDF eval guide Phase 3.
+
+on:
+  # Run for pull requests targeting main, but only when a component exercised
+  # by the evaluation (or the evaluation itself) changes.
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'asap-query-engine/**'
+      - 'asap-planner-rs/**'
+      - 'asap-summary-ingest/**'
+      - 'asap-quickstart/**'
+      - '.github/workflows/accuracy_performance.yml'
+      - 'benchmarks/**'
+  # Allow manual runs from the Actions tab.
+  workflow_dispatch:
+
+# Least-privilege GITHUB_TOKEN scopes: the jobs only check out the repository
+# and push/pull images on GHCR. No step writes to the pull request, so the
+# pull-requests scope is intentionally not requested.
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  # ---------------------------------------------------------------------------
+  # Job 1: build images once from branch code and push with a SHA-based tag.
+  # All downstream jobs pull these images instead of rebuilding.
+  # ---------------------------------------------------------------------------
+  build:
+    name: Build CI images
+    runs-on: ubuntu-latest
+    # Generous upper bound so a hung build cannot occupy a runner for the
+    # 360-minute default job timeout.
+    timeout-minutes: 120
+    outputs:
+      # Read by downstream jobs as needs.build.outputs.image-tag.
+      image-tag: ${{ steps.tag.outputs.value }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Authenticate with the workflow token so the build steps can push.
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Tag format: sha-<short commit hash of the checked-out HEAD>.
+      # The output file path is quoted so the redirect is not subject to word
+      # splitting or globbing.
+      - name: Compute image tag
+        id: tag
+        run: echo "value=sha-$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
+
+      # Built from the repository root; layers are cached in the registry under
+      # the :buildcache ref (mode=max also exports intermediate layers).
+      - name: Build and push asap-planner-rs
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: asap-planner-rs/Dockerfile
+          push: true
+          tags: ghcr.io/projectasap/asap-planner-rs:${{ steps.tag.outputs.value }}
+          cache-from: type=registry,ref=ghcr.io/projectasap/asap-planner-rs:buildcache
+          cache-to: type=registry,ref=ghcr.io/projectasap/asap-planner-rs:buildcache,mode=max
+
+      # Built from its own directory; BASE_IMAGE replaces the Dockerfile's
+      # default base image with the one hosted on GHCR.
+      - name: Build and push asap-summary-ingest
+        uses: docker/build-push-action@v6
+        with:
+          context: asap-summary-ingest
+          file: asap-summary-ingest/Dockerfile
+          push: true
+          tags: ghcr.io/projectasap/asap-summary-ingest:${{ steps.tag.outputs.value }}
+          build-args: BASE_IMAGE=ghcr.io/projectasap/asap-base:latest
+
+      # Same layout as the planner build: root context plus a registry cache.
+      - name: Build and push asap-query-engine
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: asap-query-engine/Dockerfile
+          push: true
+          tags: ghcr.io/projectasap/asap-query-engine:${{ steps.tag.outputs.value }}
+          cache-from: type=registry,ref=ghcr.io/projectasap/asap-query-engine:buildcache
+          cache-to: type=registry,ref=ghcr.io/projectasap/asap-query-engine:buildcache,mode=max
+
+
+  # ---------------------------------------------------------------------------
+  # Job 2: deploy the full stack from the images published by the build job,
+  # then run the evaluation scripts against it.
+  # ---------------------------------------------------------------------------
+  eval:
+    name: Full-stack PR evaluation
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      # Tag published by the build job; benchmarks/docker-compose.yml
+      # interpolates it into the image references.
+      ASAP_IMAGE_TAG: ${{ needs.build.outputs.image-tag }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Folded scalars below: each group of lines joins into one command line.
+      # The benchmarks file is layered on top of the quickstart file.
+      - name: Pull and start full stack
+        run: >-
+          docker compose
+          -f asap-quickstart/docker-compose.yml
+          -f benchmarks/docker-compose.yml
+          up -d
+
+      - name: Show running containers
+        run: >-
+          docker compose
+          -f asap-quickstart/docker-compose.yml
+          -f benchmarks/docker-compose.yml
+          ps
+
+      - name: Wait for all services to be healthy
+        run: bash benchmarks/scripts/wait_for_stack.sh
+
+      - name: Wait for pipeline and data ingestion
+        run: bash benchmarks/scripts/ingest_wait.sh
+
+      # The version is quoted so it is read as a string, not a float.
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install Python dependencies
+        run: pip install requests
+
+      - name: Run baseline queries (Prometheus)
+        run: python benchmarks/scripts/run_baseline.py
+
+      - name: Run ASAP queries (query engine)
+        run: python benchmarks/scripts/run_asap.py
+
+      - name: Compare results and evaluate
+        run: python benchmarks/scripts/compare.py
+
+      # Runs even after a failed step so partial reports are still collected.
+      - name: Upload evaluation reports
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-reports-${{ github.run_id }}
+          path: benchmarks/reports/
+
+      # Only runs when an earlier step failed.
+      - name: Print docker logs on failure
+        if: ${{ failure() }}
+        run: >-
+          docker compose
+          -f asap-quickstart/docker-compose.yml
+          -f benchmarks/docker-compose.yml
+          logs --no-color
+
+      # Always stop the stack; -v also removes its volumes.
+      - name: Teardown stack
+        if: ${{ always() }}
+        run: >-
+          docker compose
+          -f asap-quickstart/docker-compose.yml
+          -f benchmarks/docker-compose.yml
+          down -v
diff --git a/asap-summary-ingest/Dockerfile b/asap-summary-ingest/Dockerfile
@@ -1,4 +1,5 @@
-FROM sketchdb-base:latest
+ARG BASE_IMAGE=sketchdb-base:latest
+FROM ${BASE_IMAGE}
 
 LABEL maintainer="SketchDB Team"
 LABEL description="ArroyoSketch pipeline configuration service"

diff --git a/benchmarks/docker-compose.yml b/benchmarks/docker-compose.yml
@@ -0,0 +1,20 @@
+# CI image override: replaces quickstart's pinned release images with images
+# built from the current branch. Intended for use as a Compose override:
+#
+#   ASAP_IMAGE_TAG=sha-<short-sha> docker compose \
+#     --project-directory . \
+#     -f asap-quickstart/docker-compose.yml \
+#     -f benchmarks/docker-compose.yml \
+#     up -d
+#
+# In CI, ASAP_IMAGE_TAG is computed by the 'build' job in
+# accuracy_performance.yml and exported to the 'eval' job's environment.
+#
+# The variable is required: the ${VAR:?message} form makes Compose abort with
+# the message when it is unset or empty, instead of substituting an empty tag
+# and producing an invalid image reference.
+
+services:
+  asap-planner-rs:
+    image: 'ghcr.io/projectasap/asap-planner-rs:${ASAP_IMAGE_TAG:?ASAP_IMAGE_TAG must be set}'
+
+  asap-summary-ingest:
+    image: 'ghcr.io/projectasap/asap-summary-ingest:${ASAP_IMAGE_TAG:?ASAP_IMAGE_TAG must be set}'
+
+  queryengine:
+    image: 'ghcr.io/projectasap/asap-query-engine:${ASAP_IMAGE_TAG:?ASAP_IMAGE_TAG must be set}'
diff --git a/benchmarks/golden/.gitkeep b/benchmarks/golden/.gitkeep
diff --git a/benchmarks/queries/promql_suite.json b/benchmarks/queries/promql_suite.json
@@ -0,0 +1,18 @@
+{
+  "queries": [
+    {"id": "avg_all", "expr": "avg(sensor_reading)", "approximate": false},
+    {"id": "sum_all", "expr": "sum(sensor_reading)", "approximate": false},
+    {"id": "max_all", "expr": "max(sensor_reading)", "approximate": false},
+    {"id": "min_all", "expr": "min(sensor_reading)", "approximate": false},
+    {"id": "q50_all", "expr": "quantile(0.50, sensor_reading)", "approximate": true},
+    {"id": "q90_all", "expr": "quantile(0.90, sensor_reading)", "approximate": true},
+    {"id": "q95_all", "expr": "quantile(0.95, sensor_reading)", "approximate": true},
+    {"id": "q99_all", "expr": "quantile(0.99, sensor_reading)", "approximate": true},
+    {"id": "q95_by_pattern", "expr": "quantile by (pattern) (0.95, sensor_reading)", "approximate": true},
+    {"id": "q99_by_pattern", "expr": "quantile by (pattern) (0.99, sensor_reading)", "approximate": true},
+    {"id": "q50_by_pattern", "expr": "quantile by (pattern) (0.50, sensor_reading)", "approximate": true},
+    {"id": "avg_by_pattern", "expr": "avg by (pattern) (sensor_reading)", "approximate": false},
+    {"id": "sum_by_region", "expr": "sum by (region) (sensor_reading)", "approximate": false},
+    {"id": "max_by_service", "expr": "max by (service) (sensor_reading)", "approximate": false}
+  ]
+}
diff --git a/benchmarks/reports/.gitkeep b/benchmarks/reports/.gitkeep