From e33eb769db6c1c70acd5899062e344a17fab5a82 Mon Sep 17 00:00:00 2001
From: Suryam Agarwal <suryam.agarwal@intel.com>
Date: Tue, 9 Jun 2026 10:44:27 +0530
Subject: [PATCH 1/2] benchmark-scripts: rework POI stream-density scaling and alert waits

---
 .../consolidate_multiple_run_of_metrics.py    |   13 +-
 benchmark-scripts/poi_stream_density.py       | 1297 +++++++++++++++--
 2 files changed, 1148 insertions(+), 162 deletions(-)

diff --git a/benchmark-scripts/consolidate_multiple_run_of_metrics.py b/benchmark-scripts/consolidate_multiple_run_of_metrics.py
index 1d789ff..98c3b5c 100644
--- a/benchmark-scripts/consolidate_multiple_run_of_metrics.py
+++ b/benchmark-scripts/consolidate_multiple_run_of_metrics.py
@@ -101,15 +101,17 @@ def get_vlm_application_latency(log_file_path):
     return statistics
 
 
-def get_vlm_application_latency_stream_density(results_dir, last_n_pairs=20):
+def get_vlm_application_latency_stream_density(results_dir, last_n_pairs=20,
+                                               since_ms=None):
     """Extract per-application average latency from the most recent
     vlm_application_metrics file in *results_dir*, using only the last
-    *last_n_pairs* completed start/end pairs to reflect current-iteration
-    performance.
+    *last_n_pairs* completed start/end pairs that begin at or after
+    *since_ms* (epoch milliseconds).  Pass ``since_ms=None`` to skip the time filter.
 
     Args:
         results_dir: Directory to search for vlm_application_metrics*.txt.
         last_n_pairs: Number of most-recent completed pairs to use per app.
+        since_ms: If set, ignore events with timestamp_ms < since_ms.
 
     Returns:
         dict mapping ``app_id`` → ``avg_latency_ms`` (float), or empty dict
@@ -143,8 +145,11 @@ def get_vlm_application_latency_stream_density(results_dir, last_n_pairs=20):
             event = data.get("event", "")
             timestamp_ms = data.get("timestamp_ms", "")
             if app_name and id_value and event in ("start", "end") and timestamp_ms:
+                ts = int(timestamp_ms)
+                if since_ms is not None and ts < since_ms:
+                    continue
                 timing_data[f"{app_name}_{id_value}"].append(
-                    {"event": event, "timestamp_ms": int(timestamp_ms)}
+                    {"event": event, "timestamp_ms": ts}
                 )
 
     result = {}
diff --git a/benchmark-scripts/poi_stream_density.py b/benchmark-scripts/poi_stream_density.py
index d1319ac..1110587 100644
--- a/benchmark-scripts/poi_stream_density.py
+++ b/benchmark-scripts/poi_stream_density.py
@@ -44,7 +44,11 @@
 TARGET_LATENCY_MS       Latency threshold in ms  (default: 2000)
 LATENCY_METRIC          Which metric to compare: avg | max  (default: avg)
 SCENE_INCREMENT         Scenes to add per iteration  (default: 1)
-INIT_DURATION           Warm-up seconds after restart  (default: 90)
+INIT_DURATION           Seconds per alert-wait window per retry  (default: 45)
+MAX_ALERT_WAIT          Max total seconds to wait for alert from new camera
+                        (retries in INIT_DURATION windows until received or
+                        timeout).  Default: 180 — covers ~3 video cycles (~55s
+                        each).  Increase for CPU mode where inference is slower.
 STABILISE_DURATION      Extra wait for pipeline to stabilise  (default: 30)
 BENCHMARK_DURATION      Max wait for single benchmark in seconds  (default: 120)
 RESULTS_DIR             Where to write results  (default: ./results)
@@ -60,6 +64,7 @@
 import argparse
 import csv
 import glob
+import calendar
 import json
 import logging
 import os
@@ -138,6 +143,8 @@ class IterationResult:
     actual_detections: int = 0
     alerts_generated: int = 0
     detections_per_scene: float = 0.0
+    # Camera under test for this iteration (empty for baseline)
+    new_camera: str = ""
 
 
 @dataclass
@@ -187,7 +194,8 @@ def _set_stream_density(app_dir: str, density: int) -> None:
 def _compose_cmd(app_dir: str) -> str:
     """Build a combined compose invocation spanning SceneScape + POI.
 
-    Includes scenescape-overrides and cameras override when present.
+    Includes scenescape-overrides, NPU overlay (when NPU device is active),
+    and cameras override when present.
     The cameras override may reference POI services (e.g. poi-backend
     environment), so the POI compose file must be included too.
     """
@@ -205,6 +213,12 @@ def _compose_cmd(app_dir: str) -> str:
     ]
     if os.path.isfile(overrides):
         parts.append(f"-f {shlex.quote(overrides)}")
+    # Mirror Makefile logic: include NPU overlay when RESOURCE_CONFIG contains "npu".
+    # Without this, force-recreating lp-video strips /dev/accel, causing
+    # "[NPU_VCL] Unrecognized device ID! 0x0x0" on every stream-density iteration.
+    npu_overlay = os.path.join(app_dir, "docker-compose.npu-overrides.yml")
+    if os.path.isfile(npu_overlay) and _is_npu_device(app_dir):
+        parts.append(f"-f {shlex.quote(npu_overlay)}")
     parts.append(f"-f {shlex.quote(poi_compose)}")
     # Layer in cameras override if it exists
     cameras_override = os.path.join(app_dir, "docker", "docker-compose.cameras.yaml")
@@ -253,11 +267,18 @@ def _run_cmd(cmd: str) -> subprocess.CompletedProcess:
 # Helpers – SceneScape scene cleanup via REST API
 # ---------------------------------------------------------------------------
 
-def _delete_cloned_scenes(app_dir: str, num_scenes: int) -> None:
-    """Delete previously-cloned scenes from SceneScape via REST API."""
+# ---------------------------------------------------------------------------
+# SceneScape REST API helpers
+# ---------------------------------------------------------------------------
+
+def _scenescape_get_client(app_dir: str):
+    """Authenticate with SceneScape and return (base_url, ssl_ctx, token).
+
+    Reads SUPASS from docker/.env and authenticates as admin.
+    Returns (None, None, None) on failure — callers must handle gracefully.
+    """
     import ssl
     import urllib.request
-    import urllib.error
 
     env_file = os.path.join(app_dir, "docker", ".env")
     supass = ""
@@ -267,26 +288,216 @@ def _delete_cloned_scenes(app_dir: str, num_scenes: int) -> None:
                 supass = line.strip().split("=", 1)[1]
                 break
     if not supass:
-        logger.warning("Could not read SUPASS from .env — skipping scene cleanup")
-        return
+        logger.warning("Could not read SUPASS from docker/.env — SceneScape API unavailable")
+        return None, None, None
+
+    # Read base_url from zone_config.json; default to https://localhost
+    try:
+        zone_cfg = _read_zone_config(app_dir)
+        base_url = zone_cfg.get("scenescape_api", {}).get("base_url", "https://localhost").rstrip("/")
+        base_url = base_url + "/api/v1"
+    except Exception:
+        base_url = "https://localhost/api/v1"
 
-    base_url = "https://localhost/api/v1"
     ctx = ssl.create_default_context()
     ctx.check_hostname = False
     ctx.verify_mode = ssl.CERT_NONE
 
-    # Authenticate
     auth_data = json.dumps({"username": "admin", "password": supass}).encode()
     req = urllib.request.Request(
         f"{base_url}/auth", data=auth_data,
         headers={"Content-Type": "application/json"})
     try:
-        with urllib.request.urlopen(req, context=ctx) as resp:
+        with urllib.request.urlopen(req, context=ctx, timeout=15) as resp:
             token = json.loads(resp.read()).get("token", "")
     except Exception as e:
-        logger.warning("Failed to authenticate with SceneScape API: %s", e)
-        return
+        logger.warning("SceneScape authentication failed: %s", e)
+        return None, None, None
+
+    if not token:
+        logger.warning("SceneScape auth returned empty token")
+        return None, None, None
+
+    return base_url, ctx, token
+
+
+def _clone_scene_zip(base_zip_path: str, scene_name: str, camera_name: str) -> bytes:
+    """Clone *base_zip_path* with a new scene name and camera name.
+
+    Returns the cloned ZIP as raw bytes (suitable for multipart upload).
+    Replicates the logic in scenescape/webserver/stream_density.py so that the
+    benchmark can call the SceneScape import-scene API directly from the host
+    without spinning up a Docker sidecar container.
+
+    The import-scene endpoint matches the background image by checking whether
+    the scene name appears in the image filename, so we rename the image to
+    ``<scene_name>.<ext>`` in the output ZIP.
+    """
+    import io
+    import uuid
+    import zipfile
+
+    with zipfile.ZipFile(base_zip_path, "r") as zf:
+        json_name = None
+        base_json = None
+        other_files: dict = {}
+        for name in zf.namelist():
+            data = zf.read(name)
+            if name.endswith(".json"):
+                json_name = name
+                base_json = json.loads(data)
+            else:
+                other_files[name] = data
+
+    if not json_name or base_json is None:
+        raise ValueError(f"No scene JSON found in {base_zip_path}")
+
+    # Deep-copy and patch the scene JSON
+    scene_data = json.loads(json.dumps(base_json))
+    new_scene_uid = str(uuid.uuid4())
+    scene_data["uid"] = new_scene_uid
+    scene_data["name"] = scene_name
+
+    for cam in scene_data.get("cameras", []):
+        cam["uid"] = camera_name
+        cam["name"] = camera_name
+        cam["scene"] = new_scene_uid
+
+    for region in scene_data.get("regions", []):
+        region["uid"] = str(uuid.uuid4())
+        region["scene"] = new_scene_uid
+
+    # SceneScape's import-scene matches the resource file by checking whether
+    # the scene name is a substring of the filename.  Rename the image so it
+    # matches: "<scene_name><ext>" (e.g. "conference room-2.jpg").
+    safe_name = scene_name.replace("/", "_")
+
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf_out:
+        zf_out.writestr(f"{safe_name}.json", json.dumps(scene_data))
+        for orig_name, data in other_files.items():
+            import os as _os
+            ext = _os.path.splitext(orig_name)[1]
+            zf_out.writestr(f"{safe_name}{ext}", data)
+    return buf.getvalue()
+
+
+def _scenescape_import_scene(
+    app_dir: str,
+    scene_name: str,
+    camera_name: str,
+) -> tuple:
+    """Register a new scene + camera in SceneScape via POST /api/v1/import-scene/.
+
+    Clones the base scene ZIP in-memory with the new scene/camera names and
+    uploads it using the same multipart endpoint that scene-import.sh uses.
+    This skips the Docker sidecar on the success path (reportedly ~10× faster, ~200ms).
+
+    Returns (scene_uid, camera_name) on success, (None, None) on failure.
+    Failure is non-fatal — callers fall back to the scene-import sidecar.
+    """
+    import io
+    import urllib.error
+    import urllib.request
+
+    base_url, ctx, token = _scenescape_get_client(app_dir)
+    if not token:
+        return None, None
+
+    # Locate base scene ZIP
+    try:
+        zone_cfg = _read_zone_config(app_dir)
+        scene_zip_name = zone_cfg.get("scene_zip", "conference-room.zip")
+    except Exception:
+        scene_zip_name = "conference-room.zip"
+
+    zip_path = str(Path(app_dir) / ".." / "scenescape" / "webserver" / scene_zip_name)
+    if not Path(zip_path).exists():
+        logger.warning("Base scene ZIP not found at %s — falling back to scene-import", zip_path)
+        return None, None
+
+    # Clone ZIP in-memory
+    try:
+        zip_bytes = _clone_scene_zip(zip_path, scene_name, camera_name)
+    except Exception as e:
+        logger.warning("Failed to clone scene ZIP: %s", e)
+        return None, None
+
+    # Build multipart/form-data body — SceneScape expects field name "zipFile"
+    boundary = "----BenchmarkFormBoundary"
+    filename = f"{scene_name.replace(' ', '-')}.zip"
+    body = (
+        f"--{boundary}\r\n"
+        f'Content-Disposition: form-data; name="zipFile"; filename="{filename}"\r\n'
+        f"Content-Type: application/zip\r\n\r\n"
+    ).encode() + zip_bytes + f"\r\n--{boundary}--\r\n".encode()
+
+    req = urllib.request.Request(
+        f"{base_url}/import-scene/",
+        data=body,
+        headers={
+            "Authorization": f"Token {token}",
+            "Content-Type": f"multipart/form-data; boundary={boundary}",
+        },
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req, context=ctx, timeout=60) as resp:
+            resp_data = json.loads(resp.read())
+            # import-scene response: {"scene": null=success | {errors}, "cameras": ...}
+            # scene: null means scene was created successfully (null = no errors).
+            # The scene UID lives inside the camera objects in the cameras list.
+            scene_errors = resp_data.get("scene")
+            if scene_errors is not None:
+                # scene field is non-null → contains error details
+                logger.warning("SceneScape import-scene scene error: %s", scene_errors)
+                return None, None
+
+            # Scene created — extract UID from cameras data
+            scene_uid = ""
+            cameras = resp_data.get("cameras") or []
+            for cam_entry in cameras:
+                # cam_entry may be a list (one per camera) of [errors_or_None, cam_obj]
+                entries = cam_entry if isinstance(cam_entry, list) else [cam_entry]
+                for entry in entries:
+                    if isinstance(entry, dict) and entry.get("scene"):
+                        scene_uid = entry["scene"]
+                        break
+                if scene_uid:
+                    break
+
+            if not scene_uid:
+                # Fall back to listing scenes by name
+                try:
+                    list_req = urllib.request.Request(
+                        f"{base_url}/scenes", headers={"Authorization": f"Token {token}"})
+                    with urllib.request.urlopen(list_req, context=ctx, timeout=10) as r:
+                        for s in json.loads(r.read()).get("results", []):
+                            if s.get("name") == scene_name:
+                                scene_uid = s["uid"]
+                                break
+                except Exception:
+                    pass
+
+            logger.info("SceneScape scene+camera imported: %s / %s (uid=%s)",
+                        scene_name, camera_name, scene_uid)
+            return scene_uid or scene_name, camera_name
+    except urllib.error.HTTPError as e:
+        logger.warning("SceneScape import-scene → HTTP %s: %s", e.code, e.read().decode()[:200])
+        return None, None
+    except Exception as e:
+        logger.warning("SceneScape import-scene → %s", e)
+        return None, None
+
+
+def _delete_cloned_scenes(app_dir: str, num_scenes: int) -> None:
+    """Delete previously-cloned scenes from SceneScape via REST API."""
+    import urllib.request
+    import urllib.error
+
+    base_url, ctx, token = _scenescape_get_client(app_dir)
     if not token:
+        logger.warning("Could not authenticate — skipping scene cleanup")
         return
 
     auth_header = {"Authorization": f"Token {token}"}
@@ -353,31 +564,51 @@ def _read_base_config(app_dir: str) -> dict:
 def _generate_cameras_override(app_dir: str, num_scenes: int) -> None:
     """
     Generate ``docker-compose.cameras.yaml`` that adds real RTSP camera
-    streams for each additional camera beyond the base ones.
-
-    For N scenes, we create lp-cams-{N+1} through lp-cams-{2*N} services,
-    each streaming the same video on a unique RTSP path.
+    streams and DLStreamer pipeline containers for each additional camera
+    beyond the base two (Camera_01, Camera_02).
+
+    For each extra camera (N - 1 of them for N scenes), we create:
+      - lp-cams-{cam_idx}:   ffmpeg RTSP server for the new camera stream
+      - lp-video-{cam_idx}:  DLStreamer container running the inference pipeline
+      - lp-config-{cam_idx}: Docker config pointing to the generated pipeline JSON
+
+    Architecture note: each camera must have its own DLStreamer (lp-video-N)
+    container because each container mounts a single config.json (via Docker
+    configs) that hardcodes the RTSP source and camera name for that pipeline.
+    Sharing one container across cameras is not supported by this service design.
     """
     override_path = Path(app_dir) / "docker" / "docker-compose.cameras.yaml"
     base = _read_base_config(app_dir)
     base_camera = base["camera_name"]
     base_video = base["video_file"]
 
+    scenescape_dir = (Path(app_dir) / ".." / "scenescape").resolve()
+    dlstreamer_dir = scenescape_dir / "dlstreamer-pipeline-server"
+
     # POI already has 2 base cameras (Camera_01, Camera_02); add more
     # starting from camera index 3 (for scenes > 1)
     base_camera_count = 2
 
+    # When NPU is selected, extra DLStreamer containers need /dev/accel access.
+    # Without it OpenVINO reports "[NPU_VCL] Unrecognized device ID! 0x0x0"
+    # and the inference pipeline fails to initialise.
+    is_npu = _is_npu_device(app_dir)
+
     with open(override_path, "w") as f:
         f.write("# Auto-generated by poi_stream_density.py — do not edit\n")
         f.write(f"# Stream density: {num_scenes} scenes\n\n")
         f.write("services:\n")
 
-        # Additional RTSP camera streams
+        # Additional RTSP camera streams + DLStreamer instances
         for i in range(1, num_scenes):
             cam_idx = base_camera_count + i
             cam_name = f"{base_camera}-{cam_idx}"
-            svc_name = f"lp-cams-{cam_idx}"
-            f.write(f"  {svc_name}:\n")
+            cams_svc = f"lp-cams-{cam_idx}"
+            video_svc = f"lp-video-{cam_idx}"
+            config_name = f"lp-config-{cam_idx}"
+
+            # RTSP camera stream (ffmpeg)
+            f.write(f"  {cams_svc}:\n")
             f.write(f"    image: linuxserver/ffmpeg:version-8.0-cli\n")
             f.write(f'    command: "-nostdin -re -stream_loop -1 '
                     f'-i /workspace/media/{base_video} '
@@ -392,6 +623,75 @@ def _generate_cameras_override(app_dir: str, num_scenes: int) -> None:
             f.write(f'    restart: "no"\n')
             f.write(f"\n")
 
+            # DLStreamer pipeline server for this camera.
+            # Mirrors the lp-video-2 service from scenescape/docker-compose.yaml
+            # but uses lp-config-{cam_idx} to load this camera's pipeline config.
+            f.write(f"  {video_svc}:\n")
+            f.write(f"    image: docker.io/intel/dlstreamer-pipeline-server:${{DLSTREAMER_VERSION:-2026.1.0-20260331-weekly-ubuntu24}}\n")
+            f.write(f"    networks:\n")
+            f.write(f"      storewide-lp:\n")
+            f.write(f"    tty: true\n")
+            f.write(f"    entrypoint: [\"./run.sh\"]\n")
+            f.write(f"    devices:\n")
+            f.write(f"      - \"/dev/dri:/dev/dri\"\n")
+            if is_npu:
+                # Intel NPU (MTL/WCL) is exposed as /dev/accel (major 261).
+                # Without this mapping the VCL compiler reads device ID 0x0x0
+                # and fails: "[NPU_VCL] Unrecognized device ID! 0x0x0"
+                f.write(f"      - \"/dev/accel:/dev/accel\"\n")
+            f.write(f"    group_add:\n")
+            f.write(f"      - \"109\"\n")
+            f.write(f"      - \"110\"\n")
+            f.write(f"      - \"992\"\n")
+            f.write(f"    device_cgroup_rules:\n")
+            f.write(f"      - \"c 189:* rmw\"\n")
+            f.write(f"      - \"c 209:* rmw\"\n")
+            f.write(f"      - \"a 189:* rwm\"\n")
+            if is_npu:
+                f.write(f"      - \"c 261:* rmw\"  # Intel NPU accel devices\n")
+            f.write(f"    depends_on:\n")
+            f.write(f"      broker:\n")
+            f.write(f"        condition: service_started\n")
+            f.write(f"      ntpserv:\n")
+            f.write(f"        condition: service_started\n")
+            f.write(f"      {cams_svc}:\n")
+            f.write(f"        condition: service_started\n")
+            f.write(f"    healthcheck:\n")
+            f.write(f'      test: ["CMD", "curl", "-I", "-s", "http://localhost:8080/pipelines"]\n')
+            f.write(f"      interval: 10s\n")
+            f.write(f"      timeout: 5s\n")
+            f.write(f"      retries: 5\n")
+            f.write(f"      start_period: 10s\n")
+            f.write(f"    environment:\n")
+            f.write(f"      - RUN_MODE=EVA\n")
+            f.write(f"      - GENICAM=Balluff\n")
+            f.write(f"      - GST_DEBUG=1,gencamsrc:2\n")
+            f.write(f"      - ADD_UTCTIME_TO_METADATA=true\n")
+            f.write(f"      - APPEND_PIPELINE_NAME_TO_PUBLISHER_TOPIC=false\n")
+            f.write(f"      - MQTT_HOST=broker.scenescape.intel.com\n")
+            f.write(f"      - MQTT_PORT=1883\n")
+            f.write(f"      - REST_SERVER_PORT=8080\n")
+            f.write(f"      - HTTPS_PROXY=${{HTTPS_PROXY}}\n")
+            f.write(f"      - https_proxy=${{https_proxy}}\n")
+            f.write(f"      - HTTP_PROXY=${{HTTP_PROXY}}\n")
+            f.write(f"      - http_proxy=${{http_proxy}}\n")
+            f.write(f"      - NO_PROXY=mediaserver,${{NO_PROXY}}\n")
+            f.write(f"      - no_proxy=mediaserver,${{no_proxy}}\n")
+            f.write(f"    configs:\n")
+            f.write(f"      - source: {config_name}\n")
+            f.write(f"        target: /home/pipeline-server/config.json\n")
+            f.write(f"    volumes:\n")
+            f.write(f"      - ../scenescape/dlstreamer-pipeline-server/user_scripts:/home/pipeline-server/user_scripts\n")
+            f.write(f"      - vol-dlstreamer-pipeline-root-{cam_idx}:/var/cache/pipeline_root:uid=1999,gid=1999\n")
+            f.write(f"      - vol-sample-data:/home/pipeline-server/videos\n")
+            f.write(f"      - vol-models:/home/pipeline-server/models\n")
+            f.write(f"    secrets:\n")
+            f.write(f"      - source: root-cert\n")
+            f.write(f"        target: certs/scenescape-ca.pem\n")
+            f.write(f"    restart: always\n")
+            f.write(f"    pids_limit: 1000\n")
+            f.write(f"\n")
+
         # Build dynamic MQTT camera list for poi-backend
         all_cameras = ["Camera_01", "Camera_02"]
         for i in range(1, num_scenes):
@@ -405,8 +705,28 @@ def _generate_cameras_override(app_dir: str, num_scenes: int) -> None:
         f.write(f"      RTSP_PREWARM_CAMERAS: \"{camera_csv}\"\n")
         f.write(f"      MQTT_IMAGE_CAMERAS: \"{camera_csv}\"\n")
         f.write(f"      STREAM_DENSITY: \"{num_scenes}\"\n")
+        f.write(f"\n")
+
+        # Docker configs for each new DLStreamer pipeline
+        if num_scenes > 1:
+            f.write(f"configs:\n")
+            for i in range(1, num_scenes):
+                cam_idx = base_camera_count + i
+                cam_name = f"{base_camera}-{cam_idx}"
+                config_name = f"lp-config-{cam_idx}"
+                env_var = f"PIPELINE_CONFIG_{cam_idx}"
+                default_path = dlstreamer_dir / f"person-of-interest-{cam_name}-pipeline-config.json"
+                f.write(f"  {config_name}:\n")
+                f.write(f"    file: ${{{env_var}:-{default_path}}}\n")
+            f.write(f"\n")
+
+            # Named volumes for each new DLStreamer container's pipeline cache
+            f.write(f"volumes:\n")
+            for i in range(1, num_scenes):
+                cam_idx = base_camera_count + i
+                f.write(f"  vol-dlstreamer-pipeline-root-{cam_idx}:\n")
 
-    logger.info("Generated cameras override: %s  (%d scenes, %d extra cameras)",
+    logger.info("Generated cameras override: %s  (%d scenes, %d extra cameras+DLStreamer instances)",
                 override_path, num_scenes, max(0, num_scenes - 1))
 
 
@@ -416,9 +736,13 @@ def _generate_dlstreamer_config(app_dir: str, num_scenes: int) -> None:
 
     Reads the base pipeline config template and replicates it for each
     additional camera, updating the camera name in each pipeline.
+    Also writes PIPELINE_CONFIG_{cam_idx} to docker/.env so that the
+    lp-config-{cam_idx} Docker config defined in docker-compose.cameras.yaml
+    can resolve the correct pipeline JSON path.
     """
     scenescape_dir = Path(app_dir) / ".." / "scenescape"
     dlstreamer_dir = scenescape_dir / "dlstreamer-pipeline-server"
+    env_file = os.path.join(app_dir, "docker", ".env")
 
     base = _read_base_config(app_dir)
     base_camera = base["camera_name"]
@@ -453,9 +777,29 @@ def _generate_dlstreamer_config(app_dir: str, num_scenes: int) -> None:
             json.dump(cfg, fh, indent=2)
         logger.info("Generated pipeline config: %s", output_path)
 
+        # Write PIPELINE_CONFIG_{cam_idx} so docker-compose.cameras.yaml can
+        # resolve the lp-config-{cam_idx} Docker config file path.
+        env_key = f"PIPELINE_CONFIG_{cam_idx}"
+        _write_env_var(env_file, env_key, str(output_path.resolve()))
+
     logger.info("Generated DLStreamer configs for %d total cameras", base_camera_count + num_scenes - 1)
 
 
+def _is_npu_device(app_dir: str) -> bool:
+    """Return True when the active resource config selects NPU.
+
+    Reads RESOURCE_CONFIG from docker/.env to mirror the Makefile logic:
+    ``$(if $(findstring npu,$(DEVICE)),-f $(NPU_OVERLAY),)``.
+    """
+    env_file = os.path.join(app_dir, "docker", ".env")
+    if os.path.isfile(env_file):
+        with open(env_file) as fh:
+            for line in fh:
+                if line.startswith("RESOURCE_CONFIG=") and "npu" in line.lower():
+                    return True
+    return False
+
+
 def _reinit_env(app_dir: str, resource_config: str = "") -> None:
     """Re-run init.sh to regenerate .env with updated config.
 
@@ -476,19 +820,192 @@ def _reinit_env(app_dir: str, resource_config: str = "") -> None:
 
     env = os.environ.copy()
     if resource_config:
-        env["RESOURCE_CONFIG"] = resource_config
-        logger.info("Re-running init.sh with RESOURCE_CONFIG=%s …", resource_config)
+        # init.sh constructs RESOURCE_CONFIG_PATH as "${APP_DIR}/${RESOURCE_CONFIG}",
+        # so it expects a path relative to app_dir, not an absolute path.
+        # Convert absolute → relative so the path resolves correctly inside init.sh.
+        try:
+            rel_rc = str(Path(resource_config).relative_to(Path(app_dir)))
+        except ValueError:
+            rel_rc = resource_config  # already relative or outside app_dir
+        env["RESOURCE_CONFIG"] = rel_rc
+        logger.info("Re-running init.sh with RESOURCE_CONFIG=%s …", rel_rc)
     else:
         logger.info("Re-running init.sh to update .env …")
 
     cmd = f"bash {shlex.quote(str(init_script))} {shlex.quote(app_dir)}"
     result = subprocess.run(cmd, shell=True, capture_output=True, text=True, env=env)
     if result.returncode != 0:
-        logger.warning("init.sh returned non-zero:\n%s", result.stderr[-500:])
+        # init.sh writes errors to stdout (no >&2), so log both streams
+        output = (result.stderr + result.stdout)[-500:]
+        logger.warning("init.sh returned non-zero:\n%s", output)
     else:
         logger.info("init.sh completed — .env updated")
 
 
+def _wait_for_first_detection(timeout: int = 60, poll_interval: int = 3,
+                              camera_filter: Optional[str] = None) -> bool:
+    """Poll poi-backend logs until the first face detection arrives or timeout.
+
+    Args:
+        camera_filter: If set, the pipeline only counts as "warm" once the
+                       logs also contain ``camera=<camera_filter>``.  Note the
+                       check is log-wide, not per detection line, so it is a
+                       heuristic against Camera_01/02 masking a cold pipeline.
+
+    Returns True if a detection was seen within *timeout* seconds, False otherwise.
+    Uses --since to only read logs produced after this function is called, avoiding
+    false positives from stale log lines from previous iterations.
+    """
+    camera_label = f" from camera={camera_filter}" if camera_filter else ""
+    since = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+    deadline = time.time() + timeout
+    attempt = 0
+    while time.time() < deadline:
+        result = subprocess.run(
+            f"docker logs --since {since} poi-backend 2>&1",
+            shell=True, capture_output=True, text=True)
+        output = result.stdout + result.stderr
+        is_detection = (
+            "POI match" in output or
+            "face embedding" in output.lower() or
+            "detections" in output.lower() or
+            "poi_detections" in output
+        )
+        if is_detection:
+            if camera_filter is None or f"camera={camera_filter}" in output:
+                elapsed = int(timeout - (deadline - time.time()))
+                logger.info("First detection seen%s after ~%ds — pipeline is warm",
+                            camera_label, elapsed)
+                return True
+        if attempt % 4 == 0:
+            remaining = int(deadline - time.time())
+            logger.info("  Waiting for first detection%s … (%ds remaining)",
+                        camera_label, remaining)
+        attempt += 1
+        time.sleep(poll_interval)
+    logger.warning("No detection%s seen within %ds — proceeding anyway",
+                   camera_label, timeout)
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Camera-specific helpers — new camera detection for stream-density
+# ---------------------------------------------------------------------------
+
+def _get_new_camera_name(app_dir: str, num_scenes: int) -> Optional[str]:
+    """Return the camera name newly added in this stream-density iteration.
+
+    Returns ``None`` for *num_scenes* <= 1 (baseline iteration: Camera_01 and
+    Camera_02 are always present, no camera is "newly added").
+
+    For *num_scenes* > 1 each increment adds exactly one extra RTSP stream
+    whose index is ``base_camera_count + (num_scenes - 1)``.
+
+    Example (base_camera = "Camera_01"):
+      num_scenes=1 → None         (baseline — Camera_01 / Camera_02)
+      num_scenes=2 → "Camera_01-3"
+      num_scenes=3 → "Camera_01-4"
+      num_scenes=N → f"Camera_01-{N+1}"
+    """
+    if num_scenes <= 1:
+        return None
+    base = _read_base_config(app_dir)
+    base_camera = base["camera_name"]       # e.g. "Camera_01"
+    base_camera_count = 2                   # POI always starts with 2 base cameras
+    cam_idx = base_camera_count + (num_scenes - 1)
+    return f"{base_camera}-{cam_idx}"
+
+
+def _wait_for_alert_from_camera(
+    camera_id: str,
+    duration: int,
+    since: Optional[datetime] = None,
+    poll_interval: int = 5,
+) -> Optional[dict]:
+    """Poll /api/v1/alerts until a fresh alert from *camera_id* is dispatched.
+
+    "Fresh" means ``dispatched_at`` is at or after *since* (defaults to the
+    moment this function is called).
+
+    Returns the matched alert dict so the caller can compute latency directly
+    without a second API round-trip.  Returns ``None`` if *duration* expires
+    without a matching alert.
+
+    Up to 100 alerts are fetched per poll to avoid missing the new camera's
+    alert when many alerts have already accumulated.
+    """
+    import urllib.request
+    from datetime import datetime as _dt, timezone as _tz
+
+    elapsed = 0
+    since_ts = since or datetime.utcnow()
+    since_aware = (
+        since_ts.astimezone(_tz.utc)
+        if since_ts.tzinfo
+        else since_ts.replace(tzinfo=_tz.utc)
+    )
+    logger.info(
+        "Waiting up to %ds for alert from camera=%s (since=%s) …",
+        duration, camera_id, since_ts.strftime("%H:%M:%S"),
+    )
+
+    while elapsed < duration:
+        sleep_time = min(poll_interval, duration - elapsed)
+        time.sleep(sleep_time)
+        elapsed += sleep_time
+
+        try:
+            req = urllib.request.Request(
+                "http://localhost:8000/api/v1/alerts?limit=100"
+            )
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                alerts = json.loads(resp.read().decode())
+            if not isinstance(alerts, list):
+                continue
+
+            for alert in alerts:
+                dispatched_str = alert.get("dispatched_at") or alert.get("timestamp", "")
+                if not dispatched_str:
+                    continue
+                try:
+                    d_str = dispatched_str.replace("Z", "+00:00")
+                    dispatched_utc = _dt.fromisoformat(d_str)
+                    if not dispatched_utc.tzinfo:
+                        dispatched_utc = dispatched_utc.replace(tzinfo=_tz.utc)
+                    else:
+                        dispatched_utc = dispatched_utc.astimezone(_tz.utc)
+                    if dispatched_utc < since_aware:
+                        continue
+                except (ValueError, TypeError):
+                    continue
+
+                alert_camera = (
+                    alert.get("match", {}).get("camera_id")
+                    or alert.get("camera_id", "")
+                )
+                if alert_camera == camera_id:
+                    logger.info(
+                        "Alert from camera=%s received after %ds — pipeline active",
+                        camera_id, elapsed,
+                    )
+                    time.sleep(5)   # brief flush so metrics files catch up
+                    return alert
+        except Exception:
+            pass
+
+        if elapsed % 30 < poll_interval + 1:
+            logger.info(
+                "Waiting for alert from camera=%s … (%d/%ds)",
+                camera_id, elapsed, duration,
+            )
+
+    logger.warning(
+        "No alert from camera=%s within %ds — continuing anyway",
+        camera_id, duration,
+    )
+    return None
+
+
 def _wait_for_web_healthy(timeout: int = 300) -> None:
     """Block until SceneScape web container is healthy or timeout expires."""
     # Container name depends on compose project name used at startup
@@ -508,6 +1025,141 @@ def _wait_for_web_healthy(timeout: int = 300) -> None:
     logger.warning("Web container did not become healthy after %ds — continuing anyway", timeout)
 
 
+def _wait_for_scene_import_completion(timeout: int = 180) -> None:
+    """Block until the scene-import container has exited, or *timeout* elapses.
+
+    Polls ``docker ps`` for a container whose name matches ``scene-import``:
+    while one is running it re-checks every 5 s; once one is listed as exited
+    its exit code is read with ``docker inspect`` and logged (info for ``0``,
+    warning otherwise) and the function returns.  If neither is found yet it
+    retries after 3 s.  On timeout a warning is logged and the caller carries
+    on.  The point of waiting is that SceneScape should have the cloned camera
+    registered before DLStreamer starts publishing data for it.
+    """
+    def _docker_stdout(command: str) -> str:
+        # Run a shell command and return its stripped stdout.
+        proc = subprocess.run(command, shell=True, capture_output=True, text=True)
+        return proc.stdout.strip()
+
+    logger.info("Waiting for scene-import to complete (timeout=%ds) …", timeout)
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        still_running = _docker_stdout(
+            "docker ps -q --filter 'name=scene-import' --filter 'status=running'")
+        if still_running:
+            waited = int(timeout - (deadline - time.time()))
+            if waited % 30 < 6:
+                logger.info("  scene-import still running … (%ds elapsed)", waited)
+            time.sleep(5)
+            continue
+
+        exited_ids = _docker_stdout(
+            "docker ps -aq --filter 'name=scene-import' --filter 'status=exited'")
+        if not exited_ids:
+            # Container not found yet — give it a moment to start
+            time.sleep(3)
+            continue
+
+        # Read the exit code of the first exited container to surface errors
+        first_id = exited_ids.splitlines()[0]
+        exit_code = _docker_stdout(
+            f"docker inspect {first_id} --format '{{{{.State.ExitCode}}}}'")
+        if exit_code == "0":
+            logger.info("scene-import completed successfully")
+        else:
+            logger.warning("scene-import exited with code %s — "
+                           "camera registration may be incomplete", exit_code)
+        return
+
+    logger.warning("scene-import did not complete within %ds — "
+                   "SceneScape may not have the new camera registered yet", timeout)
+
+
+def _wait_for_camera_rtsp_ready(camera_name: str, timeout: int = 60) -> bool:
+    """Wait until the camera's RTSP stream appears to be available.
+
+    Two checks are made on every poll (3 s pause between polls):
+      1. ``docker exec`` into ``storewide-lp-mediaserver-1`` and fetch the
+         MediaMTX path list from ``localhost:9997``; ready if the command
+         succeeds and *camera_name* occurs in its output.  Errors and the 5 s
+         command timeout are ignored and the fallback is tried instead.
+      2. Fallback: ``docker inspect`` reports that container
+         ``storewide-lp-lp-cams-{N}-1`` is running, where ``N`` is the text
+         after the last ``-`` in *camera_name* (``"3"`` if there is no ``-``).
+
+    Returns True as soon as either check passes, False after *timeout* seconds.
+    """
+    logger.info("Waiting for RTSP stream camera=%s to be ready (timeout=%ds) …",
+                camera_name, timeout)
+    # e.g. Camera_01-3 → "3" → container "storewide-lp-lp-cams-3-1"
+    if "-" in camera_name:
+        svc_idx = camera_name.split("-")[-1]
+    else:
+        svc_idx = "3"
+    container_name = f"storewide-lp-lp-cams-{svc_idx}-1"
+    paths_cmd = ("docker exec storewide-lp-mediaserver-1 "
+                 "wget -qO- 'http://localhost:9997/v3/paths/list'")
+    inspect_cmd = (f"docker inspect {container_name} "
+                   f"--format '{{{{.State.Running}}}}'")
+
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        # Check 1: ask MediaMTX (inside its container) which paths it serves
+        try:
+            listing = subprocess.run(paths_cmd, shell=True, capture_output=True,
+                                     text=True, timeout=5)
+            if listing.returncode == 0 and camera_name in listing.stdout:
+                logger.info("RTSP stream camera=%s is ready (confirmed via MediaMTX API)",
+                            camera_name)
+                return True
+        except Exception:
+            pass
+
+        # Check 2: is the camera's own container up?
+        inspected = subprocess.run(inspect_cmd, shell=True,
+                                   capture_output=True, text=True)
+        if inspected.stdout.strip() == "true":
+            time.sleep(3)   # short grace period before declaring it usable
+            logger.info("RTSP container %s is running — stream likely ready",
+                        container_name)
+            return True
+
+        waited = int(timeout - (deadline - time.time()))
+        if waited % 15 < 4:
+            logger.info("  RTSP camera=%s not ready yet … (%ds elapsed)",
+                        camera_name, waited)
+        time.sleep(3)
+
+    logger.warning("RTSP stream camera=%s not confirmed within %ds — "
+                   "DLStreamer may fail to connect", camera_name, timeout)
+    return False
+
+
+def _write_env_var(env_file: str, key: str, value: str) -> None:
+    """Write or update a ``KEY=VALUE`` line in an env file.
+
+    The first line starting with ``KEY=`` or ``KEY =`` is replaced in place;
+    otherwise the entry is appended, first terminating an unterminated last
+    line so the two entries are not glued together.  A missing file is created.
+    """
+    entry = f"{key}={value}\n"
+    lines: list[str] = []
+    if os.path.isfile(env_file):
+        with open(env_file) as fh:
+            lines = fh.readlines()
+    for i, line in enumerate(lines):
+        if line.startswith((f"{key}=", f"{key} =")):
+            lines[i] = entry
+            break
+    else:  # loop ended without a break: no existing entry for this key
+        if lines and not lines[-1].endswith("\n"):
+            lines[-1] += "\n"
+        lines.append(entry)
+    with open(env_file, "w") as fh:
+        fh.writelines(lines)
+    logger.info("Set %s=%s in %s", key, value, env_file)
+
+
 def _scale_pipeline_services(app_dir: str, num_scenes: int, wait: int = 90, resource_config: str = "") -> None:
     """
     Scale the POI video pipeline to N scenes.
@@ -516,48 +1168,150 @@ def _scale_pipeline_services(app_dir: str, num_scenes: int, wait: int = 90, reso
       1. Update stream_density in zone_config.json
       2. Generate docker-compose.cameras.yaml with extra RTSP streams
       3. Re-run init.sh to update .env
-      4. Generate per-camera DLStreamer pipeline configs
-      5. Bring up new camera services
-      6. Wait for web container healthy
-      7. Clean stale scenes, restart scene-import
-      8. Recreate lp-video (DLStreamer) and poi-backend
+      4. Write STREAM_DENSITY + BASE_CAMERA_COUNT to docker/.env
+      5. Generate per-camera DLStreamer pipeline configs
+      6. Bring up new camera services
+      7. Wait for web container healthy
+      8. Clean stale scenes, restart scene-import
+      9. Recreate lp-video (DLStreamer)
     """
     logger.info("Scaling POI to %d scene(s) …", num_scenes)
 
     _set_stream_density(app_dir, num_scenes)
     _generate_cameras_override(app_dir, num_scenes)
     _reinit_env(app_dir, resource_config=resource_config)
+
+    env_file = os.path.join(app_dir, "docker", ".env")
+    _write_env_var(env_file, "STREAM_DENSITY", str(num_scenes))
+    # BASE_CAMERA_COUNT is still written for backward-compat with any
+    # scene-import fallback path (tells clone-zip to start at Camera_01-3)
+    _write_env_var(env_file, "BASE_CAMERA_COUNT", "2")
+
     _generate_dlstreamer_config(app_dir, num_scenes)
 
-    # Bring up any new camera services
+    # Determine new camera service names for this iteration
+    base_camera_count = 2
+    new_cam_services: list[str] = []
+    if num_scenes > 1:
+        new_cam = _get_new_camera_name(app_dir, num_scenes)
+        if new_cam:
+            cam_idx = int(new_cam.split("-")[-1])   # "Camera_01-3" → 3
+            new_cam_services = [f"lp-cams-{cam_idx}", f"lp-video-{cam_idx}"]
+
+    # Remove stale containers for new services before (re)creating them.
+    # Containers left in "Created" or "Exited" state from a previous iteration
+    # hold stale Docker network IDs.  Starting them without --force-recreate
+    # causes "network ... not found" errors.  Removing them first ensures
+    # docker compose creates fresh containers with the current network.
+    if new_cam_services:
+        logger.info("Removing stale containers for new services: %s …",
+                    " ".join(new_cam_services))
+        _docker_compose(app_dir, f"rm -f {' '.join(new_cam_services)}")
+
+    # Bring up all services.  --remove-orphans cleans up containers from
+    # previous iterations (e.g. lp-cams-4..7 left over from earlier runs)
+    # that would otherwise hold stale network references and cause conflicts.
+    # This also creates any new named volumes (vol-dlstreamer-pipeline-root-N).
     logger.info("Starting new camera streams …")
-    _docker_compose(app_dir, "up -d --no-recreate")
+    _docker_compose(app_dir, "up -d --no-recreate --remove-orphans")
+
+    # Pre-initialise the DLStreamer pipeline cache volume for the NEW camera only.
+    # New volumes are created by docker compose as root:root with no sticky bit.
+    # DLStreamer runs as uid=1999 and needs to create user_defined_pipelines/
+    # inside the mount — this fails with PermissionError on a fresh volume.
+    # Only the volume for the camera added THIS iteration is new; previous
+    # iterations' volumes are already initialised — no need to re-run alpine.
+    project = "storewide-lp"
+    if num_scenes > 1:
+        new_vol_idx = base_camera_count + (num_scenes - 1)
+        vol_name = f"{project}_vol-dlstreamer-pipeline-root-{new_vol_idx}"
+        logger.info("Initialising volume %s for DLStreamer uid=1999 …", vol_name)
+        subprocess.run(
+            f"docker run --rm -v {vol_name}:/data alpine sh -c "
+            f"'chmod a+rwxt /data && "
+            f"mkdir -p /data/user_defined_pipelines && "
+            f"chown 1999:1999 /data/user_defined_pipelines'",
+            shell=True, capture_output=True, text=True,
+        )
 
-    _wait_for_web_healthy()
+    # Explicitly force-recreate the new camera services to guarantee fresh
+    # containers with a valid network attachment (--no-recreate skips them if
+    # they were already recreated by the rm above but not yet started).
+    if new_cam_services:
+        cam_svc = new_cam_services[0]   # lp-cams-{N}
+        logger.info("Force-starting camera stream service %s …", cam_svc)
+        _docker_compose(app_dir, f"up -d --force-recreate {cam_svc}")
 
-    # Clean stale scene extracts in web container
-    logger.info("Cleaning stale scene-import extract dirs …")
-    _run_cmd("docker exec storewide-lp-web-1 bash -c "
-             "'rm -rf /workspace/media/storewide-loss-prevention-[0-9]*'")
+    _wait_for_web_healthy()
 
+    # Delete any scenes/cameras cloned in previous iterations, then register
+    # the new scene + camera directly via the SceneScape REST API.
+    #
+    # OLD flow: generate ZIP → docker compose up scene-import → wait ~10s
+    # NEW flow: clone ZIP in-memory → POST /api/v1/import-scene/ → ~200ms
+    #
+    # SceneScape must have Camera_01-N registered BEFORE lp-video-N connects
+    # to the RTSP source; otherwise the controller drops all DLStreamer output
+    # for the new camera (no embeddings → no FAISS match → no alert).
     _delete_cloned_scenes(app_dir, num_scenes)
 
-    # Re-run scene-import
-    logger.info("Re-running scene-import for %d scenes …", num_scenes)
-    _docker_compose(app_dir, "rm -f -s scene-import")
-    _docker_compose(app_dir, "up -d scene-import")
-    time.sleep(15)
-
-    # Recreate lp-video so updated DLStreamer config is mounted
-    logger.info("Recreating lp-video (DLStreamer) with new config …")
-    _docker_compose(app_dir, "up -d --force-recreate lp-video")
-
-    # Recreate poi-backend to pick up new camera subscriptions
-    logger.info("Recreating poi-backend to subscribe to new cameras …")
-    _poi_compose(app_dir, "up -d --force-recreate poi-backend")
-
-    logger.info("Waiting %ds for services to initialise …", wait)
-    time.sleep(wait)
+    if num_scenes > 1:
+        new_cam = _get_new_camera_name(app_dir, num_scenes)
+        zone_cfg = _read_zone_config(app_dir)
+        base_scene_name = zone_cfg.get("scene_name", "conference room")
+        new_scene_name = f"{base_scene_name}-{num_scenes}"
+        if new_cam:
+            logger.info("Registering scene=%s camera=%s via SceneScape REST API …",
+                        new_scene_name, new_cam)
+            scene_uid, cam_uid = _scenescape_import_scene(
+                app_dir, new_scene_name, new_cam)
+            if not scene_uid:
+                # Direct import failed — fall back to scene-import sidecar
+                logger.warning("SceneScape API import failed — falling back to scene-import sidecar")
+                _docker_compose(app_dir, "rm -f -s scene-import")
+                _docker_compose(app_dir, "up -d scene-import")
+                _wait_for_scene_import_completion(timeout=180)
+
+
+    # Wait for the new camera's RTSP stream to be served by MediaMTX.
+    # DLStreamer connects to RTSP at startup; if lp-cams-N is still initialising
+    # when lp-video is force-recreated, DLStreamer silently skips that pipeline.
+    if num_scenes > 1:
+        new_cam = _get_new_camera_name(app_dir, num_scenes)
+        if new_cam:
+            _wait_for_camera_rtsp_ready(new_cam, timeout=60)
+
+    # Recreate only lp-video (Camera_01 baseline) and the NEW lp-video-{N}.
+    # Already-running lp-video-3, lp-video-4, … from previous iterations have
+    # unchanged pipeline configs and keep their RTSP connections — no need to
+    # restart them.  Restarting N containers on every iteration adds O(N) setup
+    # time and disrupts healthy pipelines unnecessarily.
+    # --remove-orphans cleans leftover lp-video-{old} containers from prior runs.
+    logger.info("Recreating DLStreamer container(s) for %d scene(s) …", num_scenes)
+    if num_scenes == 1:
+        # Baseline: only Camera_01's container
+        video_services = "lp-video"
+    else:
+        # Incremental: baseline container + the single new camera container only
+        new_vid_idx = base_camera_count + (num_scenes - 1)
+        video_services = f"lp-video lp-video-{new_vid_idx}"
+    _docker_compose(app_dir, f"up -d --force-recreate --remove-orphans {video_services}")
+
+    # poi-backend subscribes to scenescape/data/camera/+ (wildcard) so it
+    # receives embeddings from all cameras without restart.  MQTT_IMAGE_CAMERAS
+    # only controls the thumbnail-grab strategy (MQTT vs RTSP), not embedding
+    # ingestion.  Skip the force-recreate to avoid resetting FAISS/Redis state
+    # and wasting the 90-second stabilisation window.
+
+    # Wait for DLStreamer to produce first detection (replaces fixed 90s sleep).
+    # For incremental iterations, wait for the NEW camera specifically — Camera_01/02
+    # detections would give a false-positive "pipeline warm" signal while
+    # Camera_01-N's lp-video-N is still initialising.
+    new_cam_for_warmup = _get_new_camera_name(app_dir, num_scenes)
+    _wait_for_first_detection(timeout=wait, poll_interval=3,
+                              camera_filter=new_cam_for_warmup)
+    logger.info("Pipeline warm — adding 10s stabilisation buffer …")
+    time.sleep(10)
 
 
 def _clean_cameras_override(app_dir: str) -> None:
@@ -648,20 +1402,27 @@ def _collect_poi_latency_from_docker_logs(app_dir: str, duration_secs: int = 30)
 
 def _collect_poi_e2e_latency_from_alerts(
     since: Optional[datetime] = None,
+    camera_filter: Optional[str] = None,
 ) -> Dict[str, float]:
     """Compute real end-to-end latency from POI alerts API.
 
-    Each alert contains:
-      - ``mqtt_received_at``: when POI backend received the MQTT message (preferred)
-      - ``timestamp``: DLStreamer frame capture time (fallback, includes pipeline latency)
-      - ``dispatched_at``: when the alert was actually dispatched
+    Uses ``timestamp`` (DLStreamer frame capture time from the MQTT payload)
+    as the start time and ``dispatched_at`` as the end time, giving true
+    end-to-end latency from frame capture → alert dispatch (includes FAISS
+    match overhead but NOT DLStreamer pipeline inference time — that is
+    captured by the VLM metrics logger which uses the same frame timestamp).
 
-    Uses ``mqtt_received_at`` when available to measure POI application latency
-    only, excluding DLStreamer pipeline latency (~6-8s).
+    NOTE: In stream-density mode this function is a fallback only.  The
+    primary path reads per-camera latency directly from the VLM metrics file
+    (``vlm_Person-of-Interest_{camera_id}_avg_ms``) which is written by
+    ``alert_service.py`` via ``user_log_start_time`` / ``log_end_time``.
 
     Args:
         since: If provided, only include alerts dispatched after this time.
                Filters out stale alerts from previous benchmark runs.
+        camera_filter: If provided, only include alerts from this specific camera.
+                       Used in stream-density mode to measure the newly added camera's
+                       latency independently.  When ``None``, all cameras are included.
 
     Returns dict with ``poi_e2e_latency_avg_ms``, ``poi_e2e_latency_max_ms``,
     ``poi_e2e_latency_min_ms``, and ``poi_e2e_alert_count``.
@@ -670,7 +1431,7 @@ def _collect_poi_e2e_latency_from_alerts(
     import urllib.error
 
     try:
-        req = urllib.request.Request("http://localhost:8000/api/v1/alerts")
+        req = urllib.request.Request("http://localhost:8000/api/v1/alerts?limit=100")
         with urllib.request.urlopen(req, timeout=10) as resp:
             alerts = json.loads(resp.read().decode())
     except Exception as e:
@@ -684,20 +1445,29 @@ def _collect_poi_e2e_latency_from_alerts(
 
     latencies_ms: list[float] = []
     skipped = 0
-    used_mqtt_received = 0
-    used_frame_timestamp = 0
+    filtered_camera = 0
     for alert in alerts:
-        # Prefer mqtt_received_at (POI application latency only, excludes
-        # DLStreamer pipeline latency) over timestamp (frame capture time).
-        mqtt_recv = alert.get("mqtt_received_at", "")
-        start_str = mqtt_recv or alert.get("timestamp", "")
+        # Filter by camera when requested (stream-density: test new camera only)
+        if camera_filter:
+            alert_camera = (
+                alert.get("match", {}).get("camera_id")
+                or alert.get("camera_id", "")
+            )
+            if alert_camera != camera_filter:
+                filtered_camera += 1
+                continue
+
+        # Use DLStreamer frame capture timestamp as start (true E2E start).
+        # alert["timestamp"] = payload["timestamp"] from MQTT = frame capture time.
+        # alert["dispatched_at"] = wall-clock when alert was fired after FAISS match.
+        frame_ts = alert.get("timestamp", "")
         dispatched_str = alert.get("dispatched_at", "")
-        if not start_str or not dispatched_str:
+        if not frame_ts or not dispatched_str:
             continue
         try:
-            start_str = start_str.replace("Z", "+00:00")
+            frame_ts = frame_ts.replace("Z", "+00:00")
             dispatched_str = dispatched_str.replace("Z", "+00:00")
-            start = _dt.fromisoformat(start_str)
+            start = _dt.fromisoformat(frame_ts)
             dispatched = _dt.fromisoformat(dispatched_str)
 
             # Normalize both to UTC-aware to avoid mixed tz subtraction errors
@@ -714,15 +1484,14 @@ def _collect_poi_e2e_latency_from_alerts(
             delta_ms = (dispatched_utc - start_utc).total_seconds() * 1000
             if delta_ms >= 0:
                 latencies_ms.append(delta_ms)
-                if mqtt_recv:
-                    used_mqtt_received += 1
-                else:
-                    used_frame_timestamp += 1
         except (ValueError, TypeError):
             continue
 
     if skipped:
         logger.info("Filtered out %d stale alerts (before benchmark start)", skipped)
+    if filtered_camera:
+        logger.debug("Filtered out %d alerts from other cameras (camera_filter=%s)",
+                     filtered_camera, camera_filter)
 
     if not latencies_ms:
         return {}
@@ -733,11 +1502,9 @@ def _collect_poi_e2e_latency_from_alerts(
         "poi_e2e_latency_min_ms": min(latencies_ms),
         "poi_e2e_alert_count": len(latencies_ms),
     }
-    label = "MQTT receive → alert dispatch"
-    if used_frame_timestamp and not used_mqtt_received:
-        label = "frame capture → alert dispatch (includes DLStreamer latency)"
-    elif used_frame_timestamp:
-        label = "start → alert dispatch (mixed sources)"
+    label = "frame capture → alert dispatch (alerts API)"
+    if camera_filter:
+        label = f"camera={camera_filter} " + label
     logger.info(
         "E2E latency (%s): avg=%.0fms, min=%.0fms, max=%.0fms (%d alerts)",
         label,
@@ -842,7 +1609,8 @@ def _save_alert_thumbnails(
 
 
 def _collect_poi_latency_from_metrics_files(
-    results_dir: str, stream_density: bool = False
+    results_dir: str, stream_density: bool = False,
+    since_ms: Optional[int] = None
 ) -> Dict[str, float]:
     """
     Extract POI detection-to-alert latency from vlm_application_metrics files.
@@ -853,7 +1621,9 @@ def _collect_poi_latency_from_metrics_files(
 
     For single benchmarks uses ``get_vlm_application_latency`` (all pairs).
     For stream density uses ``get_vlm_application_latency_stream_density``
-    (last 20 pairs) to reflect current-iteration performance.
+    with ``since_ms`` so only pairs from the current iteration are measured.
+    The file is never deleted between iterations — poi-backend holds an open
+    RotatingFileHandler to it.
     """
     all_stats: Dict[str, float] = {}
     search_dirs = [results_dir, "/tmp"]
@@ -863,7 +1633,8 @@ def _collect_poi_latency_from_metrics_files(
             if not os.path.isdir(d):
                 continue
             try:
-                stats = get_vlm_application_latency_stream_density(d, last_n_pairs=20)
+                stats = get_vlm_application_latency_stream_density(
+                    d, last_n_pairs=20, since_ms=since_ms)
                 if stats:
                     for app_id, avg_ms in stats.items():
                         all_stats[f"vlm_{app_id}_avg_ms"] = avg_ms
@@ -902,8 +1673,12 @@ def _extract_poi_latency(stats: Dict[str, float], metric: str) -> float:
          end   = wall-clock time at alert dispatch (log_end_time).
          This spans: camera frame capture → DLStreamer pipeline → FAISS match
          → alert dispatch.
-      2. Alerts API fallback (``mqtt_received_at`` → ``dispatched_at``) —
-         POI application latency only, excludes DLStreamer pipeline latency.
+         Two unique_id variants are written per alert:
+           * ``person-of-interest`` — aggregate across all cameras
+           * ``{camera_id}``        — per-camera (for stream-density isolation)
+      2. Alerts API fallback (``timestamp`` frame capture → ``dispatched_at``) —
+         same semantic as (1) but parsed from stored alert JSON rather than
+         the live metrics file.
       3. Returns 0 if no data available.
 
     Note: Docker-log-based ``log_detection_to_alert_ms`` is excluded because
@@ -911,15 +1686,29 @@ def _extract_poi_latency(stats: Dict[str, float], metric: str) -> float:
     first-alert gap includes dedup delay (60 s TTL), making it unreliable
     as a per-event latency metric.
     """
-    # Primary: vlm_application_metrics file-based values
-    vlm_values = [v for k, v in stats.items()
-                  if k.startswith("vlm_") and isinstance(v, (int, float)) and v > 0]
+    # Primary: vlm_application_metrics file-based values.
+    # Exclude the aggregate ``person-of-interest`` key — per-camera entries
+    # (e.g., ``vlm_Person-of-Interest_Camera_01-3_avg_ms``) are more precise
+    # in stream-density mode because they isolate each camera's contribution.
+    # If no per-camera entries exist (e.g., first/single run), fall back to
+    # the aggregate ``person-of-interest`` entry.
+    per_camera_values = [
+        v for k, v in stats.items()
+        if k.startswith("vlm_") and "_person-of-interest_" not in k
+        and isinstance(v, (int, float)) and v > 0
+    ]
+    aggregate_values = [
+        v for k, v in stats.items()
+        if k.startswith("vlm_") and "_person-of-interest_" in k
+        and isinstance(v, (int, float)) and v > 0
+    ]
+    vlm_values = per_camera_values or aggregate_values
     if vlm_values:
         if metric == "max":
             return max(vlm_values)
         return mean(vlm_values)
 
-    # Fallback: alerts API E2E latency
+    # Fallback: alerts API E2E latency (frame capture → dispatched_at)
     e2e_avg = stats.get("poi_e2e_latency_avg_ms", 0.0)
     e2e_max = stats.get("poi_e2e_latency_max_ms", 0.0)
     if e2e_avg > 0:
@@ -931,9 +1720,15 @@ def _extract_poi_latency(stats: Dict[str, float], metric: str) -> float:
 
 
 def _clean_metrics(results_dir: str) -> None:
-    """Remove stale metrics files before each measurement iteration."""
+    """Remove stale metrics files before each measurement iteration.
+
+    Note: vlm_application_metrics files are NOT deleted here because poi-backend
+    holds an open RotatingFileHandler to them.  Deleting the file causes the
+    handler to write to an unlinked inode (invisible to rglob) for the rest of
+    the run.  Instead, callers pass a ``since_ms`` filter so each iteration
+    reads only its own pairs from the cumulative file.
+    """
     patterns = [
-        "vlm_application_metrics*.txt",
         "vlm_performance_metrics*.txt",
     ]
     for d in [results_dir, "/tmp"]:
@@ -945,27 +1740,69 @@ def _clean_metrics(results_dir: str) -> None:
                     pass
 
 
+def _reset_alert_dedup() -> None:
+    """Ask poi-backend to drop its stored alerts and alert-dedup state.
+
+    Issues ``DELETE http://localhost:8000/api/v1/alerts`` (10 s timeout) and
+    logs the response body.  The endpoint is expected to clear the
+    recent-alerts list and the ``alert:sent:*`` / ``alert:*`` Redis keys
+    (backend behaviour; it cannot be verified from this script).
+
+    Why this is needed between stream-density iterations: the dedup key is
+    the object UUID, shared by every camera tracking the same person.  If
+    Camera_01 alerted on that UUID within ALERT_DEDUP_TTL seconds, the newly
+    added camera would be suppressed and never produce an alert.
+
+    Best-effort: any failure is logged as a warning and otherwise ignored.
+    """
+    import urllib.request
+
+    endpoint = "http://localhost:8000/api/v1/alerts"
+    # Everything stays inside the try: a failed reset must never abort the run.
+    try:
+        delete_req = urllib.request.Request(endpoint, method="DELETE")
+        with urllib.request.urlopen(delete_req, timeout=10) as response:
+            body = response.read().decode()
+        logger.info("Alert dedup reset: %s", body)
+    except Exception as exc:
+        logger.warning("Could not reset alert dedup (non-fatal): %s", exc)
+
+
 # ---------------------------------------------------------------------------
 # POI Stream Density Runner
 # ---------------------------------------------------------------------------
 
 class POIStreamDensity:
     """
-    Iteratively increases the number of camera pipelines until end-to-end
-    POI detection-to-alert latency exceeds *target_latency_ms*.
-
-    What gets scaled at each iteration:
-      - RTSP camera streams (lp-cams-N)    → real video feed
-      - DLStreamer pipelines (lp-video)     → inference per camera
-      - SceneScape scenes (scene-import)    → scene clones in SceneScape DB
-      - poi-backend camera subscriptions    → MQTT + RTSP for new cameras
-
-    What stays running untouched:
+    Iteratively increases the number of camera/scene pipelines until the
+    newly added camera's end-to-end detection-to-alert latency exceeds
+    *target_latency_ms*.  This determines the maximum number of cameras
+    the hardware can support for this application.
+
+    Iteration logic
+    ---------------
+    *Baseline* (num_scenes=1): Camera_01 and Camera_02 are already running.
+    Any alert confirms the pipeline is functional.
+
+    *Incremental* (num_scenes > 1): each iteration adds exactly one new
+    camera (Camera_01-3, Camera_01-4, …) and one cloned scene.  The
+    benchmark waits specifically for an alert from that new camera, then
+    measures its E2E latency.  Only if latency ≤ target does the next
+    camera get added.
+
+    What gets added per iteration (num_scenes > 1)
+    ------------------------------------------------
+      - One new RTSP stream  (lp-cams-N Docker service)
+      - One extra DLStreamer pipeline config
+      - One new scene clone in SceneScape (REST API import; scene-import fallback)
+      - lp-video and the new lp-video-N (DLStreamer) force-recreated
+
+    What stays running untouched
+    -----------------------------
+      - poi-backend  ← subscribes via MQTT wildcard ``scenescape/data/camera/+``
+        so it automatically processes every new camera without a restart.
       - SceneScape core (web, controller, broker, ntpserv, pgserver, vdms)
-      - poi-redis          (metadata store)
-      - poi-alert-service  (alert fan-out)
-      - poi-ui             (React frontend)
-      - mediaserver        (RTSP relay)
+      - poi-redis, poi-alert-service, poi-ui, mediaserver
     """
 
     MEMORY_SAFETY_PERCENT = 90
@@ -984,6 +1821,7 @@ def __init__(
         single_run_scenes: int = 1,
         benchmark_duration: int = 120,
         resource_config: str = "",
+        max_alert_wait: int = 180,
     ):
         self.app_dir = os.path.abspath(app_dir)
         self.target_latency_ms = target_latency_ms
@@ -997,6 +1835,20 @@ def __init__(
         self.single_run_scenes = single_run_scenes
         self.benchmark_duration = benchmark_duration
         self.resource_config = resource_config
+        # max total seconds to wait for an alert from the new camera per iteration;
+        # should cover at least 2-3 full video cycles to handle phase offset.
+        # Increase via MAX_ALERT_WAIT env var or --max_alert_wait flag for CPU mode.
+        self.max_alert_wait = max_alert_wait
+        self.latency_metric = latency_metric
+        self.scene_increment = scene_increment
+        self.init_duration = init_duration
+        self.stabilise_duration = stabilise_duration
+        self.results_dir = os.path.abspath(results_dir)
+        self.max_iterations = max_iterations
+        self.single_run = single_run
+        self.single_run_scenes = single_run_scenes
+        self.benchmark_duration = benchmark_duration
+        self.resource_config = resource_config
         os.makedirs(self.results_dir, exist_ok=True)
 
     def _services_running(self) -> bool:
@@ -1009,18 +1861,21 @@ def _services_running(self) -> bool:
                 return False
         return True
 
-    def _wait_for_alert_or_timeout(self, duration: int) -> None:
-        """Poll for alerts during single benchmark, exit early on first alert.
+    def _wait_for_alert_or_timeout(self, duration: int,
+                                    since: Optional[datetime] = None) -> bool:
+        """Poll for new alerts, exit early on first alert after *since*.
+
+        Used both in single-benchmark mode (time-to-first-alert) and in
+        stream-density mode (ensure at least one alert before data collection).
 
-        For single benchmarks the goal is to measure time-to-first-alert.
-        Instead of sleeping the full duration, poll every 5 seconds and
-        return as soon as at least one alert is found.
+        Returns True if a fresh alert was found, False if duration expired.
         """
         import urllib.request
         import urllib.error
 
         poll_interval = 5
         elapsed = 0
+        since_ts = since or datetime.utcnow()
         logger.info("Waiting up to %ds for alert (polling every %ds) …",
                      duration, poll_interval)
 
@@ -1033,20 +1888,34 @@ def _wait_for_alert_or_timeout(self, duration: int) -> None:
                 req = urllib.request.Request("http://localhost:8000/api/v1/alerts")
                 with urllib.request.urlopen(req, timeout=10) as resp:
                     alerts = json.loads(resp.read().decode())
-                if isinstance(alerts, list) and len(alerts) > 0:
-                    logger.info("Alert received after %ds — stopping early", elapsed)
+                # Filter to alerts created after iteration start
+                fresh = [
+                    a for a in (alerts if isinstance(alerts, list) else [])
+                    if a.get("created_at", a.get("timestamp", "")) > since_ts.strftime("%Y-%m-%dT%H:%M:%S")
+                ]
+                if fresh:
+                    logger.info("Fresh alert received after %ds — stopping early", elapsed)
                     # Brief extra wait for metrics files to flush
                     time.sleep(5)
-                    return
+                    return True
             except Exception:
                 pass
 
-            logger.info("No alerts yet (%d/%ds elapsed)", elapsed, duration)
+            logger.info("No fresh alerts yet (%d/%ds elapsed)", elapsed, duration)
 
-        logger.info("Benchmark duration reached (%ds) — collecting results", duration)
+        logger.info("Alert wait duration reached (%ds) — continuing anyway", duration)
+        return False
 
     def run(self) -> StreamDensityResult:
-        """Execute the POI stream-density loop."""
+        """Execute the POI stream-density loop.
+
+        For each iteration:
+          1. Add one new camera + scene (num_scenes > 1) or use base cameras (num_scenes = 1).
+          2. Wait for an alert specifically from the NEW camera.
+          3. Compute E2E latency filtered to that camera's alerts.
+          4. If latency ≤ target → add next camera and repeat.
+             If no alert received or latency > target → report max_scenes and stop.
+        """
         self._print_header()
         result = StreamDensityResult(target_latency_ms=self.target_latency_ms)
 
@@ -1055,8 +1924,13 @@ def run(self) -> StreamDensityResult:
         best: Optional[IterationResult] = None
 
         for iteration in range(1, max_iter + 1):
+            # Determine which camera is newly added in this iteration.
+            # None → baseline (num_scenes=1, Camera_01 + Camera_02 already present).
+            new_camera = _get_new_camera_name(self.app_dir, num_scenes)
+
             print(f"\n{'='*70}")
-            print(f"Iteration {iteration}: Testing {num_scenes} scene(s)")
+            print(f"Iteration {iteration}: Testing {num_scenes} scene(s)  "
+                  f"[new camera: {new_camera or 'N/A (baseline)'}]")
             print(f"{'='*70}")
 
             if not self._memory_safe():
@@ -1066,6 +1940,12 @@ def run(self) -> StreamDensityResult:
             # Record iteration start time for filtering stale alerts
             iteration_start = datetime.utcnow()
 
+            # Reset Redis alert dedup so the new camera can fire fresh alerts.
+            # Without this, UUID-based dedup from previous iterations would
+            # suppress alerts from Camera_01-3 if the same person was already
+            # alerted on Camera_01 (dedup key = object UUID, not camera-specific).
+            _reset_alert_dedup()
+
             # Clean old metrics before each measurement
             _clean_metrics(self.results_dir)
 
@@ -1073,29 +1953,82 @@ def run(self) -> StreamDensityResult:
                 # Single benchmark: services already up, skip scaling
                 logger.info("Services already running — skipping scaling for single benchmark")
             else:
-                # Scale to desired scene count
+                # Scale to desired scene count (adds new camera + scene)
                 _scale_pipeline_services(self.app_dir, num_scenes, wait=self.init_duration,
                                          resource_config=self.resource_config)
 
-            # Wait for data collection
-            if self.single_run:
-                # Single benchmark: poll for alerts with early exit
-                self._wait_for_alert_or_timeout(self.benchmark_duration)
+            # ── Wait for alert from the newly added camera ──────────────────
+            # For num_scenes=1 (baseline) there is no "new" camera, so we fall
+            # back to the generic any-camera wait.  For num_scenes>1 we wait
+            # specifically for the new camera to prove it is fully active.
+            #
+            # The wait retries in init_duration windows until either an alert
+            # is received OR total wait exceeds max_alert_wait.  This is
+            # important for CPU mode where inference is slower and the POI
+            # face may only appear once per video loop (~55s), so a single
+            # 45s window is not guaranteed to cover a full video cycle.
+            if new_camera:
+                # Camera-specific wait: must see an alert from the new camera
+                if self.single_run:
+                    _wait_for_alert_from_camera(
+                        new_camera, self.benchmark_duration, since=iteration_start,
+                    )
+                else:
+                    max_alert_wait = self.max_alert_wait
+                    total_waited = 0
+                    got_alert = False
+                    while not got_alert and total_waited < max_alert_wait:
+                        window = min(self.init_duration, max_alert_wait - total_waited)
+                        got_alert = _wait_for_alert_from_camera(
+                            new_camera, window, since=iteration_start,
+                        )
+                        total_waited += window
+                        if not got_alert and total_waited < max_alert_wait:
+                            logger.info(
+                                "No alert from camera=%s yet — retrying "
+                                "(%ds elapsed, max=%ds) …",
+                                new_camera, total_waited, max_alert_wait,
+                            )
+                    if not got_alert:
+                        logger.warning(
+                            "No alert from new camera=%s within max_alert_wait=%ds",
+                            new_camera, max_alert_wait,
+                        )
+                    logger.info("Collecting data for %ds …", self.stabilise_duration)
+                    time.sleep(self.stabilise_duration)
             else:
-                # Stream density: fixed stabilise wait per iteration
-                logger.info("Collecting data for %ds …", self.stabilise_duration)
-                time.sleep(self.stabilise_duration)
+                # Baseline iteration: any alert is acceptable
+                if self.single_run:
+                    self._wait_for_alert_or_timeout(self.benchmark_duration,
+                                                    since=iteration_start)
+                else:
+                    self._wait_for_alert_or_timeout(self.init_duration,
+                                                    since=iteration_start)
+                    logger.info("Collecting data for %ds …", self.stabilise_duration)
+                    time.sleep(self.stabilise_duration)
 
             # Use actual elapsed time for log collection window
             elapsed_seconds = int((datetime.utcnow() - iteration_start).total_seconds())
             log_window = elapsed_seconds if self.single_run else self.stabilise_duration
 
             # Collect latency from metrics files + docker logs
+            # Use calendar.timegm to convert naive UTC datetime → epoch ms correctly.
+            # datetime.timestamp() treats naive datetimes as LOCAL time, giving a
+            # 5.5h offset on IST systems that makes since_ms far too early.
+            iter_start_ms = int(calendar.timegm(iteration_start.timetuple()) * 1000)
             log_stats = _collect_poi_latency_from_docker_logs(
                 self.app_dir, log_window)
             file_stats = _collect_poi_latency_from_metrics_files(
-                self.results_dir, stream_density=not self.single_run)
-            e2e_stats = _collect_poi_e2e_latency_from_alerts(since=iteration_start)
+                self.results_dir, stream_density=not self.single_run,
+                since_ms=iter_start_ms if not self.single_run else None)
+
+            # E2E latency: filtered to the new camera when in incremental mode,
+            # so pass/fail reflects the NEW camera's responsiveness — not the
+            # aggregate of all previously-running cameras.
+            e2e_stats = _collect_poi_e2e_latency_from_alerts(
+                since=iteration_start,
+                camera_filter=new_camera,   # None → all cameras (baseline)
+            )
 
             # Save alert thumbnails to results directory
             _save_alert_thumbnails(self.results_dir, iteration=iteration,
@@ -1122,39 +2055,68 @@ def run(self) -> StreamDensityResult:
                     int(stats.get("poi_detections", 0)) / num_scenes
                     if num_scenes > 0 else 0
                 ),
+                new_camera=new_camera or "",
             )
 
             self._print_iteration(it_result)
 
-            # Pass/fail based on latency threshold
-            latency_ok = latency > 0 and latency <= self.target_latency_ms
-            has_detections = it_result.actual_detections > 0
-            has_matches = int(stats.get("poi_matches", 0)) > 0
-
-            if latency == 0 and has_matches:
-                # Matches found but latency not measurable (sub-second)
-                it_result.passed = True
-                best = it_result
-                print("  ✓ PASSED  (matches found, latency < 1s)")
-            elif latency == 0 and has_detections and not has_matches:
-                # Pipeline works but target person not in frame during window
-                it_result.passed = True
-                best = it_result
-                print(f"  ✓ PASSED  ({it_result.actual_detections} detections, "
-                      "no matches — target not in frame during window)")
-            elif latency == 0:
-                print("  ⚠ NO DATA – no detections collected")
-                if iteration > 1:
+            # ── Pass / fail decision ────────────────────────────────────────
+            # When a new_camera is defined the benchmark REQUIRES an alert from
+            # that specific camera.  A missing alert means the system cannot
+            # keep up with the additional pipeline load — treat as FAIL.
+            has_camera_alert = bool(e2e_stats)          # camera-filtered → non-empty means alert received
+            has_detections   = it_result.actual_detections > 0
+            has_matches      = int(stats.get("poi_matches", 0)) > 0
+
+            if new_camera:
+                # ── Incremental camera mode ──────────────────────────────────
+                if not has_camera_alert:
+                    it_result.passed = False
+                    print(f"  ✗ NO ALERT from new camera={new_camera} — "
+                          "system cannot process additional pipeline load")
+                    result.iterations.append(it_result)
+                    break
+                elif latency <= 0:
+                    # Alert received but latency sub-measurable (< 1 s)
+                    it_result.passed = True
+                    best = it_result
+                    print(f"  ✓ PASSED  (camera={new_camera} alert received, latency < 1s)")
+                elif latency <= self.target_latency_ms:
+                    it_result.passed = True
+                    best = it_result
+                    print(f"  ✓ PASSED  (camera={new_camera} "
+                          f"latency={latency:.0f}ms ≤ {self.target_latency_ms:.0f}ms)")
+                else:
+                    it_result.passed = False
+                    print(f"  ✗ FAILED  (camera={new_camera} "
+                          f"latency={latency:.0f}ms > {self.target_latency_ms:.0f}ms)")
+                    result.iterations.append(it_result)
                     break
-            elif latency_ok:
-                it_result.passed = True
-                best = it_result
-                print(f"  ✓ PASSED  (latency={latency:.0f}ms ≤ {self.target_latency_ms:.0f}ms)")
             else:
-                it_result.passed = False
-                print(f"  ✗ FAILED  (latency {latency:.0f}ms > {self.target_latency_ms:.0f}ms)")
-                result.iterations.append(it_result)
-                break
+                # ── Baseline iteration (num_scenes=1) — any alert ────────────
+                latency_ok = latency > 0 and latency <= self.target_latency_ms
+                if latency == 0 and has_matches:
+                    it_result.passed = True
+                    best = it_result
+                    print("  ✓ PASSED  (baseline: matches found, latency < 1s)")
+                elif latency == 0 and has_detections and not has_matches:
+                    it_result.passed = True
+                    best = it_result
+                    print(f"  ✓ PASSED  (baseline: {it_result.actual_detections} detections, "
+                          "no matches — target not in frame during window)")
+                elif latency == 0:
+                    print("  ⚠ NO DATA – no detections collected")
+                    if iteration > 1:
+                        break
+                elif latency_ok:
+                    it_result.passed = True
+                    best = it_result
+                    print(f"  ✓ PASSED  (baseline: latency={latency:.0f}ms ≤ {self.target_latency_ms:.0f}ms)")
+                else:
+                    it_result.passed = False
+                    print(f"  ✗ FAILED  (baseline: latency {latency:.0f}ms > {self.target_latency_ms:.0f}ms)")
+                    result.iterations.append(it_result)
+                    break
 
             result.iterations.append(it_result)
             num_scenes += self.scene_increment
@@ -1193,6 +2155,8 @@ def _print_header(self) -> None:
 
     def _print_iteration(self, it: IterationResult) -> None:
         print(f"\n  Scenes:      {it.num_scenes}")
+        if it.new_camera:
+            print(f"  New Camera:  {it.new_camera}")
         print(f"  Latency:     {it.latency_ms:.0f}ms")
         print(f"  Detections:  {it.actual_detections} "
               f"({it.detections_per_scene:.1f}/scene)")
@@ -1215,17 +2179,20 @@ def _print_summary(self, result: StreamDensityResult) -> None:
         print(f"  Met Target:      {'Yes' if result.met_target else 'No'}")
         if result.best_iteration:
             print(f"  Best Latency:    {result.best_iteration.latency_ms:.0f}ms "
-                  f"@ {result.best_iteration.num_scenes} scene(s)")
+                  f"@ {result.best_iteration.num_scenes} scene(s)"
+                  + (f"  [{result.best_iteration.new_camera}]"
+                     if result.best_iteration.new_camera else ""))
         print()
-        print(f"{'Scenes':<10}{'Latency':<12}{'Detections':<14}"
+        print(f"{'Scenes':<10}{'Camera':<16}{'Latency':<12}{'Detections':<14}"
               f"{'Alerts':<10}{'Mem %':<10}{'CPU %':<10}{'Status':<10}")
-        print("-" * 76)
+        print("-" * 92)
         for it in result.iterations:
             status = "✓ PASS" if it.passed else "✗ FAIL"
-            print(f"{it.num_scenes:<10}{it.latency_ms:<12.0f}"
+            cam = it.new_camera or "baseline"
+            print(f"{it.num_scenes:<10}{cam:<16}{it.latency_ms:<12.0f}"
                   f"{it.actual_detections:<14}{it.alerts_generated:<10}"
                   f"{it.memory_percent:<10.1f}{it.cpu_percent:<10.1f}{status}")
-        print("=" * 70)
+        print("=" * 92)
 
     def _export(self, result: StreamDensityResult) -> None:
         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -1239,6 +2206,7 @@ def _export(self, result: StreamDensityResult) -> None:
             "iterations": [
                 {
                     "num_scenes": it.num_scenes,
+                    "new_camera": it.new_camera,
                     "latency_ms": round(it.latency_ms, 2),
                     "passed": it.passed,
                     "memory_percent": round(it.memory_percent, 1),
@@ -1268,10 +2236,11 @@ def _export(self, result: StreamDensityResult) -> None:
         csv_path = os.path.join(self.results_dir, f"poi_stream_density_{ts}.csv")
         with open(csv_path, "w", newline="") as f:
             w = csv.writer(f)
-            w.writerow(["scenes", "latency_ms", "detections", "alerts",
+            w.writerow(["scenes", "new_camera", "latency_ms", "detections", "alerts",
                          "detections_per_scene", "passed", "memory_pct", "cpu_pct"])
             for it in result.iterations:
-                w.writerow([it.num_scenes, f"{it.latency_ms:.0f}",
+                w.writerow([it.num_scenes, it.new_camera or "baseline",
+                            f"{it.latency_ms:.0f}",
                             it.actual_detections, it.alerts_generated,
                             f"{it.detections_per_scene:.1f}",
                             it.passed, f"{it.memory_percent:.1f}",
@@ -1297,6 +2266,7 @@ def cmd_run(args) -> None:
         single_run_scenes=args.scenes,
         benchmark_duration=args.benchmark_duration,
         resource_config=args.resource_config,
+        max_alert_wait=args.max_alert_wait,
     )
     result = tester.run()
     sys.exit(0 if result.met_target else 1)
@@ -1335,11 +2305,15 @@ def main() -> None:
     target_latency = _env_float("TARGET_LATENCY_MS", 2000)
     latency_metric = _env_str("LATENCY_METRIC", "avg")
     scene_increment = _env_int("SCENE_INCREMENT", 1)
-    init_duration = _env_int("INIT_DURATION", 90)
+    init_duration = _env_int("INIT_DURATION", 45)
     stabilise_duration = _env_int("STABILISE_DURATION", 30)
     benchmark_duration = _env_int("BENCHMARK_DURATION", 120)
     results_dir = _env_str("RESULTS_DIR", "./results")
     max_iterations = _env_int("MAX_ITERATIONS", 50)
+    # Default max_alert_wait covers ~3 full video cycles (video ≈ 55s) plus
+    # inference warmup, giving enough time for CPU-mode pipelines to generate
+    # a face match on the new camera regardless of video phase offset.
+    max_alert_wait = _env_int("MAX_ALERT_WAIT", 180)
 
     parser = argparse.ArgumentParser(
         description="POI Stream Density Benchmark",
@@ -1370,6 +2344,13 @@ def main() -> None:
                             "Passed as RESOURCE_CONFIG to init.sh on every "
                             "re-init so device and precision are preserved "
                             "across stream-density iterations.")
+    p_run.add_argument("--max_alert_wait", type=int, default=max_alert_wait,
+                       help="Max total seconds to wait for an alert from the "
+                            "new camera per iteration (default: 180). "
+                            "The wait retries in init_duration windows until "
+                            "an alert is received or this timeout expires. "
+                            "Increase for CPU mode where inference is slower "
+                            "and a full video cycle (~55s) must complete.")
     p_run.set_defaults(func=cmd_run)
 
     # --- generate ---

From 34f65391bfbb06e09c857bd31dfa160a857851ae Mon Sep 17 00:00:00 2001
From: Suryam Agarwal <suryam.agarwal@intel.com>
Date: Tue, 9 Jun 2026 21:23:09 +0530
Subject: [PATCH 2/2] Refactor POI stream density benchmark script

---
 benchmark-scripts/poi_stream_density.py | 3007 ++++++-----------------
 1 file changed, 759 insertions(+), 2248 deletions(-)

diff --git a/benchmark-scripts/poi_stream_density.py b/benchmark-scripts/poi_stream_density.py
index 1110587..a9f20cd 100644
--- a/benchmark-scripts/poi_stream_density.py
+++ b/benchmark-scripts/poi_stream_density.py
@@ -1,1546 +1,667 @@
 #!/usr/bin/env python3
-# Copyright (C) 2026 Intel Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
 """
-Person of Interest (POI) Stream Density Benchmark
-
-Iteratively increases the number of camera/scene pipelines until end-to-end
-POI detection-to-alert latency exceeds a configurable threshold.
-
-The POI pipeline flow measured:
-  Person detected on camera (MQTT) → face embedding extracted → FAISS match
-  → alert dispatched to alert-service
-
-Scaling is achieved by:
-  1. Updating ``stream_density`` in ``zone_config.json``
-  2. Re-running ``init.sh`` to regenerate ``.env`` + DLStreamer pipeline config
-  3. Generating ``docker-compose.cameras.yaml`` with additional RTSP camera
-     streams (``lp-cams-N``) for each new camera
-  4. Restarting ``scene-import`` (imports cloned scenes into SceneScape),
-     ``lp-cams-*``, ``lp-video`` (DLStreamer), and ``poi-backend``
-  5. SceneScape core services (web, controller, broker, etc.), poi-redis,
-     poi-alert-service, and poi-ui stay running
-
-Latency is measured from ``vlm_application_metrics`` files written by the
-``vlm_metrics_logger`` package.  In ``alert_service.py``:
-  - **start**: ``user_log_start_time(frame_ts_ms, ...)`` — the DLStreamer frame
-    capture timestamp from the MQTT payload (epoch ms).  This is the moment
-    the camera frame was decoded by the pipeline.
-  - **end**: ``log_end_time(...)`` — wall-clock time at alert dispatch.
-
-This measures **true end-to-end latency**:
-  camera frame capture → DLStreamer decode/detect/reid → MQTT → FAISS match → alert dispatch.
+Stream Density Benchmark - Performance Tools
 
-Sub-commands
-------------
-run       Full automated stream-density loop.
-generate  Write a ``docker-compose.cameras.yaml`` override for *N* scenes.
-clean     Revert overrides and set ``stream_density`` back to 1.
-down      Tear down all services.
+This file contains ALL benchmark logic including:
+- Orchestration
+- Latency calculation from metrics
+- Pass/fail decisions
+- Scaling decisions
 
-Environment variables
----------------------
-TARGET_LATENCY_MS       Latency threshold in ms  (default: 2000)
-LATENCY_METRIC          Which metric to compare: avg | max  (default: avg)
-SCENE_INCREMENT         Scenes to add per iteration  (default: 1)
-INIT_DURATION           Seconds per alert-wait window per retry  (default: 45)
-MAX_ALERT_WAIT          Max total seconds to wait for alert from new camera
-                        (retries in INIT_DURATION windows until received or
-                        timeout).  Default: 180 — covers ~3 video cycles (~55s
-                        each).  Increase for CPU mode where inference is slower.
-STABILISE_DURATION      Extra wait for pipeline to stabilise  (default: 30)
-BENCHMARK_DURATION      Max wait for single benchmark in seconds  (default: 120)
-RESULTS_DIR             Where to write results  (default: ./results)
-MAX_ITERATIONS          Safety cap on iterations  (default: 50)
-RESOURCE_CONFIG         Path to device resource config file relative to app_dir
-                        (e.g. configs/res/all-gpu.env).  Passed to init.sh on
-                        every re-init so device and model precision are preserved
-                        across stream-density iterations.  Prefer passing
-                        --resource_config on the CLI (set automatically by
-                        ``make benchmark DEVICE=...``).
+POI only provides raw data via files and APIs.
 """
 
 import argparse
-import csv
-import glob
 import calendar
+import csv
 import json
 import logging
+import glob
 import os
 import re
-import shlex
-import shutil
 import subprocess
 import sys
 import time
-from collections import defaultdict
-from dataclasses import dataclass, field
-from datetime import datetime
-from pathlib import Path
-from statistics import mean, median
-from typing import Dict, List, Optional
-
 import psutil
+from datetime import datetime, UTC
+from dataclasses import dataclass, asdict
+from typing import Dict, List, Optional, Any
+from pathlib import Path
 
-from consolidate_multiple_run_of_metrics import (
-    get_vlm_application_latency,
-    get_vlm_application_latency_stream_density,
-)
-
-# ---------------------------------------------------------------------------
-# Logging
-# ---------------------------------------------------------------------------
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s  %(levelname)-8s  %(message)s",
-)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s  %(levelname)-8s  %(message)s")
 logger = logging.getLogger(__name__)
 
-# ---------------------------------------------------------------------------
-# Environment helpers
-# ---------------------------------------------------------------------------
-
-def _env_int(name: str, default: int) -> int:
-    v = os.environ.get(name)
-    if v is not None:
-        try:
-            return int(v)
-        except ValueError:
-            logger.warning("Invalid %s=%s, using default %d", name, v, default)
-    return default
-
-
-def _env_float(name: str, default: float) -> float:
-    v = os.environ.get(name)
-    if v is not None:
-        try:
-            return float(v)
-        except ValueError:
-            logger.warning("Invalid %s=%s, using default %.1f", name, v, default)
-    return default
-
-
-def _env_str(name: str, default: str) -> str:
-    return os.environ.get(name, default)
-
-
-# ---------------------------------------------------------------------------
-# Data classes
-# ---------------------------------------------------------------------------
 
 @dataclass
-class IterationResult:
-    """Metrics captured during one iteration."""
-    num_scenes: int
-    latency_ms: float  # chosen metric (avg or max)
-    latency_details: Dict[str, float] = field(default_factory=dict)
-    passed: bool = False
-    memory_percent: float = 0.0
-    cpu_percent: float = 0.0
-    timestamp: str = ""
-    # Throughput metrics
-    actual_detections: int = 0
-    alerts_generated: int = 0
-    detections_per_scene: float = 0.0
-    # Camera under test for this iteration (empty for baseline)
-    new_camera: str = ""
+class BenchmarkConfig:
+    """Configuration for a benchmark run."""
+    target_latency_ms: float = 2000  # latency threshold, in milliseconds
+    latency_metric: str = "avg"  # which latency statistic is compared: avg or max
+    scene_increment: int = 1  # presumably scenes added per iteration — confirm
+    init_duration: int = 45  # presumably seconds per alert-wait window — confirm
+    stabilise_duration: int = 30  # presumably seconds of settle time per iteration — confirm
+    max_iterations: int = 50  # presumably a safety cap on loop iterations — confirm
+    max_alert_wait: int = 180  # presumably total seconds to wait for an alert — confirm
+    benchmark_duration: int = 120  # presumably max seconds for a single-run benchmark — confirm
+    single_run: bool = False  # NOTE(review): looks like it selects one measurement over the loop
+    single_run_scenes: int = 1  # NOTE(review): looks like the scene count used with single_run
+    results_dir: str = "./results"  # presumably where result files are written — confirm
 
 
 @dataclass
-class StreamDensityResult:
-    """Aggregate result of the full stream-density run."""
-    target_latency_ms: float = 2000.0
-    max_scenes: int = 0
-    met_target: bool = False
-    iterations: List[IterationResult] = field(default_factory=list)
-    best_iteration: Optional[IterationResult] = None
-
-
-# ---------------------------------------------------------------------------
-# Helpers – zone_config.json manipulation
-# ---------------------------------------------------------------------------
-
-def _zone_config_path(app_dir: str) -> Path:
-    return Path(app_dir) / "configs" / "zone_config.json"
-
-
-def _read_zone_config(app_dir: str) -> dict:
-    p = _zone_config_path(app_dir)
-    with open(p) as f:
-        return json.load(f)
-
-
-def _write_zone_config(app_dir: str, cfg: dict) -> None:
-    p = _zone_config_path(app_dir)
-    bak = p.with_suffix(".json.bak")
-    if not bak.exists():
-        shutil.copy2(p, bak)
-    with open(p, "w") as f:
-        json.dump(cfg, f, indent=2)
-    logger.info("Updated %s  (stream_density=%s)", p, cfg.get("stream_density"))
-
-
-def _set_stream_density(app_dir: str, density: int) -> None:
-    cfg = _read_zone_config(app_dir)
-    cfg["stream_density"] = density
-    _write_zone_config(app_dir, cfg)
-
-
-# ---------------------------------------------------------------------------
-# Helpers – docker compose
-# ---------------------------------------------------------------------------
-
-def _compose_cmd(app_dir: str) -> str:
-    """Build a combined compose invocation spanning SceneScape + POI.
-
-    Includes scenescape-overrides, NPU overlay (when NPU device is active),
-    and cameras override when present.
-    The cameras override may reference POI services (e.g. poi-backend
-    environment), so the POI compose file must be included too.
-    """
-    scenescape_dir = str(Path(app_dir) / ".." / "scenescape")
-    scenescape_compose = os.path.join(scenescape_dir, "docker-compose.yaml")
-    overrides = os.path.join(app_dir, "docker-compose.scenescape-overrides.yml")
-    poi_compose = os.path.join(app_dir, "docker-compose.yml")
-    env_file = os.path.join(app_dir, "docker", ".env")
-
-    parts = [
-        "docker compose",
-        f"--project-directory {shlex.quote(app_dir)}",
-        f"--env-file {shlex.quote(env_file)}",
-        f"-f {shlex.quote(scenescape_compose)}",
-    ]
-    if os.path.isfile(overrides):
-        parts.append(f"-f {shlex.quote(overrides)}")
-    # Mirror Makefile logic: include NPU overlay when RESOURCE_CONFIG contains "npu".
-    # Without this, force-recreating lp-video strips /dev/accel, causing
-    # "[NPU_VCL] Unrecognized device ID! 0x0x0" on every stream-density iteration.
-    npu_overlay = os.path.join(app_dir, "docker-compose.npu-overrides.yml")
-    if os.path.isfile(npu_overlay) and _is_npu_device(app_dir):
-        parts.append(f"-f {shlex.quote(npu_overlay)}")
-    parts.append(f"-f {shlex.quote(poi_compose)}")
-    # Layer in cameras override if it exists
-    cameras_override = os.path.join(app_dir, "docker", "docker-compose.cameras.yaml")
-    if os.path.isfile(cameras_override):
-        parts.append(f"-f {shlex.quote(cameras_override)}")
-    return " ".join(parts)
-
-
-def _poi_compose_cmd(app_dir: str) -> str:
-    """Build compose command for POI-only services."""
-    poi_compose = os.path.join(app_dir, "docker-compose.yml")
-    return f"docker compose -f {shlex.quote(poi_compose)}"
-
-
-def _docker_compose(app_dir: str, action: str) -> int:
-    """Run a combined compose action (SceneScape + POI)."""
-    if "up" in action:
-        subprocess.run(
-            "docker network create storewide-lp",
-            shell=True, capture_output=True,
-        )
-    cmd = f"{_compose_cmd(app_dir)} {action}"
-    logger.info("Running: %s", cmd)
-    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    if result.returncode != 0 and "down" not in action:
-        logger.warning("docker compose stderr:\n%s", result.stderr[-500:])
-    return result.returncode
-
-
-def _poi_compose(app_dir: str, action: str) -> int:
-    """Run a compose action against POI-only services."""
-    cmd = f"{_poi_compose_cmd(app_dir)} {action}"
-    logger.info("Running: %s", cmd)
-    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    if result.returncode != 0 and "down" not in action:
-        logger.warning("docker compose stderr:\n%s", result.stderr[-500:])
-    return result.returncode
-
-
-def _run_cmd(cmd: str) -> subprocess.CompletedProcess:
-    logger.debug("Running: %s", cmd)
-    return subprocess.run(cmd, shell=True, capture_output=True, text=True)
-
-
-# ---------------------------------------------------------------------------
-# Helpers – SceneScape scene cleanup via REST API
-# ---------------------------------------------------------------------------
-
-# ---------------------------------------------------------------------------
-# SceneScape REST API helpers
-# ---------------------------------------------------------------------------
-
-def _scenescape_get_client(app_dir: str):
-    """Authenticate with SceneScape and return (base_url, ssl_ctx, token).
-
-    Reads SUPASS from docker/.env and authenticates as admin.
-    Returns (None, None, None) on failure — callers must handle gracefully.
-    """
-    import ssl
-    import urllib.request
-
-    env_file = os.path.join(app_dir, "docker", ".env")
-    supass = ""
-    if os.path.isfile(env_file):
-        for line in open(env_file):
-            if line.startswith("SUPASS="):
-                supass = line.strip().split("=", 1)[1]
-                break
-    if not supass:
-        logger.warning("Could not read SUPASS from docker/.env — SceneScape API unavailable")
-        return None, None, None
-
-    # Read base_url from zone_config.json; default to https://localhost
-    try:
-        zone_cfg = _read_zone_config(app_dir)
-        base_url = zone_cfg.get("scenescape_api", {}).get("base_url", "https://localhost").rstrip("/")
-        base_url = base_url + "/api/v1"
-    except Exception:
-        base_url = "https://localhost/api/v1"
-
-    ctx = ssl.create_default_context()
-    ctx.check_hostname = False
-    ctx.verify_mode = ssl.CERT_NONE
-
-    auth_data = json.dumps({"username": "admin", "password": supass}).encode()
-    req = urllib.request.Request(
-        f"{base_url}/auth", data=auth_data,
-        headers={"Content-Type": "application/json"})
-    try:
-        with urllib.request.urlopen(req, context=ctx, timeout=15) as resp:
-            token = json.loads(resp.read()).get("token", "")
-    except Exception as e:
-        logger.warning("SceneScape authentication failed: %s", e)
-        return None, None, None
-
-    if not token:
-        logger.warning("SceneScape auth returned empty token")
-        return None, None, None
-
-    return base_url, ctx, token
-
-
-def _clone_scene_zip(base_zip_path: str, scene_name: str, camera_name: str) -> bytes:
-    """Clone *base_zip_path* with a new scene name and camera name.
-
-    Returns the cloned ZIP as raw bytes (suitable for multipart upload).
-    Replicates the logic in scenescape/webserver/stream_density.py so that the
-    benchmark can call the SceneScape import-scene API directly from the host
-    without spinning up a Docker sidecar container.
-
-    The import-scene endpoint matches the background image by checking whether
-    the scene name appears in the image filename, so we rename the image to
-    ``<scene_name>.<ext>`` in the output ZIP.
-    """
-    import io
-    import uuid
-    import zipfile
-
-    with zipfile.ZipFile(base_zip_path, "r") as zf:
-        json_name = None
-        base_json = None
-        other_files: dict = {}
-        for name in zf.namelist():
-            data = zf.read(name)
-            if name.endswith(".json"):
-                json_name = name
-                base_json = json.loads(data)
-            else:
-                other_files[name] = data
-
-    if not json_name or base_json is None:
-        raise ValueError(f"No scene JSON found in {base_zip_path}")
-
-    # Deep-copy and patch the scene JSON
-    scene_data = json.loads(json.dumps(base_json))
-    new_scene_uid = str(uuid.uuid4())
-    scene_data["uid"] = new_scene_uid
-    scene_data["name"] = scene_name
-
-    for cam in scene_data.get("cameras", []):
-        cam["uid"] = camera_name
-        cam["name"] = camera_name
-        cam["scene"] = new_scene_uid
-
-    for region in scene_data.get("regions", []):
-        region["uid"] = str(uuid.uuid4())
-        region["scene"] = new_scene_uid
-
-    # SceneScape's import-scene matches the resource file by checking whether
-    # the scene name is a substring of the filename.  Rename the image so it
-    # matches: "<scene_name><ext>" (e.g. "conference room-2.jpg").
-    safe_name = scene_name.replace("/", "_")
-
-    buf = io.BytesIO()
-    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf_out:
-        zf_out.writestr(f"{safe_name}.json", json.dumps(scene_data))
-        for orig_name, data in other_files.items():
-            import os as _os
-            ext = _os.path.splitext(orig_name)[1]
-            zf_out.writestr(f"{safe_name}{ext}", data)
-    return buf.getvalue()
-
-
-def _scenescape_import_scene(
-    app_dir: str,
-    scene_name: str,
-    camera_name: str,
-) -> tuple:
-    """Register a new scene + camera in SceneScape via POST /api/v1/import-scene/.
-
-    Clones the base scene ZIP in-memory with the new scene/camera names and
-    uploads it using the same multipart endpoint that scene-import.sh uses.
-    This replaces the Docker sidecar approach while being ~10× faster (~200ms).
-
-    Returns (scene_uid, camera_name) on success, (None, None) on failure.
-    Failure is non-fatal — callers fall back to the scene-import sidecar.
-    """
-    import io
-    import urllib.error
-    import urllib.request
-
-    base_url, ctx, token = _scenescape_get_client(app_dir)
-    if not token:
-        return None, None
-
-    # Locate base scene ZIP
-    try:
-        zone_cfg = _read_zone_config(app_dir)
-        scene_zip_name = zone_cfg.get("scene_zip", "conference-room.zip")
-    except Exception:
-        scene_zip_name = "conference-room.zip"
-
-    zip_path = str(Path(app_dir) / ".." / "scenescape" / "webserver" / scene_zip_name)
-    if not Path(zip_path).exists():
-        logger.warning("Base scene ZIP not found at %s — falling back to scene-import", zip_path)
-        return None, None
-
-    # Clone ZIP in-memory
-    try:
-        zip_bytes = _clone_scene_zip(zip_path, scene_name, camera_name)
-    except Exception as e:
-        logger.warning("Failed to clone scene ZIP: %s", e)
-        return None, None
-
-    # Build multipart/form-data body — SceneScape expects field name "zipFile"
-    boundary = "----BenchmarkFormBoundary"
-    filename = f"{scene_name.replace(' ', '-')}.zip"
-    body = (
-        f"--{boundary}\r\n"
-        f'Content-Disposition: form-data; name="zipFile"; filename="{filename}"\r\n'
-        f"Content-Type: application/zip\r\n\r\n"
-    ).encode() + zip_bytes + f"\r\n--{boundary}--\r\n".encode()
-
-    req = urllib.request.Request(
-        f"{base_url}/import-scene/",
-        data=body,
-        headers={
-            "Authorization": f"Token {token}",
-            "Content-Type": f"multipart/form-data; boundary={boundary}",
-        },
-        method="POST",
-    )
-    try:
-        with urllib.request.urlopen(req, context=ctx, timeout=60) as resp:
-            resp_data = json.loads(resp.read())
-            # import-scene response: {"scene": null=success | {errors}, "cameras": ...}
-            # scene: null means scene was created successfully (null = no errors).
-            # The scene UID lives inside the camera objects in the cameras list.
-            scene_errors = resp_data.get("scene")
-            if scene_errors is not None:
-                # scene field is non-null → contains error details
-                logger.warning("SceneScape import-scene scene error: %s", scene_errors)
-                return None, None
-
-            # Scene created — extract UID from cameras data
-            scene_uid = ""
-            cameras = resp_data.get("cameras") or []
-            for cam_entry in cameras:
-                # cam_entry may be a list (one per camera) of [errors_or_None, cam_obj]
-                entries = cam_entry if isinstance(cam_entry, list) else [cam_entry]
-                for entry in entries:
-                    if isinstance(entry, dict) and entry.get("scene"):
-                        scene_uid = entry["scene"]
-                        break
-                if scene_uid:
-                    break
-
-            if not scene_uid:
-                # Fall back to listing scenes by name
-                try:
-                    list_req = urllib.request.Request(
-                        f"{base_url}/scenes", headers={"Authorization": f"Token {token}"})
-                    with urllib.request.urlopen(list_req, context=ctx, timeout=10) as r:
-                        for s in json.loads(r.read()).get("results", []):
-                            if s.get("name") == scene_name:
-                                scene_uid = s["uid"]
-                                break
-                except Exception:
-                    pass
-
-            logger.info("SceneScape scene+camera imported: %s / %s (uid=%s)",
-                        scene_name, camera_name, scene_uid)
-            return scene_uid or scene_name, camera_name
-    except urllib.error.HTTPError as e:
-        logger.warning("SceneScape import-scene → HTTP %s: %s", e.code, e.read().decode()[:200])
-        return None, None
-    except Exception as e:
-        logger.warning("SceneScape import-scene → %s", e)
-        return None, None
-
-
-def _delete_cloned_scenes(app_dir: str, num_scenes: int) -> None:
-    """Delete previously-cloned scenes from SceneScape via REST API."""
-    import urllib.request
-    import urllib.error
-
-    base_url, ctx, token = _scenescape_get_client(app_dir)
-    if not token:
-        logger.warning("Could not authenticate — skipping scene cleanup")
-        return
-
-    auth_header = {"Authorization": f"Token {token}"}
-
-    # List and delete cloned scenes (those with -N suffix)
-    req = urllib.request.Request(f"{base_url}/scenes", headers=auth_header)
-    try:
-        with urllib.request.urlopen(req, context=ctx) as resp:
-            scenes = json.loads(resp.read()).get("results", [])
-    except Exception:
-        return
-
-    for scene in scenes:
-        name = scene.get("name", "")
-        uid = scene.get("uid", "")
-        if re.search(r'-\d+$', name):
-            logger.info("Deleting cloned scene: %s (%s)", name, uid)
-            req = urllib.request.Request(
-                f"{base_url}/scene/{uid}",
-                method="DELETE", headers=auth_header)
-            try:
-                urllib.request.urlopen(req, context=ctx)
-            except urllib.error.HTTPError as e:
-                logger.warning("  DELETE failed (%s): %s", e.code, e.reason)
-
-    # Delete orphaned cameras with -N suffix
-    req = urllib.request.Request(f"{base_url}/cameras", headers=auth_header)
-    try:
-        with urllib.request.urlopen(req, context=ctx) as resp:
-            cameras = json.loads(resp.read()).get("results", [])
-    except Exception:
-        cameras = []
-    for cam in cameras:
-        cam_name = cam.get("name", "")
-        cam_id = cam.get("uid", cam.get("id", cam.get("sensor_id", "")))
-        if re.search(r'-\d+$', cam_name) and cam_id:
-            logger.info("Deleting orphaned camera: %s (%s)", cam_name, cam_id)
-            req = urllib.request.Request(
-                f"{base_url}/camera/{cam_id}",
-                method="DELETE", headers=auth_header)
-            try:
-                urllib.request.urlopen(req, context=ctx)
-            except urllib.error.HTTPError as e:
-                logger.warning("  DELETE camera failed (%s): %s", e.code, e.reason)
-
-
-# ---------------------------------------------------------------------------
-# Helpers – cameras override generation
-# ---------------------------------------------------------------------------
-
-def _read_base_config(app_dir: str) -> dict:
-    """Read base camera/scene/video config from zone_config.json."""
-    cfg = _read_zone_config(app_dir)
-    cameras = cfg.get("cameras", [])
-    if cameras:
-        camera = cameras[0].get("name", "Camera_01")
-        video = cameras[0].get("video_file", "Camera_01.mp4")
-    else:
-        camera = cfg.get("camera_name", "Camera_01")
-        video = cfg.get("video_file", "Camera_01.mp4")
-    return {"camera_name": camera, "video_file": video}
-
-
-def _generate_cameras_override(app_dir: str, num_scenes: int) -> None:
-    """
-    Generate ``docker-compose.cameras.yaml`` that adds real RTSP camera
-    streams and DLStreamer pipeline containers for each additional camera
-    beyond the base two (Camera_01, Camera_02).
-
-    For N scenes, we create:
-      - lp-cams-{cam_idx}:   ffmpeg RTSP server for the new camera stream
-      - lp-video-{cam_idx}:  DLStreamer container running the inference pipeline
-      - lp-config-{cam_idx}: Docker config pointing to the generated pipeline JSON
-
-    Architecture note: each camera must have its own DLStreamer (lp-video-N)
-    container because each container mounts a single config.json (via Docker
-    configs) that hardcodes the RTSP source and camera name for that pipeline.
-    Sharing one container across cameras is not supported by this service design.
-    """
-    override_path = Path(app_dir) / "docker" / "docker-compose.cameras.yaml"
-    base = _read_base_config(app_dir)
-    base_camera = base["camera_name"]
-    base_video = base["video_file"]
-
-    scenescape_dir = (Path(app_dir) / ".." / "scenescape").resolve()
-    dlstreamer_dir = scenescape_dir / "dlstreamer-pipeline-server"
-
-    # POI already has 2 base cameras (Camera_01, Camera_02); add more
-    # starting from camera index 3 (for scenes > 1)
-    base_camera_count = 2
-
-    # When NPU is selected, extra DLStreamer containers need /dev/accel access.
-    # Without it OpenVINO reports "[NPU_VCL] Unrecognized device ID! 0x0x0"
-    # and the inference pipeline fails to initialise.
-    is_npu = _is_npu_device(app_dir)
-
-    with open(override_path, "w") as f:
-        f.write("# Auto-generated by poi_stream_density.py — do not edit\n")
-        f.write(f"# Stream density: {num_scenes} scenes\n\n")
-        f.write("services:\n")
-
-        # Additional RTSP camera streams + DLStreamer instances
-        for i in range(1, num_scenes):
-            cam_idx = base_camera_count + i
-            cam_name = f"{base_camera}-{cam_idx}"
-            cams_svc = f"lp-cams-{cam_idx}"
-            video_svc = f"lp-video-{cam_idx}"
-            config_name = f"lp-config-{cam_idx}"
-
-            # RTSP camera stream (ffmpeg)
-            f.write(f"  {cams_svc}:\n")
-            f.write(f"    image: linuxserver/ffmpeg:version-8.0-cli\n")
-            f.write(f'    command: "-nostdin -re -stream_loop -1 '
-                    f'-i /workspace/media/{base_video} '
-                    f'-c:v copy -an -f rtsp -rtsp_transport tcp '
-                    f'rtsp://mediaserver:8554/{cam_name}"\n')
-            f.write(f"    volumes:\n")
-            f.write(f"      - vol-sample-data:/workspace/media\n")
-            f.write(f"    networks:\n")
-            f.write(f"      - storewide-lp\n")
-            f.write(f"    depends_on:\n")
-            f.write(f"      - mediaserver\n")
-            f.write(f'    restart: "no"\n')
-            f.write(f"\n")
-
-            # DLStreamer pipeline server for this camera.
-            # Mirrors the lp-video-2 service from scenescape/docker-compose.yaml
-            # but uses lp-config-{cam_idx} to load this camera's pipeline config.
-            f.write(f"  {video_svc}:\n")
-            f.write(f"    image: docker.io/intel/dlstreamer-pipeline-server:${{DLSTREAMER_VERSION:-2026.1.0-20260331-weekly-ubuntu24}}\n")
-            f.write(f"    networks:\n")
-            f.write(f"      storewide-lp:\n")
-            f.write(f"    tty: true\n")
-            f.write(f"    entrypoint: [\"./run.sh\"]\n")
-            f.write(f"    devices:\n")
-            f.write(f"      - \"/dev/dri:/dev/dri\"\n")
-            if is_npu:
-                # Intel NPU (MTL/WCL) is exposed as /dev/accel (major 261).
-                # Without this mapping the VCL compiler reads device ID 0x0x0
-                # and fails: "[NPU_VCL] Unrecognized device ID! 0x0x0"
-                f.write(f"      - \"/dev/accel:/dev/accel\"\n")
-            f.write(f"    group_add:\n")
-            f.write(f"      - \"109\"\n")
-            f.write(f"      - \"110\"\n")
-            f.write(f"      - \"992\"\n")
-            f.write(f"    device_cgroup_rules:\n")
-            f.write(f"      - \"c 189:* rmw\"\n")
-            f.write(f"      - \"c 209:* rmw\"\n")
-            f.write(f"      - \"a 189:* rwm\"\n")
-            if is_npu:
-                f.write(f"      - \"c 261:* rmw\"  # Intel NPU accel devices\n")
-            f.write(f"    depends_on:\n")
-            f.write(f"      broker:\n")
-            f.write(f"        condition: service_started\n")
-            f.write(f"      ntpserv:\n")
-            f.write(f"        condition: service_started\n")
-            f.write(f"      {cams_svc}:\n")
-            f.write(f"        condition: service_started\n")
-            f.write(f"    healthcheck:\n")
-            f.write(f'      test: ["CMD", "curl", "-I", "-s", "http://localhost:8080/pipelines"]\n')
-            f.write(f"      interval: 10s\n")
-            f.write(f"      timeout: 5s\n")
-            f.write(f"      retries: 5\n")
-            f.write(f"      start_period: 10s\n")
-            f.write(f"    environment:\n")
-            f.write(f"      - RUN_MODE=EVA\n")
-            f.write(f"      - GENICAM=Balluff\n")
-            f.write(f"      - GST_DEBUG=1,gencamsrc:2\n")
-            f.write(f"      - ADD_UTCTIME_TO_METADATA=true\n")
-            f.write(f"      - APPEND_PIPELINE_NAME_TO_PUBLISHER_TOPIC=false\n")
-            f.write(f"      - MQTT_HOST=broker.scenescape.intel.com\n")
-            f.write(f"      - MQTT_PORT=1883\n")
-            f.write(f"      - REST_SERVER_PORT=8080\n")
-            f.write(f"      - HTTPS_PROXY=${{HTTPS_PROXY}}\n")
-            f.write(f"      - https_proxy=${{https_proxy}}\n")
-            f.write(f"      - HTTP_PROXY=${{HTTP_PROXY}}\n")
-            f.write(f"      - http_proxy=${{http_proxy}}\n")
-            f.write(f"      - NO_PROXY=mediaserver,${{NO_PROXY}}\n")
-            f.write(f"      - no_proxy=mediaserver,${{no_proxy}}\n")
-            f.write(f"    configs:\n")
-            f.write(f"      - source: {config_name}\n")
-            f.write(f"        target: /home/pipeline-server/config.json\n")
-            f.write(f"    volumes:\n")
-            f.write(f"      - ../scenescape/dlstreamer-pipeline-server/user_scripts:/home/pipeline-server/user_scripts\n")
-            f.write(f"      - vol-dlstreamer-pipeline-root-{cam_idx}:/var/cache/pipeline_root:uid=1999,gid=1999\n")
-            f.write(f"      - vol-sample-data:/home/pipeline-server/videos\n")
-            f.write(f"      - vol-models:/home/pipeline-server/models\n")
-            f.write(f"    secrets:\n")
-            f.write(f"      - source: root-cert\n")
-            f.write(f"        target: certs/scenescape-ca.pem\n")
-            f.write(f"    restart: always\n")
-            f.write(f"    pids_limit: 1000\n")
-            f.write(f"\n")
-
-        # Build dynamic MQTT camera list for poi-backend
-        all_cameras = ["Camera_01", "Camera_02"]
-        for i in range(1, num_scenes):
-            cam_idx = base_camera_count + i
-            all_cameras.append(f"{base_camera}-{cam_idx}")
-        camera_csv = ",".join(all_cameras)
-
-        # Override poi-backend to subscribe to all cameras
-        f.write(f"  poi-backend:\n")
-        f.write(f"    environment:\n")
-        f.write(f"      RTSP_PREWARM_CAMERAS: \"{camera_csv}\"\n")
-        f.write(f"      MQTT_IMAGE_CAMERAS: \"{camera_csv}\"\n")
-        f.write(f"      STREAM_DENSITY: \"{num_scenes}\"\n")
-        f.write(f"\n")
-
-        # Docker configs for each new DLStreamer pipeline
-        if num_scenes > 1:
-            f.write(f"configs:\n")
-            for i in range(1, num_scenes):
-                cam_idx = base_camera_count + i
-                cam_name = f"{base_camera}-{cam_idx}"
-                config_name = f"lp-config-{cam_idx}"
-                env_var = f"PIPELINE_CONFIG_{cam_idx}"
-                default_path = dlstreamer_dir / f"person-of-interest-{cam_name}-pipeline-config.json"
-                f.write(f"  {config_name}:\n")
-                f.write(f"    file: ${{{env_var}:-{default_path}}}\n")
-            f.write(f"\n")
-
-            # Named volumes for each new DLStreamer container's pipeline cache
-            f.write(f"volumes:\n")
-            for i in range(1, num_scenes):
-                cam_idx = base_camera_count + i
-                f.write(f"  vol-dlstreamer-pipeline-root-{cam_idx}:\n")
-
-    logger.info("Generated cameras override: %s  (%d scenes, %d extra cameras+DLStreamer instances)",
-                override_path, num_scenes, max(0, num_scenes - 1))
-
-
-def _generate_dlstreamer_config(app_dir: str, num_scenes: int) -> None:
-    """
-    Generate a multi-pipeline DLStreamer config for N scenes.
-
-    Reads the base pipeline config template and replicates it for each
-    additional camera, updating the camera name in each pipeline.
-    Also writes PIPELINE_CONFIG_{cam_idx} to docker/.env so that the
-    lp-config-{cam_idx} Docker config defined in docker-compose.cameras.yaml
-    can resolve the correct pipeline JSON path.
-    """
-    scenescape_dir = Path(app_dir) / ".." / "scenescape"
-    dlstreamer_dir = scenescape_dir / "dlstreamer-pipeline-server"
-    env_file = os.path.join(app_dir, "docker", ".env")
-
-    base = _read_base_config(app_dir)
-    base_camera = base["camera_name"]
-    base_camera_count = 2
-
-    # Read existing Camera_01 pipeline config as template
-    template_path = dlstreamer_dir / f"person-of-interest-{base_camera}-pipeline-config.json"
-    if not template_path.exists():
-        logger.warning("Pipeline template not found: %s", template_path)
-        return
-
-    with open(template_path) as fh:
-        template_cfg = json.load(fh)
-
-    # Generate config for each additional camera
-    for i in range(1, num_scenes):
-        cam_idx = base_camera_count + i
-        cam_name = f"{base_camera}-{cam_idx}"
-        output_path = dlstreamer_dir / f"person-of-interest-{cam_name}-pipeline-config.json"
-
-        # Deep-copy and substitute camera name
-        cfg_str = json.dumps(template_cfg)
-        cfg_str = cfg_str.replace(base_camera, cam_name)
-        cfg = json.loads(cfg_str)
-
-        # Update pipeline name
-        if "config" in cfg and "pipelines" in cfg["config"]:
-            for pipeline in cfg["config"]["pipelines"]:
-                pipeline["name"] = f"reid_{cam_name}"
-
-        with open(output_path, "w") as fh:
-            json.dump(cfg, fh, indent=2)
-        logger.info("Generated pipeline config: %s", output_path)
-
-        # Write PIPELINE_CONFIG_{cam_idx} so docker-compose.cameras.yaml can
-        # resolve the lp-config-{cam_idx} Docker config file path.
-        env_key = f"PIPELINE_CONFIG_{cam_idx}"
-        _write_env_var(env_file, env_key, str(output_path.resolve()))
-
-    logger.info("Generated DLStreamer configs for %d total cameras", base_camera_count + num_scenes - 1)
-
-
-def _is_npu_device(app_dir: str) -> bool:
-    """Return True when the active resource config selects NPU.
-
-    Reads RESOURCE_CONFIG from docker/.env to mirror the Makefile logic:
-    ``$(if $(findstring npu,$(DEVICE)),-f $(NPU_OVERLAY),)``.
-    """
-    env_file = os.path.join(app_dir, "docker", ".env")
-    if os.path.isfile(env_file):
-        with open(env_file) as fh:
-            for line in fh:
-                if line.startswith("RESOURCE_CONFIG=") and "npu" in line.lower():
-                    return True
-    return False
-
-
-def _reinit_env(app_dir: str, resource_config: str = "") -> None:
-    """Re-run init.sh to regenerate .env with updated config.
-
-    Parameters
-    ----------
-    app_dir:
-        Absolute path to the person-of-interest/ directory.
-    resource_config:
-        Path to the device resource config file (e.g. configs/res/all-gpu.env).
-        Passed as the ``RESOURCE_CONFIG`` env var to init.sh so that device,
-        precision, and pre-process settings are preserved across stream-density
-        iterations.  When empty, init.sh uses its own default (all-gpu-cpu.env).
-    """
-    init_script = Path(app_dir) / ".." / "scenescape" / "scripts" / "init.sh"
-    if not init_script.exists():
-        logger.warning("init.sh not found at %s — skipping .env regeneration", init_script)
-        return
-
-    env = os.environ.copy()
-    if resource_config:
-        # init.sh constructs RESOURCE_CONFIG_PATH as "${APP_DIR}/${RESOURCE_CONFIG}",
-        # so it expects a path relative to app_dir, not an absolute path.
-        # Convert absolute → relative so the path resolves correctly inside init.sh.
-        try:
-            rel_rc = str(Path(resource_config).relative_to(Path(app_dir)))
-        except ValueError:
-            rel_rc = resource_config  # already relative or outside app_dir
-        env["RESOURCE_CONFIG"] = rel_rc
-        logger.info("Re-running init.sh with RESOURCE_CONFIG=%s …", rel_rc)
-    else:
-        logger.info("Re-running init.sh to update .env …")
-
-    cmd = f"bash {shlex.quote(str(init_script))} {shlex.quote(app_dir)}"
-    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, env=env)
-    if result.returncode != 0:
-        # init.sh writes errors to stdout (no >&2), so log both streams
-        output = (result.stderr + result.stdout)[-500:]
-        logger.warning("init.sh returned non-zero:\n%s", output)
-    else:
-        logger.info("init.sh completed — .env updated")
-
-
-def _wait_for_first_detection(timeout: int = 60, poll_interval: int = 3,
-                              camera_filter: Optional[str] = None) -> bool:
-    """Poll poi-backend logs until the first face detection arrives or timeout.
-
-    Args:
-        camera_filter: If set, only detections from this specific camera count
-                       as "warm". This prevents Camera_01/02 detections from
-                       masking the fact that Camera_01-3's pipeline hasn't
-                       started yet (false-positive "pipeline is warm").
-
-    Returns True if a detection was seen within *timeout* seconds, False otherwise.
-    Uses --since to only read logs produced after this function is called, avoiding
-    false positives from stale log lines from previous iterations.
-    """
-    camera_label = f" from camera={camera_filter}" if camera_filter else ""
-    since = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-    deadline = time.time() + timeout
-    attempt = 0
-    while time.time() < deadline:
-        result = subprocess.run(
-            f"docker logs --since {since} poi-backend 2>&1",
-            shell=True, capture_output=True, text=True)
-        output = result.stdout + result.stderr
-        is_detection = (
-            "POI match" in output or
-            "face embedding" in output.lower() or
-            "detections" in output.lower() or
-            "poi_detections" in output
-        )
-        if is_detection:
-            if camera_filter is None or f"camera={camera_filter}" in output:
-                elapsed = int(timeout - (deadline - time.time()))
-                logger.info("First detection seen%s after ~%ds — pipeline is warm",
-                            camera_label, elapsed)
-                return True
-        if attempt % 4 == 0:
-            remaining = int(deadline - time.time())
-            logger.info("  Waiting for first detection%s … (%ds remaining)",
-                        camera_label, remaining)
-        attempt += 1
-        time.sleep(poll_interval)
-    logger.warning("No detection%s seen within %ds — proceeding anyway",
-                   camera_label, timeout)
-    return False
-
-
-# ---------------------------------------------------------------------------
-# Camera-specific helpers — new camera detection for stream-density
-# ---------------------------------------------------------------------------
-
-def _get_new_camera_name(app_dir: str, num_scenes: int) -> Optional[str]:
-    """Return the camera name newly added in this stream-density iteration.
-
-    Returns ``None`` for *num_scenes* == 1 (baseline iteration: Camera_01 and
-    Camera_02 are always present, no camera is "newly added").
-
-    For *num_scenes* > 1 each increment adds exactly one extra RTSP stream
-    whose index is ``base_camera_count + (num_scenes - 1)``.
-
-    Example (base_camera = "Camera_01"):
-      num_scenes=1 → None         (baseline — Camera_01 / Camera_02)
-      num_scenes=2 → "Camera_01-3"
-      num_scenes=3 → "Camera_01-4"
-      num_scenes=N → f"Camera_01-{N+1}"
-    """
-    if num_scenes <= 1:
-        return None
-    base = _read_base_config(app_dir)
-    base_camera = base["camera_name"]       # e.g. "Camera_01"
-    base_camera_count = 2                   # POI always starts with 2 base cameras
-    cam_idx = base_camera_count + (num_scenes - 1)
-    return f"{base_camera}-{cam_idx}"
-
-
-def _wait_for_alert_from_camera(
-    camera_id: str,
-    duration: int,
-    since: Optional[datetime] = None,
-    poll_interval: int = 5,
-) -> Optional[dict]:
-    """Poll /api/v1/alerts until a fresh alert from *camera_id* is dispatched.
-
-    "Fresh" means ``dispatched_at`` is strictly after *since* (defaults to the
-    moment this function is called).
-
-    Returns the matched alert dict so the caller can compute latency directly
-    without a second API round-trip.  Returns ``None`` if *duration* expires
-    without a matching alert.
-
-    Up to 100 alerts are fetched per poll to avoid missing the new camera's
-    alert when many alerts have already accumulated.
-    """
-    import urllib.request
-    from datetime import datetime as _dt, timezone as _tz
-
-    elapsed = 0
-    since_ts = since or datetime.utcnow()
-    since_aware = (
-        since_ts.astimezone(_tz.utc)
-        if since_ts.tzinfo
-        else since_ts.replace(tzinfo=_tz.utc)
-    )
-    logger.info(
-        "Waiting up to %ds for alert from camera=%s (since=%s) …",
-        duration, camera_id, since_ts.strftime("%H:%M:%S"),
-    )
-
-    while elapsed < duration:
-        sleep_time = min(poll_interval, duration - elapsed)
-        time.sleep(sleep_time)
-        elapsed += sleep_time
-
-        try:
-            req = urllib.request.Request(
-                "http://localhost:8000/api/v1/alerts?limit=100"
-            )
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                alerts = json.loads(resp.read().decode())
-            if not isinstance(alerts, list):
-                continue
-
-            for alert in alerts:
-                dispatched_str = alert.get("dispatched_at") or alert.get("timestamp", "")
-                if not dispatched_str:
-                    continue
-                try:
-                    d_str = dispatched_str.replace("Z", "+00:00")
-                    dispatched_utc = _dt.fromisoformat(d_str)
-                    if not dispatched_utc.tzinfo:
-                        dispatched_utc = dispatched_utc.replace(tzinfo=_tz.utc)
-                    else:
-                        dispatched_utc = dispatched_utc.astimezone(_tz.utc)
-                    if dispatched_utc < since_aware:
-                        continue
-                except (ValueError, TypeError):
-                    continue
-
-                alert_camera = (
-                    alert.get("match", {}).get("camera_id")
-                    or alert.get("camera_id", "")
-                )
-                if alert_camera == camera_id:
-                    logger.info(
-                        "Alert from camera=%s received after %ds — pipeline active",
-                        camera_id, elapsed,
-                    )
-                    time.sleep(5)   # brief flush so metrics files catch up
-                    return alert
-        except Exception:
-            pass
-
-        if elapsed % 30 < poll_interval + 1:
-            logger.info(
-                "Waiting for alert from camera=%s … (%d/%ds)",
-                camera_id, elapsed, duration,
-            )
-
-    logger.warning(
-        "No alert from camera=%s within %ds — continuing anyway",
-        camera_id, duration,
-    )
-    return None
-
-
-def _wait_for_web_healthy(timeout: int = 300) -> None:
-    """Block until SceneScape web container is healthy or timeout expires."""
-    # Container name depends on compose project name used at startup
-    candidates = ["storewide-lp-web-1", "scenescape-web-1"]
-    for attempt in range(timeout // 5):
-        for name in candidates:
-            result = subprocess.run(
-                f"docker inspect {name} --format '{{{{.State.Health.Status}}}}'",
-                shell=True, capture_output=True, text=True)
-            status = result.stdout.strip()
-            if status == "healthy":
-                logger.info("Web container (%s) is healthy (after %ds)", name, attempt * 5)
-                return
-        if attempt % 6 == 0:
-            logger.info("  web status: %s  (waiting…)", status)
-        time.sleep(5)
-    logger.warning("Web container did not become healthy after %ds — continuing anyway", timeout)
-
-
-def _wait_for_scene_import_completion(timeout: int = 180) -> None:
-    """Wait until the scene-import one-shot container exits.
-
-    scene-import must complete before lp-video (DLStreamer) restarts so that
-    SceneScape has the newly cloned camera registered in its database before
-    DLStreamer starts publishing data for it.
-
-    Without this wait, SceneScape's controller has no record of Camera_01-3
-    and silently drops all DLStreamer output for that camera — poi-backend
-    never receives face embeddings → no FAISS match → no alert.
-    """
-    logger.info("Waiting for scene-import to complete (timeout=%ds) …", timeout)
-    deadline = time.time() + timeout
-    while time.time() < deadline:
-        running = subprocess.run(
-            "docker ps -q --filter 'name=scene-import' --filter 'status=running'",
-            shell=True, capture_output=True, text=True,
-        ).stdout.strip()
-        if running:
-            elapsed = int(timeout - (deadline - time.time()))
-            if elapsed % 30 < 6:
-                logger.info("  scene-import still running … (%ds elapsed)", elapsed)
-            time.sleep(5)
-            continue
-
-        exited = subprocess.run(
-            "docker ps -aq --filter 'name=scene-import' --filter 'status=exited'",
-            shell=True, capture_output=True, text=True,
-        ).stdout.strip()
-        if exited:
-            # Get exit code to surface errors
-            code_result = subprocess.run(
-                f"docker inspect {exited.splitlines()[0]} "
-                f"--format '{{{{.State.ExitCode}}}}'",
-                shell=True, capture_output=True, text=True,
-            )
-            exit_code = code_result.stdout.strip()
-            if exit_code == "0":
-                logger.info("scene-import completed successfully")
-            else:
-                logger.warning("scene-import exited with code %s — "
-                               "camera registration may be incomplete", exit_code)
-            return
-        # Container not found yet — give it a moment to start
-        time.sleep(3)
-
-    logger.warning("scene-import did not complete within %ds — "
-                   "SceneScape may not have the new camera registered yet", timeout)
-
-
-def _wait_for_camera_rtsp_ready(camera_name: str, timeout: int = 60) -> bool:
-    """Poll until the new camera's RTSP stream is being served by MediaMTX.
-
-    DLStreamer (lp-video) connects to RTSP at startup.  If lp-cams-N is still
-    initialising when lp-video is force-recreated, DLStreamer silently fails to
-    open the RTSP source and the Camera_01-N pipeline never starts — no
-    embeddings, no alerts.
-
-    Strategy:
-      1. Query MediaMTX path-list API via docker exec (port 9997 is NOT mapped
-         to host — only 8554/8889 are — so localhost:9997 always fails).
-      2. Fallback: check that the compose-named container is running.
-         Container name format: storewide-lp-lp-cams-{N}-1
-    """
-    logger.info("Waiting for RTSP stream camera=%s to be ready (timeout=%ds) …",
-                camera_name, timeout)
-    # Camera_01-3 → suffix "3" → service "lp-cams-3" → container "storewide-lp-lp-cams-3-1"
-    svc_idx = camera_name.split("-")[-1] if "-" in camera_name else "3"
-    container_name = f"storewide-lp-lp-cams-{svc_idx}-1"
-
-    deadline = time.time() + timeout
-    while time.time() < deadline:
-        # Primary: MediaMTX path-list API via docker exec (avoids host-port issue)
-        try:
-            result = subprocess.run(
-                "docker exec storewide-lp-mediaserver-1 "
-                "wget -qO- 'http://localhost:9997/v3/paths/list'",
-                shell=True, capture_output=True, text=True, timeout=5,
-            )
-            if result.returncode == 0 and camera_name in result.stdout:
-                logger.info("RTSP stream camera=%s is ready (confirmed via MediaMTX API)",
-                            camera_name)
-                return True
-        except Exception:
-            pass
-
-        # Fallback: check the lp-cams-N container is running
-        running = subprocess.run(
-            f"docker inspect {container_name} "
-            f"--format '{{{{.State.Running}}}}'",
-            shell=True, capture_output=True, text=True,
-        ).stdout.strip()
-        if running == "true":
-            # Container is up — give ffmpeg 3s to open the RTSP session
-            time.sleep(3)
-            logger.info("RTSP container %s is running — stream likely ready",
-                        container_name)
-            return True
-
-        elapsed = int(timeout - (deadline - time.time()))
-        if elapsed % 15 < 4:
-            logger.info("  RTSP camera=%s not ready yet … (%ds elapsed)",
-                        camera_name, elapsed)
-        time.sleep(3)
-
-    logger.warning("RTSP stream camera=%s not confirmed within %ds — "
-                   "DLStreamer may fail to connect", camera_name, timeout)
-    return False
-
-
-def _write_env_var(env_file: str, key: str, value: str) -> None:
-    """Write or update a KEY=VALUE line in an env file.
-
-    If the key already exists it is updated in-place; otherwise the line is
-    appended.  This ensures variables injected by the benchmark (e.g.
-    STREAM_DENSITY) survive ``init.sh`` regeneration and are visible to all
-    docker-compose services that read the same env file.
-    """
-    lines: list[str] = []
-    found = False
-    if os.path.isfile(env_file):
-        with open(env_file) as fh:
-            lines = fh.readlines()
-        for i, line in enumerate(lines):
-            if line.startswith(f"{key}=") or line.startswith(f"{key} ="):
-                lines[i] = f"{key}={value}\n"
-                found = True
-                break
-    if not found:
-        lines.append(f"{key}={value}\n")
-    with open(env_file, "w") as fh:
-        fh.writelines(lines)
-    logger.info("Set %s=%s in %s", key, value, env_file)
-
-
-def _scale_pipeline_services(app_dir: str, num_scenes: int, wait: int = 90, resource_config: str = "") -> None:
-    """
-    Scale the POI video pipeline to N scenes.
-
-    Steps:
-      1. Update stream_density in zone_config.json
-      2. Generate docker-compose.cameras.yaml with extra RTSP streams
-      3. Re-run init.sh to update .env
-      4. Write STREAM_DENSITY + BASE_CAMERA_COUNT to docker/.env
-      5. Generate per-camera DLStreamer pipeline configs
-      6. Bring up new camera services
-      7. Wait for web container healthy
-      8. Clean stale scenes, restart scene-import
-      9. Recreate lp-video (DLStreamer)
-    """
-    logger.info("Scaling POI to %d scene(s) …", num_scenes)
-
-    _set_stream_density(app_dir, num_scenes)
-    _generate_cameras_override(app_dir, num_scenes)
-    _reinit_env(app_dir, resource_config=resource_config)
-
-    env_file = os.path.join(app_dir, "docker", ".env")
-    _write_env_var(env_file, "STREAM_DENSITY", str(num_scenes))
-    # BASE_CAMERA_COUNT is still written for backward-compat with any
-    # scene-import fallback path (tells clone-zip to start at Camera_01-3)
-    _write_env_var(env_file, "BASE_CAMERA_COUNT", "2")
-
-    _generate_dlstreamer_config(app_dir, num_scenes)
-
-    # Determine new camera service names for this iteration
-    base_camera_count = 2
-    new_cam_services: list[str] = []
-    if num_scenes > 1:
-        new_cam = _get_new_camera_name(app_dir, num_scenes)
-        if new_cam:
-            cam_idx = int(new_cam.split("-")[-1])   # "Camera_01-3" → 3
-            new_cam_services = [f"lp-cams-{cam_idx}", f"lp-video-{cam_idx}"]
-
-    # Remove stale containers for new services before (re)creating them.
-    # Containers left in "Created" or "Exited" state from a previous iteration
-    # hold stale Docker network IDs.  Starting them without --force-recreate
-    # causes "network ... not found" errors.  Removing them first ensures
-    # docker compose creates fresh containers with the current network.
-    if new_cam_services:
-        logger.info("Removing stale containers for new services: %s …",
-                    " ".join(new_cam_services))
-        _docker_compose(app_dir, f"rm -f {' '.join(new_cam_services)}")
-
-    # Bring up all services.  --remove-orphans cleans up containers from
-    # previous iterations (e.g. lp-cams-4..7 left over from earlier runs)
-    # that would otherwise hold stale network references and cause conflicts.
-    # This also creates any new named volumes (vol-dlstreamer-pipeline-root-N).
-    logger.info("Starting new camera streams …")
-    _docker_compose(app_dir, "up -d --no-recreate --remove-orphans")
-
-    # Pre-initialise the DLStreamer pipeline cache volume for the NEW camera only.
-    # New volumes are created by docker compose as root:root with no sticky bit.
-    # DLStreamer runs as uid=1999 and needs to create user_defined_pipelines/
-    # inside the mount — this fails with PermissionError on a fresh volume.
-    # Only the volume for the camera added THIS iteration is new; previous
-    # iterations' volumes are already initialised — no need to re-run alpine.
-    project = "storewide-lp"
-    if num_scenes > 1:
-        new_vol_idx = base_camera_count + (num_scenes - 1)
-        vol_name = f"{project}_vol-dlstreamer-pipeline-root-{new_vol_idx}"
-        logger.info("Initialising volume %s for DLStreamer uid=1999 …", vol_name)
-        subprocess.run(
-            f"docker run --rm -v {vol_name}:/data alpine sh -c "
-            f"'chmod a+rwxt /data && "
-            f"mkdir -p /data/user_defined_pipelines && "
-            f"chown 1999:1999 /data/user_defined_pipelines'",
-            shell=True, capture_output=True, text=True,
-        )
-
-    # Explicitly force-recreate the new camera services to guarantee fresh
-    # containers with a valid network attachment (--no-recreate skips them if
-    # they were already recreated by the rm above but not yet started).
-    if new_cam_services:
-        cam_svc = new_cam_services[0]   # lp-cams-{N}
-        logger.info("Force-starting camera stream service %s …", cam_svc)
-        _docker_compose(app_dir, f"up -d --force-recreate {cam_svc}")
-
-    _wait_for_web_healthy()
-
-    # Delete any scenes/cameras cloned in previous iterations, then register
-    # the new scene + camera directly via the SceneScape REST API.
-    #
-    # OLD flow: generate ZIP → docker compose up scene-import → wait ~10s
-    # NEW flow: clone ZIP in-memory → POST /api/v1/import-scene/ → ~200ms
-    #
-    # SceneScape must have Camera_01-N registered BEFORE lp-video-N connects
-    # to the RTSP source; otherwise the controller drops all DLStreamer output
-    # for the new camera (no embeddings → no FAISS match → no alert).
-    _delete_cloned_scenes(app_dir, num_scenes)
-
-    if num_scenes > 1:
-        new_cam = _get_new_camera_name(app_dir, num_scenes)
-        zone_cfg = _read_zone_config(app_dir)
-        base_scene_name = zone_cfg.get("scene_name", "conference room")
-        new_scene_name = f"{base_scene_name}-{num_scenes}"
-        if new_cam:
-            logger.info("Registering scene=%s camera=%s via SceneScape REST API …",
-                        new_scene_name, new_cam)
-            scene_uid, cam_uid = _scenescape_import_scene(
-                app_dir, new_scene_name, new_cam)
-            if not scene_uid:
-                # Direct import failed — fall back to scene-import sidecar
-                logger.warning("SceneScape API import failed — falling back to scene-import sidecar")
-                _docker_compose(app_dir, "rm -f -s scene-import")
-                _docker_compose(app_dir, "up -d scene-import")
-                _wait_for_scene_import_completion(timeout=180)
-
-
-    # Wait for the new camera's RTSP stream to be served by MediaMTX.
-    # DLStreamer connects to RTSP at startup; if lp-cams-N is still initialising
-    # when lp-video is force-recreated, DLStreamer silently skips that pipeline.
-    if num_scenes > 1:
-        new_cam = _get_new_camera_name(app_dir, num_scenes)
-        if new_cam:
-            _wait_for_camera_rtsp_ready(new_cam, timeout=60)
-
-    # Recreate only lp-video (Camera_01 baseline) and the NEW lp-video-{N}.
-    # Already-running lp-video-3, lp-video-4, … from previous iterations have
-    # unchanged pipeline configs and keep their RTSP connections — no need to
-    # restart them.  Restarting N containers on every iteration adds O(N) setup
-    # time and disrupts healthy pipelines unnecessarily.
-    # --remove-orphans cleans leftover lp-video-{old} containers from prior runs.
-    logger.info("Recreating DLStreamer container(s) for %d scene(s) …", num_scenes)
-    if num_scenes == 1:
-        # Baseline: only Camera_01's container
-        video_services = "lp-video"
-    else:
-        # Incremental: baseline container + the single new camera container only
-        new_vid_idx = base_camera_count + (num_scenes - 1)
-        video_services = f"lp-video lp-video-{new_vid_idx}"
-    _docker_compose(app_dir, f"up -d --force-recreate --remove-orphans {video_services}")
-
-    # poi-backend subscribes to scenescape/data/camera/+ (wildcard) so it
-    # receives embeddings from all cameras without restart.  MQTT_IMAGE_CAMERAS
-    # only controls the thumbnail-grab strategy (MQTT vs RTSP), not embedding
-    # ingestion.  Skip the force-recreate to avoid resetting FAISS/Redis state
-    # and wasting the 90-second stabilisation window.
-
-    # Wait for DLStreamer to produce first detection (replaces fixed 90s sleep).
-    # For incremental iterations, wait for the NEW camera specifically — Camera_01/02
-    # detections would give a false-positive "pipeline warm" signal while
-    # Camera_01-N's lp-video-N is still initialising.
-    new_cam_for_warmup = _get_new_camera_name(app_dir, num_scenes)
-    _wait_for_first_detection(timeout=wait, poll_interval=3,
-                              camera_filter=new_cam_for_warmup)
-    logger.info("Pipeline warm — adding 10s stabilisation buffer …")
-    time.sleep(10)
-
-
-def _clean_cameras_override(app_dir: str) -> None:
-    override_path = Path(app_dir) / "docker" / "docker-compose.cameras.yaml"
-    if override_path.exists():
-        override_path.unlink()
-        logger.info("Removed %s", override_path)
-
-    # Also remove generated pipeline configs for extra cameras
-    scenescape_dir = Path(app_dir) / ".." / "scenescape"
-    dlstreamer_dir = scenescape_dir / "dlstreamer-pipeline-server"
-    for cfg_file in dlstreamer_dir.glob("person-of-interest-*-[0-9]*-pipeline-config.json"):
-        cfg_file.unlink()
-        logger.info("Removed extra pipeline config: %s", cfg_file)
-
-
-# ---------------------------------------------------------------------------
-# Latency collection — POI-specific
-# ---------------------------------------------------------------------------
-
-def _collect_poi_latency_from_docker_logs(app_dir: str, duration_secs: int = 30) -> Dict[str, float]:
-    """
-    Extract end-to-end POI latency from poi-backend docker logs.
-
-    Measures the time between a POI match and its corresponding alert dispatch.
-    Also counts total detections, matches, and alerts for throughput tracking.
+class IterationMetrics:
+    """Metrics recorded for one benchmark iteration; built and finalised in StreamDensityBenchmark.run()."""
+    num_scenes: int  # scene count tested in this iteration
+    new_component: Optional[str]  # camera added this iteration; run() stores "baseline" when there is none
+    latency_ms: float  # value returned by _calculate_latency_from_metrics()
+    passed: bool  # created as False, then overwritten with the _determine_pass_fail() verdict
+    memory_percent: float  # psutil.virtual_memory().percent taken when the record is built
+    cpu_percent: float  # psutil.cpu_percent(interval=1) taken when the record is built
+    timestamp: str  # datetime.now() formatted as "%Y-%m-%d %H:%M:%S" (naive local time)
+    detections: int  # result of _extract_detection_count(raw_metrics)
+    alerts: int  # result of _extract_alert_count(raw_metrics, new_component)
+    raw_metrics: Dict[str, Any]  # Raw metrics for debugging
+
+
+class StreamDensityBenchmark:
     """
-    container = "poi-backend"
-    since_arg = f"--since={duration_secs + 30}s"
-    cmd = f"docker logs {container} {since_arg} 2>&1"
-    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    if result.returncode != 0:
-        logger.warning("Failed to get poi-backend logs: %s", result.stderr[:200])
-        return {}
-
-    detection_count = 0
-    alert_count = 0
-    match_count = 0
-    match_times: list = []
-    alert_times: list = []
-
-    ts_re = re.compile(r"^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})")
-
-    for line in result.stdout.splitlines():
-        if "Face embedding found" in line:
-            detection_count += 1
-        elif "POI match:" in line:
-            match_count += 1
-            m = ts_re.match(line)
-            if m:
-                match_times.append(m.group(1))
-        elif "Alert dispatched" in line or "Alert forwarded" in line:
-            alert_count += 1
-            m = ts_re.match(line)
-            if m:
-                alert_times.append(m.group(1))
-
-    stats: Dict[str, float] = {
-        "poi_detections": detection_count,
-        "poi_matches": match_count,
-        "poi_alerts": alert_count,
-    }
-
-    if detection_count > 0:
-        stats["match_rate"] = match_count / detection_count
-    if match_count > 0:
-        stats["alert_rate"] = alert_count / match_count
-
-    # Compute match-to-alert latency from log timestamps
-    if match_times and alert_times:
-        try:
-            from datetime import datetime as _dt
-            first_match = _dt.strptime(match_times[0], "%Y-%m-%d %H:%M:%S")
-            first_alert = _dt.strptime(alert_times[0], "%Y-%m-%d %H:%M:%S")
-            latency_s = (first_alert - first_match).total_seconds()
-            if latency_s >= 0:
-                stats["log_detection_to_alert_ms"] = latency_s * 1000
-                logger.info("Log-based latency: first match → first alert = %.0fms",
-                            latency_s * 1000)
-        except Exception:
-            pass
-
-    logger.info("POI logs: %d detections, %d matches, %d alerts (in %ds window)",
-                detection_count, match_count, alert_count, duration_secs)
-
-    return stats
-
-
-def _collect_poi_e2e_latency_from_alerts(
-    since: Optional[datetime] = None,
-    camera_filter: Optional[str] = None,
-) -> Dict[str, float]:
-    """Compute real end-to-end latency from POI alerts API.
-
-    Uses ``timestamp`` (DLStreamer frame capture time from the MQTT payload)
-    as the start time and ``dispatched_at`` as the end time, giving true
-    end-to-end latency from frame capture → alert dispatch (includes FAISS
-    match overhead but NOT DLStreamer pipeline inference time — that is
-    captured by the VLM metrics logger which uses the same frame timestamp).
-
-    NOTE: In stream-density mode this function is a fallback only.  The
-    primary path reads per-camera latency directly from the VLM metrics file
-    (``vlm_Person-of-Interest_{camera_id}_avg_ms``) which is written by
-    ``alert_service.py`` via ``user_log_start_time`` / ``log_end_time``.
-
-    Args:
-        since: If provided, only include alerts dispatched after this time.
-               Filters out stale alerts from previous benchmark runs.
-        camera_filter: If provided, only include alerts from this specific camera.
-                       Used in stream-density mode to measure the newly added camera's
-                       latency independently.  When ``None``, all cameras are included.
-
-    Returns dict with ``poi_e2e_latency_avg_ms``, ``poi_e2e_latency_max_ms``,
-    ``poi_e2e_latency_min_ms``, and ``poi_e2e_alert_count``.
+    Orchestrates the benchmark - contains ALL decision logic.
     """
-    import urllib.request
-    import urllib.error
-
-    try:
-        req = urllib.request.Request("http://localhost:8000/api/v1/alerts?limit=100")
-        with urllib.request.urlopen(req, timeout=10) as resp:
-            alerts = json.loads(resp.read().decode())
-    except Exception as e:
-        logger.warning("Failed to fetch alerts for E2E latency: %s", e)
-        return {}
-
-    if not isinstance(alerts, list) or not alerts:
-        return {}
+    
+    def __init__(
+        self,
+        config: BenchmarkConfig,
+        poi_scripts_dir: str,
+        app_dir: str,
+        resource_config: str = ""
+    ):
+        """Store settings, resolve POI action scripts (FileNotFoundError if one is missing), create results_dir."""
+        scripts_root = Path(poi_scripts_dir)
+        self.config, self.resource_config = config, resource_config
+        self.poi_scripts_dir, self.app_dir = scripts_root, Path(app_dir)
+
+        # Entry points supplied by POI for actions only (scale / alert / reset);
+        # metrics are never obtained through these scripts.
+        self.scale_script = scripts_root / "benchmark_scale.py"
+        self.alert_script = scripts_root / "benchmark_alert.py"
+        self.reset_script = scripts_root / "benchmark_reset.py"
+        # Fail fast on the first missing script (checked scale, alert, reset in that order).
+        required = (self.scale_script, self.alert_script, self.reset_script)
+        missing = next((s for s in required if not s.exists()), None)
+        if missing is not None:
+            raise FileNotFoundError(f"POI must provide: {missing}")
+        os.makedirs(config.results_dir, exist_ok=True)
+    
+    def run(self) -> Dict[str, Any]:
+        """Run the scale-up loop and return the results dict; every continue/stop decision is made here."""
+        self._print_header()
+        
+        results = {
+            "config": asdict(self.config),
+            "iterations": [],
+            "max_scenes": 0,
+            "met_target": False,
+            "best_iteration": None,
+            "timestamp": datetime.now().isoformat()
+        }
+        
+        num_scenes = self.config.single_run_scenes if self.config.single_run else 1  # density mode starts at 1 scene
+        best_metrics = None
+        max_iterations = 1 if self.config.single_run else self.config.max_iterations  # single-run: exactly one pass
+        
+        for iteration in range(1, max_iterations + 1):
+            # Get new component name for this iteration
+            new_component = self._get_new_component_name(num_scenes)
+            
+            print(f"\n{'='*70}")
+            print(f"Iteration {iteration}: Testing {num_scenes} scene(s)")
+            if new_component:
+                print(f"  New camera: {new_component}")
+            print(f"{'='*70}")
+            
+            # Check memory
+            if not self._memory_safe():
+                logger.warning("Memory threshold exceeded – stopping")
+                break
+            
+            # Reset POI state
+            self._reset_poi_state()
+            iteration_start = datetime.now(UTC)  # cut-off passed to the alert wait, metrics readers and thumbnail saver
+            
+            # Scale POI every iteration in stream-density mode.
+            # In single-run mode, skip if services are already healthy.
+            should_scale = True
+            if self.config.single_run and self._is_poi_healthy():
+                should_scale = False
+                logger.info("Services already healthy - skipping scale in single-run mode")
+            if should_scale:
+                logger.info("Scaling POI to %d scene(s)...", num_scenes)
+                self._scale_poi(num_scenes)
+            
+            # Wait for alert (tell POI to wait)
+            if self.config.single_run:
+                got_alert = self._call_alert_script(
+                    new_component,
+                    self.config.benchmark_duration,
+                    iteration_start,
+                )
+            else:
+                got_alert = self._wait_for_alert_with_retry(
+                    new_component, iteration_start
+                )
+                if not new_component and got_alert:
+                    # Keep baseline behavior aligned with legacy implementation.
+                    time.sleep(self.config.stabilise_duration)
+            
+            # ================================================================
+            # METRICS COLLECTION & LATENCY CALCULATION (IN PERFORMANCE TOOLS)
+            # ================================================================
+            
+            # Read raw metrics files written by POI
+            raw_metrics = self._collect_raw_metrics(iteration_start)
+
+            # Save alert thumbnails for this iteration (best-effort).
+            _save_alert_thumbnails(self.config.results_dir, iteration=iteration, since=iteration_start)
+            
+            # Calculate latency from raw metrics (PURE PERFORMANCE TOOLS LOGIC)
+            latency_ms = self._calculate_latency_from_metrics(
+                raw_metrics, 
+                component_filter=new_component,
+                metric_type=self.config.latency_metric
+            )
+            
+            # Extract detections and alerts from raw metrics
+            detections = self._extract_detection_count(raw_metrics)
+            alerts = self._extract_alert_count(raw_metrics, new_component)
+            
+            # Create metrics object
+            metrics = IterationMetrics(
+                num_scenes=num_scenes,
+                new_component=new_component or "baseline",
+                latency_ms=latency_ms,
+                passed=False,  # Will set below
+                memory_percent=psutil.virtual_memory().percent,
+                cpu_percent=psutil.cpu_percent(interval=1),
+                timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                detections=detections,
+                alerts=alerts,
+                raw_metrics=raw_metrics
+            )
+            
+            # DECISION: Determine pass/fail based on calculated latency
+            metrics.passed = self._determine_pass_fail(
+                metrics, got_alert, new_component
+            )
+            
+            # Store iteration
+            self._print_iteration(metrics)
+            results["iterations"].append(asdict(metrics))  # stored as a plain dict, raw_metrics included
+            
+            # Track best iteration
+            if metrics.passed and (
+                not best_metrics or metrics.num_scenes > best_metrics.num_scenes
+            ):
+                best_metrics = metrics
+            
+            # DECISION: Continue or stop based on pass/fail
+            if not metrics.passed:
+                reason = "no alert" if (new_component and not got_alert) else "latency exceeded"  # baseline failures always log "latency exceeded"
+                logger.info("Iteration %d failed (%s) – stopping", iteration, reason)
+                break
+            
+            # DECISION: Scale up for next iteration
+            num_scenes += self.config.scene_increment
+        
+        # Finalize results
+        if best_metrics:  # still None when no iteration passed: max_scenes stays 0, met_target stays False
+            results["max_scenes"] = best_metrics.num_scenes
+            results["met_target"] = True
+            results["best_iteration"] = asdict(best_metrics)
+        
+        self._export_results(results)
+        self._print_summary(results)
+        
+        return results
+    
+    # ========================================================================
+    # LATENCY CALCULATION LOGIC (Moved from POI to Performance Tools)
+    # ========================================================================
+    
+    def _collect_raw_metrics(self, since: datetime) -> Dict[str, Any]:
+        """
+        Gather the raw inputs used for latency calculation in one iteration.
+
+        Two sources are queried in order, both with the same ``since`` cut-off:
+        - the VLM application metrics files (stored under key 'vlm')
+        - the POI alerts API                 (stored under key 'alerts')
+
+        A source that yields an empty result is left out of the returned dict.
+        """
+        sources = (
+            ("vlm", self._read_vlm_metrics_files),   # files written by POI's alert_service.py
+            ("alerts", self._read_alerts_api),       # endpoint provided by POI
+        )
+
+        collected = {}
+        for key, reader in sources:
+            payload = reader(since)
+            # Falsy payloads (e.g. an empty dict) are skipped, so key presence means "has data".
+            if payload:
+                collected[key] = payload
+
+        return collected
+    
+    def _read_vlm_metrics_files(self, since: datetime) -> Dict[str, Any]:
+        """
+        Parse VLM application metrics files; *since* is passed to the parser as epoch-ms ``since_ms``.
 
-    from datetime import datetime as _dt, timezone as _tz
+        Uses a pure-Python implementation so no pandas/numpy is required.
+        The files contain start/end event lines like:
+          application=Person-of-Interest id=Camera_01-3 event=start timestamp_ms=1234567890
+          application=Person-of-Interest id=Camera_01-3 event=end   timestamp_ms=1234568000
+        """
+        metrics = {}  # returned as {"vlm_<app_name>_avg_ms": avg_ms}; empty when nothing was parsed
+        since_ms = int(calendar.timegm(since.utctimetuple()) * 1000)  # utctimetuple(): aware datetimes are normalised to UTC first
 
-    latencies_ms: list[float] = []
-    skipped = 0
-    filtered_camera = 0
-    for alert in alerts:
-        # Filter by camera when requested (stream-density: test new camera only)
-        if camera_filter:
-            alert_camera = (
-                alert.get("match", {}).get("camera_id")
-                or alert.get("camera_id", "")
-            )
-            if alert_camera != camera_filter:
-                filtered_camera += 1
+        for d in (self.config.results_dir, "/tmp"):  # "/tmp" is read last, so it wins on duplicate keys
+            if not os.path.isdir(d):
                 continue
+            try:
+                stats = _parse_vlm_metrics_dir(d, last_n_pairs=20, since_ms=since_ms)
+                for app_name, avg_ms in stats.items():
+                    metrics[f"vlm_{app_name}_avg_ms"] = avg_ms
+            except Exception as e:  # best-effort: a failing directory must not abort the iteration
+                logger.debug("Failed to parse VLM metrics from %s: %s", d, e)
 
-        # Use DLStreamer frame capture timestamp as start (true E2E start).
-        # alert["timestamp"] = payload["timestamp"] from MQTT = frame capture time.
-        # alert["dispatched_at"] = wall-clock when alert was fired after FAISS match.
-        frame_ts = alert.get("timestamp", "")
-        dispatched_str = alert.get("dispatched_at", "")
-        if not frame_ts or not dispatched_str:
-            continue
+        if not metrics:
+            logger.warning("No VLM metrics parsed from results_dir or /tmp")
+        return metrics
+    
+    def _read_alerts_api(self, since: datetime) -> Dict[str, Any]:
+        """
+        Fetch recent alerts from the POI API and compute dispatch latencies.
+
+        Each alert is expected to carry ISO-8601 "timestamp" (frame capture)
+        and "dispatched_at" (alert dispatch) strings and, optionally,
+        "match": {"camera_id": ...}.  Latency = dispatched_at - timestamp.
+
+        Args:
+            since: Alerts dispatched before this instant are ignored.  A
+                naive value is interpreted as UTC.
+
+        Returns:
+            Dict with total_alerts and alerts_by_camera, plus overall_*_ms
+            and camera_<id>_* entries when latencies were computed; {} when
+            the request fails or the payload is not a list.
+        """
+        import urllib.request
+        from datetime import timezone
+
         try:
-            frame_ts = frame_ts.replace("Z", "+00:00")
-            dispatched_str = dispatched_str.replace("Z", "+00:00")
-            start = _dt.fromisoformat(frame_ts)
-            dispatched = _dt.fromisoformat(dispatched_str)
-
-            # Normalize both to UTC-aware to avoid mixed tz subtraction errors
-            start_utc = start.astimezone(_tz.utc) if start.tzinfo else start.replace(tzinfo=_tz.utc)
-            dispatched_utc = dispatched.astimezone(_tz.utc) if dispatched.tzinfo else dispatched.replace(tzinfo=_tz.utc)
-
-            # Filter out alerts from before the benchmark started
-            if since is not None:
-                since_aware = since.astimezone(_tz.utc) if since.tzinfo else since.replace(tzinfo=_tz.utc)
-                if dispatched_utc < since_aware:
-                    skipped += 1
+            req = urllib.request.Request("http://localhost:8000/api/v1/alerts?limit=200")
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                alerts = json.loads(resp.read().decode())
+        except Exception as e:
+            logger.warning("Failed to fetch alerts: %s", e)
+            return {}
+        if not isinstance(alerts, list):
+            return {}
+
+        # Group latencies by camera
+        camera_latencies = {}
+        all_latencies = []
+        # Normalise `since` to aware UTC.  The alert times below are always
+        # aware, and comparing them with a naive `since` raises TypeError,
+        # which the except clause would swallow - silently dropping every alert.
+        since_aware = since.astimezone(timezone.utc) if since.tzinfo else since.replace(tzinfo=timezone.utc)
+
+        for alert in alerts:
+            # "match" may be absent or null; fall back to a top-level camera_id
+            camera_id = (alert.get("match") or {}).get("camera_id") or alert.get("camera_id", "unknown")
+
+            frame_ts = alert.get("timestamp", "")
+            dispatched_ts = alert.get("dispatched_at", "")
+            if not frame_ts or not dispatched_ts:
+                continue
+
+            try:
+                start = datetime.fromisoformat(frame_ts.replace('Z', '+00:00'))
+                end = datetime.fromisoformat(dispatched_ts.replace('Z', '+00:00'))
+
+                # Timestamps without an offset are taken to be UTC
+                if start.tzinfo is None:
+                    start = start.replace(tzinfo=timezone.utc)
+                if end.tzinfo is None:
+                    end = end.replace(tzinfo=timezone.utc)
+
+                # Skip alerts dispatched before the measurement window
+                if end < since_aware:
                     continue
+
+                # Latency in milliseconds; negative values are discarded
+                latency_ms = (end - start).total_seconds() * 1000
+
+                if latency_ms >= 0:
+                    all_latencies.append(latency_ms)
+                    camera_latencies.setdefault(camera_id, []).append(latency_ms)
+            except (ValueError, TypeError, AttributeError) as e:
+                # AttributeError: a timestamp field that is not a string
+                logger.debug("Failed to parse alert timestamps: %s", e)
+                continue
+
+        result = {
+            "total_alerts": len(all_latencies),
+            "alerts_by_camera": {k: len(v) for k, v in camera_latencies.items()},
+        }
+
+        if all_latencies:
+            result["overall_avg_ms"] = sum(all_latencies) / len(all_latencies)
+            result["overall_max_ms"] = max(all_latencies)
+            result["overall_min_ms"] = min(all_latencies)
+
+        # Per-camera metrics
+        for camera_id, latencies in camera_latencies.items():
+            result[f"camera_{camera_id}_avg_ms"] = sum(latencies) / len(latencies)
+            result[f"camera_{camera_id}_max_ms"] = max(latencies)
+            result[f"camera_{camera_id}_alert_count"] = len(latencies)
+
+        return result
+    
+    def _calculate_latency_from_metrics(
+        self,
+        raw_metrics: Dict[str, Any],
+        component_filter: Optional[str] = None,
+        metric_type: str = "avg"
+    ) -> float:
+        """
+        Pick one representative latency (ms) out of the collected metrics.
+
+        Lookup order, first positive value wins:
+        1. with *component_filter*: the first 'vlm' entry whose key contains
+           both the filter and 'avg_ms', then the 'alerts' entry
+           camera_<filter>_<metric_type>_ms;
+        2. the aggregate 'vlm_Person-of-Interest_avg_ms';
+        3. the 'alerts' overall_avg_ms / overall_max_ms for metric_type
+           'avg' / 'max'.
+        Returns 0.0 (after a warning) when nothing matches.
+        """
+        vlm = raw_metrics.get('vlm', {})
+        alerts = raw_metrics.get('alerts', {})
+        if component_filter:
+            per_camera = next(
+                (val for name, val in vlm.items()
+                 if component_filter in name and 'avg_ms' in name and val > 0),
+                None,
+            )
+            if per_camera is not None:
+                logger.info("Using VLM metrics for %s: %.0fms", component_filter, per_camera)
+                return per_camera
+            alert_key = f"camera_{component_filter}_{metric_type}_ms"
+            if alerts.get(alert_key, 0) > 0:
+                logger.info("Using alerts API for %s: %.0fms", component_filter, alerts[alert_key])
+                return alerts[alert_key]
+        aggregate = vlm.get('vlm_Person-of-Interest_avg_ms', 0)
+        if aggregate > 0:
+            logger.info("Using aggregate VLM: %.0fms", aggregate)
+            return aggregate
+        overall_key = {'avg': 'overall_avg_ms', 'max': 'overall_max_ms'}.get(metric_type)
+        overall = alerts.get(overall_key, 0) if overall_key else 0
+        if overall > 0:
+            return overall
+        logger.warning("No valid latency metrics found")
+        return 0.0
+    
+    def _extract_detection_count(self, raw_metrics: Dict[str, Any]) -> int:
+        """Return the number of detections recorded in *raw_metrics*.
+
+        The value is the 'total_alerts' entry of the 'alerts' section, or 0."""
+        return raw_metrics.get('alerts', {}).get('total_alerts', 0)
+    
+    def _extract_alert_count(self, raw_metrics: Dict[str, Any], component_filter: Optional[str]) -> int:
+        """Return the alert count for one camera, or for all cameras.
+
+        With a truthy *component_filter* the per-camera counter is read
+        (0 if absent); otherwise the overall 'total_alerts' value is used.
+        """
+        key = f"camera_{component_filter}_alert_count" if component_filter else 'total_alerts'
+        return raw_metrics.get('alerts', {}).get(key, 0)
+    
+    # ========================================================================
+    # POI Action Delegation (only for actions, NOT for metrics)
+    # ========================================================================
+    
+    def _scale_poi(self, num_scenes: int) -> None:
+        """Run the POI scale script so that *num_scenes* scenes are active.
+
+        Raises:
+            RuntimeError: If the script exits with a non-zero status.
+        """
+        command = [
+            sys.executable, str(self.scale_script),
+            "--app_dir", str(self.app_dir),
+            "--num_scenes", str(num_scenes),
+            "--resource_config", self.resource_config,
+        ]
+        completed = subprocess.run(command, capture_output=True, text=True)
+        if completed.returncode:
+            logger.error("Scale script failed: %s", completed.stderr)
+            raise RuntimeError(f"Scale failed: {completed.stderr}")
+        logger.info("POI scaled to %d scenes", num_scenes)
+    
+    def _get_new_component_name(self, num_scenes: int) -> Optional[str]:
+        """Ask the POI scale script which component *num_scenes* adds.
+
+        Returns the script's stripped stdout, or None when the script fails
+        or prints nothing.
+        """
+        command = [
+            sys.executable, str(self.scale_script),
+            "--app_dir", str(self.app_dir),
+            "--get_new_component", str(num_scenes),
+        ]
+        completed = subprocess.run(command, capture_output=True, text=True)
+        name = completed.stdout.strip()
+        return name if completed.returncode == 0 and name else None
+    
+    def _wait_for_alert_with_retry(
+        self, component_name: Optional[str], iteration_start: datetime
+    ) -> bool:
+        """Wait for an alert, retrying in windows when a component is named.
+
+        Without *component_name* the alert script runs once with
+        ``init_duration`` as its timeout.  Otherwise it runs in windows of at
+        most ``init_duration`` until an alert arrives or ``max_alert_wait`` is
+        used up; after a hit the method sleeps ``stabilise_duration`` first.
+        """
+        cfg = self.config
+        if not component_name:
+            return self._call_alert_script(None, cfg.init_duration, iteration_start)
+        elapsed = 0
+        while elapsed < cfg.max_alert_wait:
+            window = min(cfg.init_duration, cfg.max_alert_wait - elapsed)
+            if self._call_alert_script(component_name, window, iteration_start):
+                logger.info("Alert received from %s", component_name)
+                time.sleep(cfg.stabilise_duration)
+                return True
+            elapsed += window
+            logger.info("No alert yet (%ds/%ds) - retrying", elapsed, cfg.max_alert_wait)
+        return False
+    
+    def _call_alert_script(
+        self, component_name: Optional[str], timeout: int, since: datetime
+    ) -> bool:
+        """Run the POI alert-wait script; True means it exited with status 0.
+
+        ``--component`` is only passed when *component_name* is truthy.
+        """
+        component_args = ["--component", component_name] if component_name else []
+        argv = [
+            sys.executable, str(self.alert_script),
+            "--app_dir", str(self.app_dir),
+            "--timeout", str(timeout),
+            "--since", since.isoformat(),
+        ] + component_args
+        return subprocess.run(argv, capture_output=True, text=True).returncode == 0
+    
+    def _reset_poi_state(self) -> None:
+        """Run the POI reset script (best effort: a failure is only logged)."""
+        command = [sys.executable, str(self.reset_script), "--app_dir", str(self.app_dir)]
+        result = subprocess.run(command, capture_output=True, text=True)
+        if result.returncode != 0:
+            logger.warning("Reset script failed (exit %d): %s", result.returncode, result.stderr)
+    
+    def _is_poi_healthy(self) -> bool:
+        """Return True when the scale script's ``--check_healthy`` run exits with 0."""
+        probe = [
+            sys.executable, str(self.scale_script),
+            "--app_dir", str(self.app_dir), "--check_healthy",
+        ]
+        return subprocess.run(probe, capture_output=True, text=True).returncode == 0
+    
+    # ========================================================================
+    # Decision Logic (in performance-tools)
+    # ========================================================================
+    
+    def _determine_pass_fail(
+        self, metrics: IterationMetrics, got_alert: bool, new_component: Optional[str]
+    ) -> bool:
+        """Decide whether this iteration passed and print the verdict.
+
+        A named new component fails outright without an alert.  An alert with
+        non-positive latency passes; otherwise a positive latency must not
+        exceed ``config.target_latency_ms``.
+        """
+        if new_component and not got_alert:
+            print(f"  ✗ FAILED - No alert from {new_component}")
+            return False
+        latency = metrics.latency_ms
+        if got_alert and latency <= 0:
+            print("  ✓ PASSED - Alert received" if new_component
+                  else "  ✓ PASSED - Baseline alert received")
+            return True
+        target = self.config.target_latency_ms
+        if 0 < latency <= target:
+            print(f"  ✓ PASSED - {latency:.0f}ms ≤ {target:.0f}ms")
+            return True
+        if new_component:
+            print(f"  ✗ FAILED - {latency:.0f}ms > {target:.0f}ms")
+        else:
+            print("  ✗ FAILED - Baseline did not meet alert/latency criteria")
+        return False
+    
+    def _memory_safe(self) -> bool:
+        """Return False, with a warning, once system memory use exceeds 90%."""
+        used_percent = psutil.virtual_memory().percent
+        over_limit = used_percent > 90
+        if over_limit:
+            logger.warning("Memory at %.1f%% (threshold 90%%)", used_percent)
+        return not over_limit
+    
+    # ========================================================================
+    # Output Methods
+    # ========================================================================
+    
+    def _print_header(self):
+        """Print the benchmark banner followed by the key configuration values."""
+        cfg, rule = self.config, "=" * 70
+        for line in (rule, "Stream Density Benchmark - Performance Tools",
+                     "(Performance Tools handles ALL metrics & decisions)", rule,
+                     f"  Target Latency:    {cfg.target_latency_ms:.0f}ms",
+                     f"  Latency Metric:    {cfg.latency_metric}",
+                     f"  Scene Increment:   +{cfg.scene_increment}",
+                     f"  Results Dir:       {cfg.results_dir}", rule):
+            print(line)
+    
+    def _print_iteration(self, metrics: IterationMetrics):
+        """Print one iteration's metrics; the camera line is omitted for "baseline"."""
+        rows = [f"\n  Scenes:      {metrics.num_scenes}"]
+        if metrics.new_component != "baseline":
+            rows.append(f"  New Camera:  {metrics.new_component}")
+        rows += [f"  Latency:     {metrics.latency_ms:.0f}ms", f"  Detections:  {metrics.detections}",
+                 f"  Alerts:      {metrics.alerts}", f"  Memory:      {metrics.memory_percent:.1f}%",
+                 f"  CPU:         {metrics.cpu_percent:.1f}%"]
+        print("\n".join(rows))
+    
+    def _print_summary(self, results: Dict):
+        """Print the final result banner and, if present, the iteration table.
+
+        *results* must provide 'max_scenes' and 'met_target'; 'iterations'
+        (a list of per-iteration dicts) is optional.
+        """
+        rule = "=" * 70
+        print("\n" + rule)
+        print("BENCHMARK RESULTS")
+        print(rule)
+        print(f"  Target Latency:  {self.config.target_latency_ms:.0f}ms")
+        print(f"  Max Scenes:      {results['max_scenes']}")
+        met = 'Yes' if results['met_target'] else 'No'
+        print(f"  Met Target:      {met}")
+        print(rule)
 
-            delta_ms = (dispatched_utc - start_utc).total_seconds() * 1000
-            if delta_ms >= 0:
-                latencies_ms.append(delta_ms)
-        except (ValueError, TypeError):
-            continue
-
-    if skipped:
-        logger.info("Filtered out %d stale alerts (before benchmark start)", skipped)
-    if filtered_camera:
-        logger.debug("Filtered out %d alerts from other cameras (camera_filter=%s)",
-                     filtered_camera, camera_filter)
-
-    if not latencies_ms:
-        return {}
-
-    stats: Dict[str, float] = {
-        "poi_e2e_latency_avg_ms": sum(latencies_ms) / len(latencies_ms),
-        "poi_e2e_latency_max_ms": max(latencies_ms),
-        "poi_e2e_latency_min_ms": min(latencies_ms),
-        "poi_e2e_alert_count": len(latencies_ms),
-    }
-    label = "frame capture → alert dispatch (alerts API)"
-    if camera_filter:
-        label = f"camera={camera_filter} " + label
-    logger.info(
-        "E2E latency (%s): avg=%.0fms, min=%.0fms, max=%.0fms (%d alerts)",
-        label,
-        stats["poi_e2e_latency_avg_ms"],
-        stats["poi_e2e_latency_min_ms"],
-        stats["poi_e2e_latency_max_ms"],
-        len(latencies_ms),
-    )
-    return stats
+        rows = results.get("iterations", [])
+        if not rows:
+            return
 
+        print()
+        columns = (("Scenes", 8), ("Camera", 16), ("Latency", 12), ("Detections", 12),
+                   ("Alerts", 8), ("Mem %", 8), ("CPU %", 8), ("Status", 8))
+        print("".join(f"{title:<{width}}" for title, width in columns))
+        print("-" * 80)
+        for row in rows:
+            verdict = "PASS" if row.get("passed") else "FAIL"
+            camera = row.get("new_component") or "baseline"
+            print("".join((
+                f"{row.get('num_scenes', 0):<8}",
+                f"{camera:<16}",
+                f"{row.get('latency_ms', 0):<12.0f}",
+                f"{row.get('detections', 0):<12}",
+                f"{row.get('alerts', 0):<8}",
+                f"{row.get('memory_percent', 0):<8.1f}",
+                f"{row.get('cpu_percent', 0):<8.1f}",
+                f"{verdict:<8}",
+            )))
+        print("=" * 80)
+    
+    def _export_results(self, results: Dict):
+        """Write *results* as timestamped JSON and CSV files in the results dir.
+
+        The directory is created if missing so a finished run is not lost;
+        the CSV has one row per entry of results['iterations'].
+        """
+        os.makedirs(self.config.results_dir, exist_ok=True)
+        stem = os.path.join(self.config.results_dir, f"stream_density_{datetime.now():%Y%m%d_%H%M%S}")
+        with open(f"{stem}.json", "w", encoding="utf-8") as f:
+            json.dump(results, f, indent=2)
+        print(f"\nJSON results: {stem}.json")
+        with open(f"{stem}.csv", "w", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            writer.writerow(["scenes", "component", "latency_ms", "detections", "alerts", "passed"])
+            writer.writerows(
+                [it['num_scenes'], it['new_component'], f"{it['latency_ms']:.0f}",
+                 it['detections'], it['alerts'], it['passed']]
+                for it in results['iterations'])
+        print(f"CSV results: {stem}.csv")
 
-def _save_alert_thumbnails(
-    results_dir: str, iteration: int = 1, since: Optional[datetime] = None,
-) -> int:
-    """Fetch alerts and their thumbnails from the POI API and save to results_dir.
 
-    Args:
-        since: If provided, only save thumbnails for alerts dispatched after this time.
+def _save_alert_thumbnails(results_dir: str, iteration: int = 1,
+                           since: Optional[datetime] = None) -> int:
+    """Fetch alert thumbnails from POI API and save under results_dir.
 
     Returns the number of thumbnails saved.
     """
-    import urllib.request
     import urllib.error
+    import urllib.request
+    from datetime import datetime as _dt, timezone as _tz
 
     thumbs_dir = os.path.join(results_dir, f"thumbnails_iter{iteration}")
     try:
         os.makedirs(thumbs_dir, exist_ok=True)
     except PermissionError:
         logger.warning(
-            "Cannot create thumbnails directory %s (permission denied). "
-            "The results/ directory may be owned by root (written by a Docker container). "
-            "Run: sudo chown -R $USER results/",
+            "Cannot create thumbnails directory %s (permission denied)",
             thumbs_dir,
         )
         return 0
-    saved = 0
 
     try:
         req = urllib.request.Request("http://localhost:8000/api/v1/alerts")
@@ -1551,33 +672,28 @@ def _save_alert_thumbnails(
         return 0
 
     if not isinstance(alerts, list) or not alerts:
-        logger.info("No alerts found — no thumbnails to save")
         return 0
 
-    from datetime import datetime as _dt, timezone as _tz
-
+    saved = 0
     for i, alert in enumerate(alerts):
-        # Filter stale alerts
         if since is not None:
             dispatched_str = alert.get("dispatched_at", "")
             if dispatched_str:
                 try:
-                    d_str = dispatched_str.replace("Z", "+00:00")
-                    dispatched = _dt.fromisoformat(d_str)
+                    dispatched = _dt.fromisoformat(dispatched_str.replace("Z", "+00:00"))
                     since_aware = since.astimezone(_tz.utc) if since.tzinfo else since.replace(tzinfo=_tz.utc)
-                    dispatched_utc = dispatched.astimezone(_tz.utc) if dispatched.tzinfo else dispatched.replace(tzinfo=_tz.utc)
-                    if dispatched_utc < since_aware:
+                    dispatched_aware = dispatched.astimezone(_tz.utc) if dispatched.tzinfo else dispatched.replace(tzinfo=_tz.utc)
+                    if dispatched_aware < since_aware:
                         continue
                 except (ValueError, TypeError):
                     pass
-        # Extract fields from nested alert structure
+
         match_data = alert.get("match", {})
         thumb_url = match_data.get("thumbnail_path") or alert.get("thumbnail_path") or ""
         object_id = alert.get("object_id", "")
         poi_id = alert.get("poi_id", "unknown")
         camera_id = match_data.get("camera_id") or alert.get("camera_id", "unknown")
 
-        # Build the thumbnail URL
         if thumb_url.startswith("/"):
             thumb_url = f"http://localhost:8000{thumb_url}"
         elif not thumb_url and object_id:
@@ -1590,14 +706,13 @@ def _save_alert_thumbnails(
             with urllib.request.urlopen(req, timeout=10) as resp:
                 img_data = resp.read()
 
-            safe_cam = re.sub(r'[^a-zA-Z0-9_-]', '_', camera_id)
-            safe_poi = re.sub(r'[^a-zA-Z0-9_-]', '_', poi_id)
-            fname = f"alert_{i:03d}_{safe_poi}_{safe_cam}.jpg"
-            fpath = os.path.join(thumbs_dir, fname)
-            with open(fpath, "wb") as f:
-                f.write(img_data)
+            safe_cam = re.sub(r"[^a-zA-Z0-9_-]", "_", camera_id)
+            safe_poi = re.sub(r"[^a-zA-Z0-9_-]", "_", poi_id)
+            out_name = f"alert_{i:03d}_{safe_poi}_{safe_cam}.jpg"
+            out_path = os.path.join(thumbs_dir, out_name)
+            with open(out_path, "wb") as fh:
+                fh.write(img_data)
             saved += 1
-            logger.info("Saved thumbnail: %s", fpath)
         except urllib.error.HTTPError as e:
             logger.debug("Thumbnail HTTP %d for %s", e.code, thumb_url)
         except Exception as e:
@@ -1606,776 +721,172 @@ def _save_alert_thumbnails(
     if saved:
         logger.info("Saved %d alert thumbnails to %s", saved, thumbs_dir)
     return saved
+    
 
+# ---------------------------------------------------------------------------
+# Pure-Python VLM metrics parser (no pandas/numpy dependency)
+# ---------------------------------------------------------------------------
 
-def _collect_poi_latency_from_metrics_files(
-    results_dir: str, stream_density: bool = False,
-    since_ms: Optional[int] = None
-) -> Dict[str, float]:
-    """
-    Extract POI detection-to-alert latency from vlm_application_metrics files.
-
-    These files are written by the vlm_metrics_logger package via
-    user_log_start_time (detection) and log_end_time (alert dispatch)
-    calls in the poi-backend.
+def _parse_vlm_metrics_dir(results_dir: str, last_n_pairs: int = 20,
+                            since_ms: Optional[int] = None) -> Dict[str, float]:
+    """Return per-app average latency (ms) from the most recent
+    vlm_application_metrics*.txt file in *results_dir*.
 
-    For single benchmarks uses ``get_vlm_application_latency`` (all pairs).
-    For stream density uses ``get_vlm_application_latency_stream_density``
-    with ``since_ms`` so only pairs from the current iteration are measured.
-    The file is never deleted between iterations — poi-backend holds an open
-    RotatingFileHandler to it.
+    Parses ``application=<name> id=<id> event=start|end timestamp_ms=<ms>``
+    lines, skipping events before *since_ms* and malformed timestamps.
+    Start/end events are paired LIFO per "<name>_<id>" key and the last
+    *last_n_pairs* durations of each key are averaged.
     """
-    all_stats: Dict[str, float] = {}
-    search_dirs = [results_dir, "/tmp"]
+    import re as _re
+    from collections import defaultdict
+    from pathlib import Path as _Path
 
-    if stream_density:
-        for d in search_dirs:
-            if not os.path.isdir(d):
+    files = sorted(
+        _Path(results_dir).rglob("vlm_application_metrics*.txt"),
+        key=lambda p: p.stat().st_mtime,
+    )
+    if not files:
+        return {}
+    timing: dict = defaultdict(list)
+    kv_pat = _re.compile(r'(\w+)=([^\s]+)')
+    with open(files[-1], encoding="utf-8", errors="replace") as fh:
+        for line in fh:
+            if "application=" not in line or "timestamp_ms=" not in line:
                 continue
-            try:
-                stats = get_vlm_application_latency_stream_density(
-                    d, last_n_pairs=20, since_ms=since_ms)
-                if stats:
-                    for app_id, avg_ms in stats.items():
-                        all_stats[f"vlm_{app_id}_avg_ms"] = avg_ms
-                    logger.info("VLM stream-density latency (%s): %s", d, stats)
-            except Exception as e:
-                logger.warning("Failed to parse VLM metrics in %s: %s", d, e)
-    else:
-        for d in search_dirs:
-            if not os.path.isdir(d):
+            data = dict(kv_pat.findall(line))
+            app = data.get("application", "")
+            id_ = data.get("id", "")
+            event = data.get("event", "")
+            ts_str = data.get("timestamp_ms", "")
+            if not (app and id_ and event in ("start", "end") and ts_str):
                 continue
-            pattern = os.path.join(d, "vlm_application_metrics_*.txt")
-            files = sorted(glob.glob(pattern), key=os.path.getmtime)
-            if not files:
+            try:
+                ts = int(ts_str)
+            except ValueError:
+                continue  # one malformed timestamp must not abort the parse
+            if since_ms is not None and ts < since_ms:
                 continue
-            latest = files[-1]
-            try:
-                stats = get_vlm_application_latency(latest)
-                if stats:
-                    for key, avg_ms in stats.items():
-                        all_stats[f"vlm_{key}"] = avg_ms
-                    logger.info("VLM metrics file latency (%s): %s", latest, stats)
-            except Exception as e:
-                logger.warning("Failed to parse VLM metrics in %s: %s", d, e)
-
-    return all_stats
-
-
-def _extract_poi_latency(stats: Dict[str, float], metric: str) -> float:
-    """
-    Extract a single representative POI latency value from collected stats.
-
-    Priority:
-      1. vlm_application_metrics file values — TRUE end-to-end latency:
-         start = DLStreamer frame capture timestamp (set by user_log_start_time
-         in alert_service.py using the MQTT payload's frame timestamp field),
-         end   = wall-clock time at alert dispatch (log_end_time).
-         This spans: camera frame capture → DLStreamer pipeline → FAISS match
-         → alert dispatch.
-         Two unique_id variants are written per alert:
-           * ``person-of-interest`` — aggregate across all cameras
-           * ``{camera_id}``        — per-camera (for stream-density isolation)
-      2. Alerts API fallback (``timestamp`` frame capture → ``dispatched_at``) —
-         same semantic as (1) but parsed from stored alert JSON rather than
-         the live metrics file.
-      3. Returns 0 if no data available.
-
-    Note: Docker-log-based ``log_detection_to_alert_ms`` is excluded because
-    log timestamps have only second-level precision and the first-match-to-
-    first-alert gap includes dedup delay (60 s TTL), making it unreliable
-    as a per-event latency metric.
-    """
-    # Primary: vlm_application_metrics file-based values.
-    # Exclude the aggregate ``person-of-interest`` key — per-camera entries
-    # (e.g., ``vlm_Person-of-Interest_Camera_01-3_avg_ms``) are more precise
-    # in stream-density mode because they isolate each camera's contribution.
-    # If no per-camera entries exist (e.g., first/single run), fall back to
-    # the aggregate ``person-of-interest`` entry.
-    per_camera_values = [
-        v for k, v in stats.items()
-        if k.startswith("vlm_") and "_person-of-interest_" not in k
-        and isinstance(v, (int, float)) and v > 0
-    ]
-    aggregate_values = [
-        v for k, v in stats.items()
-        if k.startswith("vlm_") and "_person-of-interest_" in k
-        and isinstance(v, (int, float)) and v > 0
-    ]
-    vlm_values = per_camera_values or aggregate_values
-    if vlm_values:
-        if metric == "max":
-            return max(vlm_values)
-        return mean(vlm_values)
-
-    # Fallback: alerts API E2E latency (frame capture → dispatched_at)
-    e2e_avg = stats.get("poi_e2e_latency_avg_ms", 0.0)
-    e2e_max = stats.get("poi_e2e_latency_max_ms", 0.0)
-    if e2e_avg > 0:
-        if metric == "max":
-            return e2e_max
-        return e2e_avg
-
-    return 0.0
-
-
-def _clean_metrics(results_dir: str) -> None:
-    """Remove stale metrics files before each measurement iteration.
-
-    Note: vlm_application_metrics files are NOT deleted here because poi-backend
-    holds an open RotatingFileHandler to them.  Deleting the file causes the
-    handler to write to an unlinked inode (invisible to rglob) for the rest of
-    the run.  Instead, callers pass a ``since_ms`` filter so each iteration
-    reads only its own pairs from the cumulative file.
-    """
-    patterns = [
-        "vlm_performance_metrics*.txt",
-    ]
-    for d in [results_dir, "/tmp"]:
-        for pat in patterns:
-            for f in glob.glob(os.path.join(d, pat)):
-                try:
-                    os.remove(f)
-                except OSError:
-                    pass
-
-
-def _reset_alert_dedup() -> None:
-    """Clear Redis alert dedup and history via the poi-backend REST API.
-
-    Between stream-density iterations, the Redis dedup keys (``alert:sent:*``)
-    must be reset so the newly added camera can fire fresh alerts.
-
-    Background: the dedup key is the object UUID — shared across all cameras
-    tracking the same physical person.  If Camera_01 already alerted on
-    UUID-X within ALERT_DEDUP_TTL seconds, Camera_01-3 would be silently
-    suppressed when it detects the same person, preventing any alert from
-    the new camera regardless of its latency.
-
-    ``DELETE /api/v1/alerts`` clears both the recent-alerts list AND all
-    ``alert:sent:*`` / ``alert:*`` Redis keys so every iteration starts fresh.
-    """
-    import urllib.request
-    try:
-        req = urllib.request.Request(
-            "http://localhost:8000/api/v1/alerts",
-            method="DELETE",
-        )
-        with urllib.request.urlopen(req, timeout=10) as resp:
-            result = resp.read().decode()
-        logger.info("Alert dedup reset: %s", result)
-    except Exception as e:
-        logger.warning("Could not reset alert dedup (non-fatal): %s", e)
+            timing[f"{app}_{id_}"].append({"event": event, "timestamp_ms": ts})
+    result: Dict[str, float] = {}
+    for app_id, events in timing.items():
+        events.sort(key=lambda x: x["timestamp_ms"])
+        durations, stack = [], []
+        for ev in events:
+            if ev["event"] == "start":
+                stack.append(ev["timestamp_ms"])
+            elif ev["event"] == "end" and stack:
+                durations.append(ev["timestamp_ms"] - stack.pop())
+        if durations:
+            tail = durations[-last_n_pairs:]
+            result[app_id] = sum(tail) / len(tail)
+    return result
 
 
 # ---------------------------------------------------------------------------
-# POI Stream Density Runner
+# poi_scaling helper loader (loads from POI benchmark/ folder)
 # ---------------------------------------------------------------------------
 
-class POIStreamDensity:
-    """
-    Iteratively increases the number of camera/scene pipelines until the
-    newly added camera's end-to-end detection-to-alert latency exceeds
-    *target_latency_ms*.  This determines the maximum number of cameras
-    the hardware can support for this application.
-
-    Iteration logic
-    ---------------
-    *Baseline* (num_scenes=1): Camera_01 and Camera_02 are already running.
-    Any alert confirms the pipeline is functional.
-
-    *Incremental* (num_scenes > 1): each iteration adds exactly one new
-    camera (Camera_01-3, Camera_01-4, …) and one cloned scene.  The
-    benchmark waits specifically for an alert from that new camera, then
-    measures its E2E latency.  Only if latency ≤ target does the next
-    camera get added.
-
-    What gets added per iteration (num_scenes > 1)
-    ------------------------------------------------
-      - One new RTSP stream  (lp-cams-N Docker service)
-      - One extra DLStreamer pipeline config
-      - One new scene clone in SceneScape (scene-import re-run)
-      - lp-video (DLStreamer) restarted with the updated config
-
-    What stays running untouched
-    -----------------------------
-      - poi-backend  ← subscribes via MQTT wildcard ``scenescape/data/camera/+``
-        so it automatically processes every new camera without a restart.
-      - SceneScape core (web, controller, broker, ntpserv, pgserver, vdms)
-      - poi-redis, poi-alert-service, poi-ui, mediaserver
-    """
-
-    MEMORY_SAFETY_PERCENT = 90
-
-    def __init__(
-        self,
-        app_dir: str,
-        target_latency_ms: float,
-        latency_metric: str,
-        scene_increment: int,
-        init_duration: int,
-        stabilise_duration: int,
-        results_dir: str,
-        max_iterations: int,
-        single_run: bool = False,
-        single_run_scenes: int = 1,
-        benchmark_duration: int = 120,
-        resource_config: str = "",
-        max_alert_wait: int = 180,
-    ):
-        self.app_dir = os.path.abspath(app_dir)
-        self.target_latency_ms = target_latency_ms
-        self.latency_metric = latency_metric
-        self.scene_increment = scene_increment
-        self.init_duration = init_duration
-        self.stabilise_duration = stabilise_duration
-        self.results_dir = os.path.abspath(results_dir)
-        self.max_iterations = max_iterations
-        self.single_run = single_run
-        self.single_run_scenes = single_run_scenes
-        self.benchmark_duration = benchmark_duration
-        self.resource_config = resource_config
-        # max total seconds to wait for an alert from the new camera per iteration;
-        # should cover at least 2-3 full video cycles to handle phase offset.
-        # Increase via MAX_ALERT_WAIT env var or --max_alert_wait flag for CPU mode.
-        self.max_alert_wait = max_alert_wait
-        self.latency_metric = latency_metric
-        self.scene_increment = scene_increment
-        self.init_duration = init_duration
-        self.stabilise_duration = stabilise_duration
-        self.results_dir = os.path.abspath(results_dir)
-        self.max_iterations = max_iterations
-        self.single_run = single_run
-        self.single_run_scenes = single_run_scenes
-        self.benchmark_duration = benchmark_duration
-        self.resource_config = resource_config
-        os.makedirs(self.results_dir, exist_ok=True)
-
-    def _services_running(self) -> bool:
-        """Check if key POI pipeline services are already running."""
-        for container in ("poi-backend", "storewide-lp-lp-video-1"):
-            result = subprocess.run(
-                f"docker inspect {container} --format '{{{{.State.Running}}}}'",
-                shell=True, capture_output=True, text=True)
-            if result.stdout.strip() != "true":
-                return False
-        return True
-
-    def _wait_for_alert_or_timeout(self, duration: int,
-                                    since: Optional[datetime] = None) -> bool:
-        """Poll for new alerts, exit early on first alert after *since*.
-
-        Used both in single-benchmark mode (time-to-first-alert) and in
-        stream-density mode (ensure at least one alert before data collection).
-
-        Returns True if a fresh alert was found, False if duration expired.
-        """
-        import urllib.request
-        import urllib.error
-
-        poll_interval = 5
-        elapsed = 0
-        since_ts = since or datetime.utcnow()
-        logger.info("Waiting up to %ds for alert (polling every %ds) …",
-                     duration, poll_interval)
-
-        while elapsed < duration:
-            sleep_time = min(poll_interval, duration - elapsed)
-            time.sleep(sleep_time)
-            elapsed += sleep_time
-
-            try:
-                req = urllib.request.Request("http://localhost:8000/api/v1/alerts")
-                with urllib.request.urlopen(req, timeout=10) as resp:
-                    alerts = json.loads(resp.read().decode())
-                # Filter to alerts created after iteration start
-                fresh = [
-                    a for a in (alerts if isinstance(alerts, list) else [])
-                    if a.get("created_at", a.get("timestamp", "")) > since_ts.strftime("%Y-%m-%dT%H:%M:%S")
-                ]
-                if fresh:
-                    logger.info("Fresh alert received after %ds — stopping early", elapsed)
-                    # Brief extra wait for metrics files to flush
-                    time.sleep(5)
-                    return True
-            except Exception:
-                pass
-
-            logger.info("No fresh alerts yet (%d/%ds elapsed)", elapsed, duration)
-
-        logger.info("Alert wait duration reached (%ds) — continuing anyway", duration)
-        return False
-
-    def run(self) -> StreamDensityResult:
-        """Execute the POI stream-density loop.
-
-        For each iteration:
-          1. Add one new camera + scene (num_scenes > 1) or use base cameras (num_scenes = 1).
-          2. Wait for an alert specifically from the NEW camera.
-          3. Compute E2E latency filtered to that camera's alerts.
-          4. If latency ≤ target → add next camera and repeat.
-             If no alert received or latency > target → report max_scenes and stop.
-        """
-        self._print_header()
-        result = StreamDensityResult(target_latency_ms=self.target_latency_ms)
-
-        num_scenes = self.single_run_scenes if self.single_run else 1
-        max_iter = 1 if self.single_run else self.max_iterations
-        best: Optional[IterationResult] = None
-
-        for iteration in range(1, max_iter + 1):
-            # Determine which camera is newly added in this iteration.
-            # None → baseline (num_scenes=1, Camera_01 + Camera_02 already present).
-            new_camera = _get_new_camera_name(self.app_dir, num_scenes)
-
-            print(f"\n{'='*70}")
-            print(f"Iteration {iteration}: Testing {num_scenes} scene(s)  "
-                  f"[new camera: {new_camera or 'N/A (baseline)'}]")
-            print(f"{'='*70}")
-
-            if not self._memory_safe():
-                logger.warning("Memory threshold exceeded – stopping.")
-                break
-
-            # Record iteration start time for filtering stale alerts
-            iteration_start = datetime.utcnow()
-
-            # Reset Redis alert dedup so the new camera can fire fresh alerts.
-            # Without this, UUID-based dedup from previous iterations would
-            # suppress alerts from Camera_01-3 if the same person was already
-            # alerted on Camera_01 (dedup key = object UUID, not camera-specific).
-            _reset_alert_dedup()
-
-            # Clean old metrics before each measurement
-            _clean_metrics(self.results_dir)
-
-            if self.single_run and self._services_running():
-                # Single benchmark: services already up, skip scaling
-                logger.info("Services already running — skipping scaling for single benchmark")
-            else:
-                # Scale to desired scene count (adds new camera + scene)
-                _scale_pipeline_services(self.app_dir, num_scenes, wait=self.init_duration,
-                                         resource_config=self.resource_config)
-
-            # ── Wait for alert from the newly added camera ──────────────────
-            # For num_scenes=1 (baseline) there is no "new" camera, so we fall
-            # back to the generic any-camera wait.  For num_scenes>1 we wait
-            # specifically for the new camera to prove it is fully active.
-            #
-            # The wait retries in init_duration windows until either an alert
-            # is received OR total wait exceeds max_alert_wait.  This is
-            # important for CPU mode where inference is slower and the POI
-            # face may only appear once per video loop (~55s), so a single
-            # 45s window is not guaranteed to cover a full video cycle.
-            if new_camera:
-                # Camera-specific wait: must see an alert from the new camera
-                if self.single_run:
-                    _wait_for_alert_from_camera(
-                        new_camera, self.benchmark_duration, since=iteration_start,
-                    )
-                else:
-                    max_alert_wait = self.max_alert_wait
-                    total_waited = 0
-                    got_alert = False
-                    while not got_alert and total_waited < max_alert_wait:
-                        window = min(self.init_duration, max_alert_wait - total_waited)
-                        got_alert = _wait_for_alert_from_camera(
-                            new_camera, window, since=iteration_start,
-                        )
-                        total_waited += window
-                        if not got_alert and total_waited < max_alert_wait:
-                            logger.info(
-                                "No alert from camera=%s yet — retrying "
-                                "(%ds elapsed, max=%ds) …",
-                                new_camera, total_waited, max_alert_wait,
-                            )
-                    if not got_alert:
-                        logger.warning(
-                            "No alert from new camera=%s within max_alert_wait=%ds",
-                            new_camera, max_alert_wait,
-                        )
-                    logger.info("Collecting data for %ds …", self.stabilise_duration)
-                    time.sleep(self.stabilise_duration)
-            else:
-                # Baseline iteration: any alert is acceptable
-                if self.single_run:
-                    self._wait_for_alert_or_timeout(self.benchmark_duration,
-                                                    since=iteration_start)
-                else:
-                    self._wait_for_alert_or_timeout(self.init_duration,
-                                                    since=iteration_start)
-                    logger.info("Collecting data for %ds …", self.stabilise_duration)
-                    time.sleep(self.stabilise_duration)
-
-            # Use actual elapsed time for log collection window
-            elapsed_seconds = int((datetime.utcnow() - iteration_start).total_seconds())
-            log_window = elapsed_seconds if self.single_run else self.stabilise_duration
-
-            # Collect latency from metrics files + docker logs
-            # Use calendar.timegm to convert naive UTC datetime → epoch ms correctly.
-            # datetime.timestamp() treats naive datetimes as LOCAL time, giving a
-            # 5.5h offset on IST systems that makes since_ms far too early.
-            iter_start_ms = int(calendar.timegm(iteration_start.timetuple()) * 1000)
-            log_stats = _collect_poi_latency_from_docker_logs(
-                self.app_dir, log_window)
-            file_stats = _collect_poi_latency_from_metrics_files(
-                self.results_dir, stream_density=not self.single_run,
-                since_ms=iter_start_ms if not self.single_run else None)
-
-            # E2E latency: filtered to the new camera when in incremental mode,
-            # so pass/fail reflects the NEW camera's responsiveness — not the
-            # aggregate of all previously-running cameras.
-            e2e_stats = _collect_poi_e2e_latency_from_alerts(
-                since=iteration_start,
-                camera_filter=new_camera,   # None → all cameras (baseline)
-            )
-
-            # Save alert thumbnails to results directory
-            _save_alert_thumbnails(self.results_dir, iteration=iteration,
-                                   since=iteration_start)
-
-            # Merge all stats
-            stats: Dict[str, float] = {}
-            stats.update(log_stats)
-            stats.update(file_stats)
-            stats.update(e2e_stats)
-
-            latency = _extract_poi_latency(stats, self.latency_metric)
-
-            it_result = IterationResult(
-                num_scenes=num_scenes,
-                latency_ms=latency,
-                latency_details=stats,
-                memory_percent=psutil.virtual_memory().percent,
-                cpu_percent=psutil.cpu_percent(interval=1),
-                timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                actual_detections=int(stats.get("poi_detections", 0)),
-                alerts_generated=int(stats.get("poi_alerts", 0)),
-                detections_per_scene=(
-                    int(stats.get("poi_detections", 0)) / num_scenes
-                    if num_scenes > 0 else 0
-                ),
-                new_camera=new_camera or "",
-            )
-
-            self._print_iteration(it_result)
-
-            # ── Pass / fail decision ────────────────────────────────────────
-            # When a new_camera is defined the benchmark REQUIRES an alert from
-            # that specific camera.  A missing alert means the system cannot
-            # keep up with the additional pipeline load — treat as FAIL.
-            has_camera_alert = bool(e2e_stats)          # camera-filtered → non-empty means alert received
-            has_detections   = it_result.actual_detections > 0
-            has_matches      = int(stats.get("poi_matches", 0)) > 0
-
-            if new_camera:
-                # ── Incremental camera mode ──────────────────────────────────
-                if not has_camera_alert:
-                    it_result.passed = False
-                    print(f"  ✗ NO ALERT from new camera={new_camera} — "
-                          "system cannot process additional pipeline load")
-                    result.iterations.append(it_result)
-                    break
-                elif latency <= 0:
-                    # Alert received but latency sub-measurable (< 1 s)
-                    it_result.passed = True
-                    best = it_result
-                    print(f"  ✓ PASSED  (camera={new_camera} alert received, latency < 1s)")
-                elif latency <= self.target_latency_ms:
-                    it_result.passed = True
-                    best = it_result
-                    print(f"  ✓ PASSED  (camera={new_camera} "
-                          f"latency={latency:.0f}ms ≤ {self.target_latency_ms:.0f}ms)")
-                else:
-                    it_result.passed = False
-                    print(f"  ✗ FAILED  (camera={new_camera} "
-                          f"latency={latency:.0f}ms > {self.target_latency_ms:.0f}ms)")
-                    result.iterations.append(it_result)
-                    break
-            else:
-                # ── Baseline iteration (num_scenes=1) — any alert ────────────
-                latency_ok = latency > 0 and latency <= self.target_latency_ms
-                if latency == 0 and has_matches:
-                    it_result.passed = True
-                    best = it_result
-                    print("  ✓ PASSED  (baseline: matches found, latency < 1s)")
-                elif latency == 0 and has_detections and not has_matches:
-                    it_result.passed = True
-                    best = it_result
-                    print(f"  ✓ PASSED  (baseline: {it_result.actual_detections} detections, "
-                          "no matches — target not in frame during window)")
-                elif latency == 0:
-                    print("  ⚠ NO DATA – no detections collected")
-                    if iteration > 1:
-                        break
-                elif latency_ok:
-                    it_result.passed = True
-                    best = it_result
-                    print(f"  ✓ PASSED  (baseline: latency={latency:.0f}ms ≤ {self.target_latency_ms:.0f}ms)")
-                else:
-                    it_result.passed = False
-                    print(f"  ✗ FAILED  (baseline: latency {latency:.0f}ms > {self.target_latency_ms:.0f}ms)")
-                    result.iterations.append(it_result)
-                    break
-
-            result.iterations.append(it_result)
-            num_scenes += self.scene_increment
-
-        result.best_iteration = best
-        result.max_scenes = best.num_scenes if best else 0
-        result.met_target = best is not None
-
-        self._export(result)
-        self._print_summary(result)
-        return result
-
-    def _memory_safe(self) -> bool:
-        mem = psutil.virtual_memory()
-        if mem.percent > self.MEMORY_SAFETY_PERCENT:
-            logger.warning("Memory at %.1f%% (threshold %d%%)",
-                           mem.percent, self.MEMORY_SAFETY_PERCENT)
-            return False
-        return True
-
-    def _print_header(self) -> None:
-        print("=" * 70)
-        print("POI Stream Density – Detection-to-Alert Latency Scaling")
-        print("=" * 70)
-        print(f"  Target Latency:    {self.target_latency_ms:.0f}ms")
-        print(f"  Latency Metric:    {self.latency_metric}")
-        print(f"  Scene Increment:   +{self.scene_increment}")
-        print(f"  Init Duration:     {self.init_duration}s")
-        if self.single_run:
-            print(f"  Benchmark Duration:{self.benchmark_duration}s (exits early on alert)")
-        else:
-            print(f"  Stabilise:         {self.stabilise_duration}s")
-        print(f"  Results Dir:       {self.results_dir}")
-        print(f"  Single-run Mode:   {self.single_run}")
-        print("=" * 70)
-
-    def _print_iteration(self, it: IterationResult) -> None:
-        print(f"\n  Scenes:      {it.num_scenes}")
-        if it.new_camera:
-            print(f"  New Camera:  {it.new_camera}")
-        print(f"  Latency:     {it.latency_ms:.0f}ms")
-        print(f"  Detections:  {it.actual_detections} "
-              f"({it.detections_per_scene:.1f}/scene)")
-        print(f"  Alerts:      {it.alerts_generated}")
-        print(f"  Memory:      {it.memory_percent:.1f}%")
-        print(f"  CPU:         {it.cpu_percent:.1f}%")
-        if it.latency_details:
-            for k, v in it.latency_details.items():
-                if isinstance(v, float):
-                    print(f"    {k}: {v:.2f}")
-                else:
-                    print(f"    {k}: {v}")
-
-    def _print_summary(self, result: StreamDensityResult) -> None:
-        print("\n" + "=" * 70)
-        print("POI STREAM DENSITY RESULTS")
-        print("=" * 70)
-        print(f"  Target Latency:  {result.target_latency_ms:.0f}ms")
-        print(f"  Max Scenes:      {result.max_scenes}")
-        print(f"  Met Target:      {'Yes' if result.met_target else 'No'}")
-        if result.best_iteration:
-            print(f"  Best Latency:    {result.best_iteration.latency_ms:.0f}ms "
-                  f"@ {result.best_iteration.num_scenes} scene(s)"
-                  + (f"  [{result.best_iteration.new_camera}]"
-                     if result.best_iteration.new_camera else ""))
-        print()
-        print(f"{'Scenes':<10}{'Camera':<16}{'Latency':<12}{'Detections':<14}"
-              f"{'Alerts':<10}{'Mem %':<10}{'CPU %':<10}{'Status':<10}")
-        print("-" * 92)
-        for it in result.iterations:
-            status = "✓ PASS" if it.passed else "✗ FAIL"
-            cam = it.new_camera or "baseline"
-            print(f"{it.num_scenes:<10}{cam:<16}{it.latency_ms:<12.0f}"
-                  f"{it.actual_detections:<14}{it.alerts_generated:<10}"
-                  f"{it.memory_percent:<10.1f}{it.cpu_percent:<10.1f}{status}")
-        print("=" * 92)
-
-    def _export(self, result: StreamDensityResult) -> None:
-        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-        # JSON
-        json_path = os.path.join(self.results_dir, f"poi_stream_density_{ts}.json")
-        data = {
-            "target_latency_ms": result.target_latency_ms,
-            "max_scenes": result.max_scenes,
-            "met_target": result.met_target,
-            "iterations": [
-                {
-                    "num_scenes": it.num_scenes,
-                    "new_camera": it.new_camera,
-                    "latency_ms": round(it.latency_ms, 2),
-                    "passed": it.passed,
-                    "memory_percent": round(it.memory_percent, 1),
-                    "cpu_percent": round(it.cpu_percent, 1),
-                    "timestamp": it.timestamp,
-                    "actual_detections": it.actual_detections,
-                    "alerts_generated": it.alerts_generated,
-                    "detections_per_scene": round(it.detections_per_scene, 1),
-                    "latency_details": {
-                        k: round(v, 2) if isinstance(v, float) else v
-                        for k, v in it.latency_details.items()
-                    },
-                }
-                for it in result.iterations
-            ],
-        }
-        if result.best_iteration:
-            data["best_iteration"] = {
-                "num_scenes": result.best_iteration.num_scenes,
-                "latency_ms": round(result.best_iteration.latency_ms, 2),
-            }
-        with open(json_path, "w") as f:
-            json.dump(data, f, indent=2)
-        print(f"\nJSON results: {json_path}")
-
-        # CSV
-        csv_path = os.path.join(self.results_dir, f"poi_stream_density_{ts}.csv")
-        with open(csv_path, "w", newline="") as f:
-            w = csv.writer(f)
-            w.writerow(["scenes", "new_camera", "latency_ms", "detections", "alerts",
-                         "detections_per_scene", "passed", "memory_pct", "cpu_pct"])
-            for it in result.iterations:
-                w.writerow([it.num_scenes, it.new_camera or "baseline",
-                            f"{it.latency_ms:.0f}",
-                            it.actual_detections, it.alerts_generated,
-                            f"{it.detections_per_scene:.1f}",
-                            it.passed, f"{it.memory_percent:.1f}",
-                            f"{it.cpu_percent:.1f}"])
-        print(f"CSV results:  {csv_path}")
+def _load_poi_scaling(app_dir: str):
+    """Load <app_dir>/benchmark/poi_scaling.py as module "poi_scaling" and return it."""
+    import importlib.util
+    scaling_path = os.path.abspath(
+        os.path.join(app_dir, "benchmark", "poi_scaling.py"))
+    if not os.path.exists(scaling_path):
+        raise FileNotFoundError(f"poi_scaling.py not found at {scaling_path}")
+    spec = importlib.util.spec_from_file_location("poi_scaling", scaling_path)
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Cannot load poi_scaling from {scaling_path}")
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = mod  # register before exec, per the importlib recipe
+    spec.loader.exec_module(mod)
+    return mod
+
+
+def main():
+    if len(sys.argv) > 1 and sys.argv[1] in {"generate", "clean", "down"}:
+        _run_subcommand()  # helper subcommands are dispatched before the benchmark parser is built
+        return
 
+    parser = argparse.ArgumentParser(description="Stream Density Benchmark")  # NOTE(review): defaults below ignore TARGET_LATENCY_MS-style env vars — confirm
+    parser.add_argument("--poi_scripts_dir", required=True)
+    parser.add_argument("--app_dir", required=True)
+    parser.add_argument("--target_latency_ms", type=float, default=2000)
+    parser.add_argument("--latency_metric", choices=["avg", "max"], default="avg")
+    parser.add_argument("--scene_increment", type=int, default=1)
+    parser.add_argument("--init_duration", type=int, default=45)
+    parser.add_argument("--stabilise_duration", type=int, default=30)
+    parser.add_argument("--max_iterations", type=int, default=50)
+    parser.add_argument("--max_alert_wait", type=int, default=180)
+    parser.add_argument("--benchmark_duration", type=int, default=120)
+    parser.add_argument("--single_run", action="store_true")
+    parser.add_argument("--scenes", type=int, default=1)
+    parser.add_argument("--results_dir", default="./results")
+    parser.add_argument("--resource_config", default="")
 
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
+    args = parser.parse_args()
 
-def cmd_run(args) -> None:
-    tester = POIStreamDensity(
-        app_dir=args.app_dir,
+    config = BenchmarkConfig(
         target_latency_ms=args.target_latency_ms,
         latency_metric=args.latency_metric,
         scene_increment=args.scene_increment,
         init_duration=args.init_duration,
         stabilise_duration=args.stabilise_duration,
-        results_dir=args.results_dir,
         max_iterations=args.max_iterations,
+        max_alert_wait=args.max_alert_wait,
+        benchmark_duration=args.benchmark_duration,
         single_run=args.single_run,
         single_run_scenes=args.scenes,
-        benchmark_duration=args.benchmark_duration,
-        resource_config=args.resource_config,
-        max_alert_wait=args.max_alert_wait,
+        results_dir=args.results_dir,
     )
-    result = tester.run()
-    sys.exit(0 if result.met_target else 1)
-
-
-def cmd_generate(args) -> None:
-    num = args.scenes
-    _set_stream_density(args.app_dir, num)
-    _generate_dlstreamer_config(args.app_dir, num)
-    _generate_cameras_override(args.app_dir, num)
-    _reinit_env(args.app_dir, resource_config=args.resource_config)
-    print(f"Generated overrides for {num} scene(s).  Run 'make demo' to start.")
-
-
-def cmd_clean(args) -> None:
-    app_dir = args.app_dir
-    bak = _zone_config_path(app_dir).with_suffix(".json.bak")
-    if bak.exists():
-        shutil.copy2(bak, _zone_config_path(app_dir))
-        bak.unlink()
-        logger.info("Restored zone_config.json from backup")
-    else:
-        _set_stream_density(app_dir, 1)
-    _generate_dlstreamer_config(app_dir, 1)
-    _clean_cameras_override(app_dir)
-    _reinit_env(app_dir, resource_config=getattr(args, "resource_config", ""))
-    print("Cleaned up – stream_density reset to 1.")
-
-
-def cmd_down(args) -> None:
-    _docker_compose(args.app_dir, "down -t 30 --volumes --remove-orphans")
-    cmd_clean(args)
 
+    benchmark = StreamDensityBenchmark(
+        config=config,
+        poi_scripts_dir=args.poi_scripts_dir,
+        app_dir=args.app_dir,
+        resource_config=args.resource_config,
+    )
 
-def main() -> None:
-    target_latency = _env_float("TARGET_LATENCY_MS", 2000)
-    latency_metric = _env_str("LATENCY_METRIC", "avg")
-    scene_increment = _env_int("SCENE_INCREMENT", 1)
-    init_duration = _env_int("INIT_DURATION", 45)
-    stabilise_duration = _env_int("STABILISE_DURATION", 30)
-    benchmark_duration = _env_int("BENCHMARK_DURATION", 120)
-    results_dir = _env_str("RESULTS_DIR", "./results")
-    max_iterations = _env_int("MAX_ITERATIONS", 50)
-    # Default max_alert_wait covers ~3 full video cycles (video ≈ 55s) plus
-    # inference warmup, giving enough time for CPU-mode pipelines to generate
-    # a face match on the new camera regardless of video phase offset.
-    max_alert_wait = _env_int("MAX_ALERT_WAIT", 180)
+    results = benchmark.run()
+    sys.exit(0 if results["met_target"] else 1)  # exit code 0 when results["met_target"] is truthy, else 1
 
-    parser = argparse.ArgumentParser(
-        description="POI Stream Density Benchmark",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    sub = parser.add_subparsers(dest="command", required=True)
 
-    # --- run ---
-    p_run = sub.add_parser("run", help="Run POI stream-density loop")
-    p_run.add_argument("app_dir", help="Path to person-of-interest/")
-    p_run.add_argument("--target_latency_ms", type=float, default=target_latency)
-    p_run.add_argument("--latency_metric", choices=["avg", "max"], default=latency_metric)
-    p_run.add_argument("--scene_increment", type=int, default=scene_increment)
-    p_run.add_argument("--init_duration", type=int, default=init_duration)
-    p_run.add_argument("--stabilise_duration", type=int, default=stabilise_duration)
-    p_run.add_argument("--results_dir", default=results_dir)
-    p_run.add_argument("--max_iterations", type=int, default=max_iterations)
-    p_run.add_argument("--single_run", action="store_true",
-                       help="Run once with --scenes scenes (benchmark mode)")
-    p_run.add_argument("--benchmark_duration", type=int, default=benchmark_duration,
-                       help="Max duration in seconds for single benchmark (default: 120). "
-                            "Exits early when an alert is received.")
-    p_run.add_argument("--scenes", type=int, default=1,
-                       help="Number of scenes for single-run mode")
-    p_run.add_argument("--resource_config", default="",
-                       help="Absolute path to device resource config "
-                            "(e.g. /path/to/configs/res/all-gpu.env). "
-                            "Passed as RESOURCE_CONFIG to init.sh on every "
-                            "re-init so device and precision are preserved "
-                            "across stream-density iterations.")
-    p_run.add_argument("--max_alert_wait", type=int, default=max_alert_wait,
-                       help="Max total seconds to wait for an alert from the "
-                            "new camera per iteration (default: 180). "
-                            "The wait retries in init_duration windows until "
-                            "an alert is received or this timeout expires. "
-                            "Increase for CPU mode where inference is slower "
-                            "and a full video cycle (~55s) must complete.")
-    p_run.set_defaults(func=cmd_run)
+def _run_subcommand() -> None:
+    """Run the generate / clean / down helper selected by sys.argv[1] using poi_scaling.py."""
+    import shutil
+    sub = sys.argv[1]  # main() only calls this when argv[1] is generate/clean/down
+    parser = argparse.ArgumentParser(description=f"{sub} helper")
+    parser.add_argument("command")  # positional that consumes the subcommand word itself
+    parser.add_argument("app_dir")
+    parser.add_argument("--scenes", type=int, default=1)  # only read by the "generate" branch
+    parser.add_argument("--resource_config", default="")
+    args = parser.parse_args()
 
-    # --- generate ---
-    p_gen = sub.add_parser("generate", help="Generate overrides for N scenes")
-    p_gen.add_argument("app_dir")
-    p_gen.add_argument("--scenes", type=int, default=1)
-    p_gen.add_argument("--resource_config", default="",
-                       help="Absolute path to device resource config file.")
-    p_gen.set_defaults(func=cmd_generate)
+    ps = _load_poi_scaling(args.app_dir)
 
-    # --- clean ---
-    p_clean = sub.add_parser("clean", help="Revert to single scene")
-    p_clean.add_argument("app_dir")
-    p_clean.add_argument("--resource_config", default="",
-                         help="Absolute path to device resource config file.")
-    p_clean.set_defaults(func=cmd_clean)
+    if sub == "generate":
+        ps.set_stream_density(args.app_dir, args.scenes)
+        ps.generate_dlstreamer_config(args.app_dir, args.scenes)
+        ps.generate_cameras_override(args.app_dir, args.scenes)
+        ps.reinit_env(args.app_dir, resource_config=args.resource_config)
+        print(f"Generated overrides for {args.scenes} scene(s).")
 
-    # --- down ---
-    p_down = sub.add_parser("down", help="Stop all services and clean up")
-    p_down.add_argument("app_dir")
-    p_down.set_defaults(func=cmd_down)
+    elif sub == "clean":
+        bak = ps.zone_config_path(args.app_dir).with_suffix(".json.bak")
+        if bak.exists():
+            shutil.copy2(bak, ps.zone_config_path(args.app_dir))  # restore the zone config from its .json.bak copy
+            bak.unlink()  # the backup is removed once it has been copied back
+        else:
+            ps.set_stream_density(args.app_dir, 1)  # no backup present: call set_stream_density with 1 instead
+        ps.generate_dlstreamer_config(args.app_dir, 1)
+        ps.clean_cameras_override(args.app_dir)
+        ps.reinit_env(args.app_dir, resource_config=args.resource_config)
+        print("Cleaned stream-density overrides.")
 
-    args = parser.parse_args()
-    args.func(args)
+    elif sub == "down":
+        ps.docker_compose(args.app_dir, "down -t 30 --volumes --remove-orphans")
+        ps.clean_cameras_override(args.app_dir)  # NOTE(review): the zone-config restore / reinit_env steps of "clean" are not run here — confirm
+        print("Brought down stream-density services.")
 
 
 if __name__ == "__main__":
     main()