From f7967075451cf7e2aab3fbfcc1b4675ccbaffb9f Mon Sep 17 00:00:00 2001
From: gauss-math-inc <gauss-math-inc@users.noreply.github.com>
Date: Sun, 29 Mar 2026 23:59:17 +0000
Subject: [PATCH] test(installer): add tiny managed prove staging smoke

---
 tests/installer/README.md                     |  12 +
 .../fixtures/lean_hello_sorry/HelloSorry.lean |   1 +
 .../lean_hello_sorry/HelloSorry/Basic.lean    |   6 +
 .../fixtures/lean_hello_sorry/lakefile.toml   |   6 +
 .../fixtures/lean_hello_sorry/lean-toolchain  |   1 +
 .../Dockerfile                                |   5 +
 .../README.md                                 |  33 ++
 .../managed_prove_smoke.py                    | 346 ++++++++++++++++++
 .../run-in-container.sh                       |  72 ++++
 .../ubuntu_managed_prove_sorry_smoke/run.sh   |  34 ++
 10 files changed, 516 insertions(+)
 create mode 100644 tests/installer/fixtures/lean_hello_sorry/HelloSorry.lean
 create mode 100644 tests/installer/fixtures/lean_hello_sorry/HelloSorry/Basic.lean
 create mode 100644 tests/installer/fixtures/lean_hello_sorry/lakefile.toml
 create mode 100644 tests/installer/fixtures/lean_hello_sorry/lean-toolchain
 create mode 100644 tests/installer/ubuntu_managed_prove_sorry_smoke/Dockerfile
 create mode 100644 tests/installer/ubuntu_managed_prove_sorry_smoke/README.md
 create mode 100755 tests/installer/ubuntu_managed_prove_sorry_smoke/managed_prove_smoke.py
 create mode 100755 tests/installer/ubuntu_managed_prove_sorry_smoke/run-in-container.sh
 create mode 100755 tests/installer/ubuntu_managed_prove_sorry_smoke/run.sh

diff --git a/tests/installer/README.md b/tests/installer/README.md
index f7fd92a..7020eee 100644
--- a/tests/installer/README.md
+++ b/tests/installer/README.md
@@ -20,3 +20,15 @@ Current scenarios:
   - verifies the installer bootstraps its own Debian/Ubuntu prerequisites
   - stages a dummy `OPENAI_API_KEY` to test non-interactive provider setup
   - verifies the workflow-derived workspace, config, guide, and helper scripts
+
+- `ubuntu_managed_prove_sorry_smoke`
+  Verifies that a stock `ubuntu:24.04` container can install from the current
+  checkout and stage a managed `/prove` run against a tiny Lean project
+  containing one `sorry`. This scenario:
+  - installs via `./scripts/install-internal.sh` from a mounted git checkout
+  - accepts either `ANTHROPIC_API_KEY` (Claude backend) or `OPENAI_API_KEY`
+    (Codex backend)
+  - initializes a local `.gauss` manifest in the fixture project
+  - verifies the staged startup context, managed skill, and backend instructions
+  - verifies the managed MCP config points `LEAN_PROJECT_PATH` at the fixture
+  - leaves an opt-in `LIVE_MANAGED_PROVE_SMOKE=1` path for manual backend runs
diff --git a/tests/installer/fixtures/lean_hello_sorry/HelloSorry.lean b/tests/installer/fixtures/lean_hello_sorry/HelloSorry.lean
new file mode 100644
index 0000000..05d6944
--- /dev/null
+++ b/tests/installer/fixtures/lean_hello_sorry/HelloSorry.lean
@@ -0,0 +1 @@
+import HelloSorry.Basic
diff --git a/tests/installer/fixtures/lean_hello_sorry/HelloSorry/Basic.lean b/tests/installer/fixtures/lean_hello_sorry/HelloSorry/Basic.lean
new file mode 100644
index 0000000..155c996
--- /dev/null
+++ b/tests/installer/fixtures/lean_hello_sorry/HelloSorry/Basic.lean
@@ -0,0 +1,6 @@
+namespace HelloSorry
+
+theorem hello_world : True := by
+  sorry
+
+end HelloSorry
diff --git a/tests/installer/fixtures/lean_hello_sorry/lakefile.toml b/tests/installer/fixtures/lean_hello_sorry/lakefile.toml
new file mode 100644
index 0000000..bc5d2ba
--- /dev/null
+++ b/tests/installer/fixtures/lean_hello_sorry/lakefile.toml
@@ -0,0 +1,6 @@
+name = "HelloSorry"
+version = "0.1.0"
+defaultTargets = ["HelloSorry"]
+
+[[lean_lib]]
+name = "HelloSorry"
diff --git a/tests/installer/fixtures/lean_hello_sorry/lean-toolchain b/tests/installer/fixtures/lean_hello_sorry/lean-toolchain
new file mode 100644
index 0000000..4c685fa
--- /dev/null
+++ b/tests/installer/fixtures/lean_hello_sorry/lean-toolchain
@@ -0,0 +1 @@
+leanprover/lean4:v4.28.0
diff --git a/tests/installer/ubuntu_managed_prove_sorry_smoke/Dockerfile b/tests/installer/ubuntu_managed_prove_sorry_smoke/Dockerfile
new file mode 100644
index 0000000..5bbb5df
--- /dev/null
+++ b/tests/installer/ubuntu_managed_prove_sorry_smoke/Dockerfile
@@ -0,0 +1,5 @@
+FROM ubuntu:24.04
+
+SHELL ["/bin/bash", "-lc"]
+
+CMD ["/bin/bash"]
diff --git a/tests/installer/ubuntu_managed_prove_sorry_smoke/README.md b/tests/installer/ubuntu_managed_prove_sorry_smoke/README.md
new file mode 100644
index 0000000..aebb420
--- /dev/null
+++ b/tests/installer/ubuntu_managed_prove_sorry_smoke/README.md
@@ -0,0 +1,33 @@
+# Ubuntu Managed `/prove` Staging Smoke
+
+This scenario verifies that a clean `ubuntu:24.04` container can:
+
+1. install Open Gauss from the current checkout with `./scripts/install-internal.sh`
+2. copy a tiny Lean project that contains one `sorry`
+3. initialize a local `.gauss` manifest in that project
+4. stage a managed `/prove HelloSorry/Basic.lean` launch against that project
+5. confirm the staged skill, startup context, backend instructions, and MCP
+   config all point at the tiny Lean project correctly
+
+Provider requirements:
+
+- set `ANTHROPIC_API_KEY` to exercise the Claude-managed backend, or
+- set `OPENAI_API_KEY` to exercise the Codex-managed backend
+
+Usage:
+
+```bash
+tests/installer/ubuntu_managed_prove_sorry_smoke/run.sh
+```
+
+Notes:
+
+- The scenario mounts the current checkout at `/src`, so it always exercises
+  the branch you are testing without needing GitHub access inside the container.
+- The default path is deterministic: it validates managed staging only, not a
+  full model-driven proof attempt.
+- Set `LIVE_MANAGED_PROVE_SMOKE=1` to turn on the best-effort live backend
+  execution path for manual debugging. That path is intentionally not the
+  default smoke because backend/model timing can make it flaky.
+- The fixture intentionally stays tiny so the smoke focuses on managed proving,
+  not on a large project setup.
diff --git a/tests/installer/ubuntu_managed_prove_sorry_smoke/managed_prove_smoke.py b/tests/installer/ubuntu_managed_prove_sorry_smoke/managed_prove_smoke.py
new file mode 100755
index 0000000..041834b
--- /dev/null
+++ b/tests/installer/ubuntu_managed_prove_sorry_smoke/managed_prove_smoke.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import os
+import shlex
+import subprocess
+import sys
+import time
+import tomllib
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from gauss_cli.autoformalize import resolve_autoformalize_request
+from gauss_cli.config import load_config
+from gauss_cli.project import initialize_gauss_project
+from swarm_manager import SwarmManager, SwarmTask
+
+
+def _resolve_backend_and_auth(explicit_backend: str | None) -> tuple[str, str]:
+    if explicit_backend:
+        backend = explicit_backend.strip().lower()
+        if backend not in {"claude-code", "codex"}:
+            raise SystemExit(f"unsupported backend override: {explicit_backend}")
+        return backend, "api-key"
+
+    if os.environ.get("ANTHROPIC_API_KEY", "").strip():
+        return "claude-code", "api-key"
+    if os.environ.get("OPENAI_API_KEY", "").strip():
+        return "codex", "api-key"
+    raise SystemExit("set ANTHROPIC_API_KEY or OPENAI_API_KEY before running this smoke")
+
+
+def _tail_output(task: SwarmTask, limit: int = 80) -> list[str]:
+    lines = task._output_lines or []
+    return lines[-limit:]
+
+
+def _print_task_diagnostics(task: SwarmTask) -> None:
+    print(f"task_id={task.task_id}")
+    print(f"status={task.status}")
+    print(f"progress={task.progress}")
+    if task.error:
+        print(f"error={task.error}")
+    if task.result:
+        print("result:")
+        print(task.result)
+    lines = _tail_output(task)
+    if lines:
+        print("output_tail:")
+        for line in lines:
+            print(line)
+
+
+def _wait_for_task(
+    manager: SwarmManager,
+    task: SwarmTask,
+    *,
+    project_dir: Path,
+    target_path: Path,
+    timeout_seconds: int,
+) -> bool:
+    """Wait for an interactive managed workflow to land a proof or terminate.
+
+    `/prove` sessions are interactive and can stay open after the proof lands, so
+    this smoke treats "target file no longer contains sorry and lake build passes"
+    as success and then terminates the managed session.
+    """
+    thread = task.thread
+    if thread is None:
+        raise RuntimeError("managed workflow did not create a task thread")
+
+    deadline = time.time() + timeout_seconds
+    last_report_at = 0.0
+    while True:
+        if target_path.is_file():
+            rendered = target_path.read_text(encoding="utf-8")
+            if "sorry" not in rendered:
+                _run_lake_build(project_dir)
+                if task.status not in {"complete", "failed", "cancelled"}:
+                    print("==> Proof landed; stopping interactive managed session")
+                    manager.cancel(task.task_id)
+                    thread.join(timeout=10)
+                return True
+
+        if task.status in {"complete", "failed", "cancelled"} and not thread.is_alive():
+            return False
+
+        remaining = deadline - time.time()
+        if remaining <= 0:
+            raise TimeoutError(
+                f"managed /prove workflow did not finish within {timeout_seconds}s"
+            )
+
+        now = time.time()
+        if now - last_report_at >= 10:
+            print(f"heartbeat: status={task.status} progress={task.progress}")
+            last_lines = _tail_output(task, limit=8)
+            if last_lines:
+                print("heartbeat_output_tail:")
+                for line in last_lines:
+                    print(line)
+            last_report_at = now
+
+        thread.join(timeout=min(1.0, remaining))
+
+
+def _run_lake_build(project_dir: Path) -> None:
+    subprocess.run(
+        ["lake", "build"],
+        cwd=project_dir,
+        env=dict(os.environ),
+        check=True,
+        text=True,
+    )
+
+
+def _run_codex_exec(plan, timeout_seconds: int) -> None:
+    argv = list(plan.handoff_request.argv)
+    if not argv or Path(argv[0]).name != "codex":
+        raise SystemExit(f"expected a codex launch argv, got: {argv!r}")
+
+    exec_argv = [argv[0], "exec", "--skip-git-repo-check", *argv[1:]]
+    print(f"==> Running Codex exec smoke: {shlex.join(exec_argv)}")
+
+    try:
+        completed = subprocess.run(
+            exec_argv,
+            cwd=plan.handoff_request.cwd,
+            env=plan.handoff_request.env,
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            timeout=timeout_seconds,
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        raise SystemExit(
+            f"codex exec did not finish within {timeout_seconds}s"
+        ) from exc
+
+    stdout = completed.stdout.strip()
+    stderr = completed.stderr.strip()
+    if stdout:
+        print("codex_stdout_tail:")
+        for line in stdout.splitlines()[-80:]:
+            print(line)
+    if stderr:
+        print("codex_stderr_tail:")
+        for line in stderr.splitlines()[-80:]:
+            print(line)
+
+    if completed.returncode != 0:
+        raise SystemExit(f"codex exec failed with exit code {completed.returncode}")
+
+
+def _assert_staged_artifacts(plan, project_dir: Path, target_path: Path) -> None:
+    managed = plan.managed_context
+    staged = plan.staged_paths()
+
+    print("==> Verifying staged managed workflow artifacts")
+
+    if managed.project_root != project_dir:
+        raise SystemExit(
+            f"managed project root mismatch: expected {project_dir}, got {managed.project_root}"
+        )
+    if managed.lean_root != project_dir:
+        raise SystemExit(
+            f"managed lean root mismatch: expected {project_dir}, got {managed.lean_root}"
+        )
+    if managed.project_manifest_path is None or not managed.project_manifest_path.is_file():
+        raise SystemExit("managed project manifest was not staged")
+    if managed.startup_context_path is None or not managed.startup_context_path.is_file():
+        raise SystemExit("managed startup context was not staged")
+    if managed.instructions_path is None or not managed.instructions_path.is_file():
+        raise SystemExit("managed backend instructions were not staged")
+    if managed.skills_root is None or not managed.skills_root.joinpath("SKILL.md").is_file():
+        raise SystemExit("managed Lean skill was not staged")
+    if not managed.plugin_root.exists():
+        raise SystemExit("managed Lean plugin root was not staged")
+    if not managed.mcp_config_path.is_file():
+        raise SystemExit("managed MCP config was not staged")
+    if managed.backend_config_path is not None and not managed.backend_config_path.is_file():
+        raise SystemExit("managed backend config path was recorded but missing on disk")
+
+    startup_text = managed.startup_context_path.read_text(encoding="utf-8")
+    instructions_text = managed.instructions_path.read_text(encoding="utf-8")
+    mcp_text = managed.mcp_config_path.read_text(encoding="utf-8")
+    mcp_config = tomllib.loads(mcp_text)
+
+    expected_backend_line = f"- Backend command: `{plan.backend_command}`"
+    expected_project_line = f"- Project root: `{project_dir}`"
+    expected_target = str(target_path.relative_to(project_dir))
+
+    if expected_backend_line not in startup_text:
+        raise SystemExit("startup context is missing the expected backend command")
+    if expected_project_line not in startup_text:
+        raise SystemExit("startup context is missing the expected project root")
+    if expected_target not in startup_text:
+        raise SystemExit("startup context is missing the target file path")
+    if str(project_dir) not in instructions_text:
+        raise SystemExit("managed backend instructions are missing the project root")
+    if str(managed.skills_root) not in instructions_text:
+        raise SystemExit("managed backend instructions are missing the staged skill path")
+
+    lean_lsp = mcp_config.get("mcp_servers", {}).get("lean-lsp")
+    if not isinstance(lean_lsp, dict):
+        raise SystemExit("managed MCP config is missing the lean-lsp server entry")
+    env = lean_lsp.get("env", {})
+    if env.get("LEAN_PROJECT_PATH") != str(project_dir):
+        raise SystemExit(
+            "managed MCP config points LEAN_PROJECT_PATH at the wrong project root"
+        )
+
+    print(f"staged_backend={managed.backend_name}")
+    print(f"staged_project_root={managed.project_root}")
+    print(f"staged_skill_root={managed.skills_root}")
+    print(f"staged_plugin_root={managed.plugin_root}")
+    print(f"staged_mcp_config={managed.mcp_config_path}")
+    print(f"staged_startup_context={managed.startup_context_path}")
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description="Run a managed /prove smoke against a tiny Lean project."
+    )
+    parser.add_argument("--project-dir", required=True, help="Path to the Lean project copy")
+    parser.add_argument(
+        "--target",
+        default="HelloSorry/Basic.lean",
+        help="Relative path forwarded to /prove",
+    )
+    parser.add_argument(
+        "--backend",
+        default="",
+        help="Optional backend override: claude-code or codex",
+    )
+    parser.add_argument(
+        "--timeout-seconds",
+        type=int,
+        default=900,
+        help="How long to wait for the managed workflow to finish",
+    )
+    parser.add_argument(
+        "--live-run",
+        action="store_true",
+        help="Actually run the managed backend after staging (best-effort debug path).",
+    )
+    args = parser.parse_args()
+
+    backend_name, auth_mode = _resolve_backend_and_auth(args.backend or None)
+
+    project_dir = Path(args.project_dir).expanduser().resolve()
+    target_path = (project_dir / args.target).resolve()
+
+    if not project_dir.is_dir():
+        raise SystemExit(f"project directory not found: {project_dir}")
+    if not target_path.is_file():
+        raise SystemExit(f"target file not found: {target_path}")
+
+    print(f"==> Initializing Gauss project in {project_dir}")
+    project = initialize_gauss_project(project_dir, name="Installer Hello Sorry")
+    print(f"==> Project manifest: {project.manifest_path}")
+
+    config = load_config()
+    base_env = dict(os.environ)
+    base_env["GAUSS_AUTOFORMALIZE_BACKEND"] = backend_name
+    base_env["GAUSS_AUTOFORMALIZE_AUTH_MODE"] = auth_mode
+
+    command = f"/prove {args.target}"
+    print(f"==> Resolving managed workflow: {command} ({backend_name}, {auth_mode})")
+    plan = resolve_autoformalize_request(
+        command,
+        config,
+        active_cwd=str(project_dir),
+        base_env=base_env,
+    )
+
+    _assert_staged_artifacts(plan, project_dir, target_path)
+
+    if not args.live_run:
+        print("==> Staging smoke complete")
+        return 0
+
+    if plan.managed_context.backend_name == "codex":
+        _run_codex_exec(plan, args.timeout_seconds)
+    else:
+        SwarmManager.reset()
+        manager = SwarmManager()
+        description = plan.user_instruction or args.target
+        print("==> Spawning managed workflow task")
+        task = manager.spawn_interactive(
+            theorem=plan.user_instruction or plan.backend_command,
+            description=description,
+            argv=plan.handoff_request.argv,
+            cwd=plan.handoff_request.cwd,
+            env=plan.handoff_request.env,
+            workflow_kind=plan.workflow_kind,
+            workflow_command=plan.backend_command,
+            project_name=plan.project.label,
+            project_root=str(plan.project.root),
+            backend_name=plan.managed_context.backend_name,
+        )
+
+        solved = False
+        try:
+            solved = _wait_for_task(
+                manager,
+                task,
+                project_dir=project_dir,
+                target_path=target_path,
+                timeout_seconds=args.timeout_seconds,
+            )
+        except TimeoutError as exc:
+            manager.cancel(task.task_id)
+            _print_task_diagnostics(task)
+            raise SystemExit(str(exc)) from exc
+
+        print("==> Managed workflow finished")
+        _print_task_diagnostics(task)
+
+        if not solved and task.status != "complete":
+            raise SystemExit(f"managed workflow failed with status={task.status}")
+
+    rendered = target_path.read_text(encoding="utf-8")
+    if "sorry" in rendered:
+        print("final_target_contents:")
+        print(rendered)
+        raise SystemExit(f"{target_path} still contains `sorry`")
+
+    print("==> Verifying final Lean build")
+    try:
+        _run_lake_build(project_dir)
+    except subprocess.CalledProcessError as exc:
+        raise SystemExit(f"final lake build failed with exit code {exc.returncode}") from exc
+
+    print(f"==> Success: managed /prove removed sorry from {target_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/installer/ubuntu_managed_prove_sorry_smoke/run-in-container.sh b/tests/installer/ubuntu_managed_prove_sorry_smoke/run-in-container.sh
new file mode 100755
index 0000000..111bf63
--- /dev/null
+++ b/tests/installer/ubuntu_managed_prove_sorry_smoke/run-in-container.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+REPO_ROOT="${REPO_ROOT:-/src}"
+GAUSS_HOME="${GAUSS_HOME:-/root/.gauss}"
+FIXTURE_DIR="$REPO_ROOT/tests/installer/fixtures/lean_hello_sorry"
+SCENARIO_DIR="$REPO_ROOT/tests/installer/ubuntu_managed_prove_sorry_smoke"
+PROJECT_TARGET="HelloSorry/Basic.lean"
+
+log() {
+    printf '==> %s\n' "$1"
+}
+
+die() {
+    printf 'ERROR: %s\n' "$1" >&2
+    exit 1
+}
+
+if [[ ! -f "$REPO_ROOT/scripts/install-internal.sh" ]]; then
+    die "installer script not found under $REPO_ROOT"
+fi
+
+if [[ ! -d "$FIXTURE_DIR" ]]; then
+    die "fixture directory not found: $FIXTURE_DIR"
+fi
+
+if [[ -z "${ANTHROPIC_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" ]]; then
+    die "set ANTHROPIC_API_KEY or OPENAI_API_KEY inside the container"
+fi
+
+if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
+    BACKEND="claude-code"
+    AUTH_KEY_LABEL="ANTHROPIC_API_KEY"
+elif [[ -n "${OPENAI_API_KEY:-}" ]]; then
+    BACKEND="codex"
+    AUTH_KEY_LABEL="OPENAI_API_KEY"
+fi
+
+log "Installer scenario: ubuntu_managed_prove_sorry_smoke"
+log "Using repository checkout: $REPO_ROOT"
+log "Using backend: $BACKEND ($AUTH_KEY_LABEL)"
+
+cd "$REPO_ROOT"
+./scripts/install-internal.sh \
+    --gauss-home "$GAUSS_HOME" \
+    --skip-setup
+
+export GAUSS_HOME
+export PATH="$HOME/.local/bin:$REPO_ROOT/venv/bin:$HOME/.elan/bin:$PATH"
+
+WORKDIR="$(mktemp -d)"
+PROJECT_DIR="$WORKDIR/HelloSorry"
+cp -R "$FIXTURE_DIR" "$PROJECT_DIR"
+
+log "Priming Lean project"
+( cd "$PROJECT_DIR" && lake build )
+
+log "Running managed /prove staging smoke"
+SMOKE_ARGS=(
+    --project-dir "$PROJECT_DIR"
+    --target "$PROJECT_TARGET"
+    --backend "$BACKEND"
+    --timeout-seconds "${PROVE_TIMEOUT_SECONDS:-900}"
+)
+if [[ "${LIVE_MANAGED_PROVE_SMOKE:-}" == "1" ]]; then
+    log "Enabling live managed workflow execution (best-effort debug path)"
+    SMOKE_ARGS+=(--live-run)
+fi
+"$REPO_ROOT/venv/bin/python" "$SCENARIO_DIR/managed_prove_smoke.py" "${SMOKE_ARGS[@]}"
+
+log "Current $PROJECT_TARGET"
+sed -n '1,160p' "$PROJECT_DIR/$PROJECT_TARGET"
diff --git a/tests/installer/ubuntu_managed_prove_sorry_smoke/run.sh b/tests/installer/ubuntu_managed_prove_sorry_smoke/run.sh
new file mode 100755
index 0000000..e7922b7
--- /dev/null
+++ b/tests/installer/ubuntu_managed_prove_sorry_smoke/run.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+IMAGE_TAG="${IMAGE_TAG:-opengauss-installer-ubuntu-managed-prove-sorry-smoke}"
+
+if ! command -v docker >/dev/null 2>&1; then
+    echo "ERROR: docker is not installed or not on PATH" >&2
+    exit 1
+fi
+
+if ! docker info >/dev/null 2>&1; then
+    echo "ERROR: docker daemon is not running" >&2
+    exit 1
+fi
+
+if [[ -z "${ANTHROPIC_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" ]]; then
+    echo "ERROR: set ANTHROPIC_API_KEY or OPENAI_API_KEY for the managed prove smoke" >&2
+    exit 1
+fi
+
+echo "==> Building $IMAGE_TAG"
+docker build -t "$IMAGE_TAG" -f "$SCRIPT_DIR/Dockerfile" "$SCRIPT_DIR"
+
+echo "==> Running ubuntu_managed_prove_sorry_smoke"
+docker run --rm \
+    -e ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \
+    -e OPENAI_API_KEY="${OPENAI_API_KEY:-}" \
+    -e PROVE_TIMEOUT_SECONDS="${PROVE_TIMEOUT_SECONDS:-900}" \
+    -e REPO_ROOT=/src \
+    -v "$REPO_ROOT:/src" \
+    "$IMAGE_TAG" \
+    bash -lc "/src/tests/installer/ubuntu_managed_prove_sorry_smoke/run-in-container.sh"