From afe40cb4cb682e4e9886b52cff9bf439f622388e Mon Sep 17 00:00:00 2001 From: SyncTek <145518101+SyncTekLLC@users.noreply.github.com> Date: Fri, 22 May 2026 15:13:45 -0400 Subject: [PATCH 1/3] test(b5): recording format red tests [F#11 F#12 F#17] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F#11: regression guards (tap_first persistence + replay) — fixed in #146 F#12: RED — _build_requires_block allows duplicate text_subset_required F#17: RED — migrate-recording CLI has no --all / --missing-contract flag Co-Authored-By: Claude Sonnet 4.6 --- .../test_b5_domain_b_recording_format.py | 688 ++++++++++++++++++ 1 file changed, 688 insertions(+) create mode 100644 simdrive/tests/test_b5_domain_b_recording_format.py diff --git a/simdrive/tests/test_b5_domain_b_recording_format.py b/simdrive/tests/test_b5_domain_b_recording_format.py new file mode 100644 index 0000000..809c608 --- /dev/null +++ b/simdrive/tests/test_b5_domain_b_recording_format.py @@ -0,0 +1,688 @@ +"""Recording format integrity tests for b5 dogfood findings (Domain B). + +F#11 Recording drops `tap_first` focus context from type_text [HIGH] + STATUS: Fixed in main (commit c746e61, PR #146). Tests here are + regression guards — they PASS and confirm the fix is in place. + +F#12 `text_subset_required` contains duplicates in state contract [LOW] + STATUS: NOT YET FIXED. Tests are RED and will fail until dedup is added + to _build_requires_block. + +F#17 `simdrive migrate-recording --all` / `--missing-contract` flag [LOW] + STATUS: NOT YET FIXED. Tests are RED — argparse rejects --all/--missing-contract + with exit code 2 (unrecognised argument) until the flags are implemented. + +Run with: pytest -m "not live" tests/test_b5_domain_b_recording_format.py -x --tb=short +""" +from __future__ import annotations + +import shutil +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +import yaml + + +# --------------------------------------------------------------------------- +# Shared helpers / fake factory +# --------------------------------------------------------------------------- + +_FAKE_UDID = "FAKEU-DID0-B5DOM-AIN-B0000000000" + + +def _make_sim_session(tmp_path: Path, sid: str = "b5-domain-b"): + """Return a minimal simulator Session stored in _SESSIONS.""" + from simdrive import session as session_mod + from simdrive.sim import Device + + d = Device( + udid=_FAKE_UDID, + name="iPhone 17 Pro", + os_version="26.3", + state="active", + ) + workdir = tmp_path / "sessions" / sid + workdir.mkdir(parents=True, exist_ok=True) + s = session_mod.Session( + session_id=sid, + device=d, + workdir=workdir, + target="simulator", + last_screenshot_w=1206, + last_screenshot_h=2622, + ) + session_mod._SESSIONS[sid] = s + return s + + +def _attach_fake_recorder(s, tmp_path: Path): + """Attach a lightweight recorder stub that captures add_step calls.""" + from simdrive import recorder as recorder_mod + + root = tmp_path / "recordings" / s.session_id + root.mkdir(parents=True, exist_ok=True) + (root / "snapshots").mkdir(parents=True, exist_ok=True) + + rec = MagicMock(spec=recorder_mod.Recorder) + rec.steps = [] + rec.name = s.session_id + rec.session = s + rec.root = root + rec.yaml_path = root / "recording.yaml" + + def _add_step(action, args, pre_screenshot, post_screenshot=None, marks_count=None, **kw): + idx = len(rec.steps) + 1 + rec.steps.append({"id": idx, "action": action, "args": dict(args)}) + return idx + + rec.add_step.side_effect = _add_step + s.recorder = rec + return rec + + +def _fake_png(path: Path) -> Path: + """Write a tiny fake PNG so path.exists() and stat checks pass.""" + from PIL import Image + + path.parent.mkdir(parents=True, exist_ok=True) + Image.new("RGB", (1206, 2622), (200, 200, 200)).save(path) + return path + + +def _write_recording_yaml(dir_: Path, *, requires=None, steps=None, + bundle_id: str = "com.example.app", + device: str = "iPhone 17 Pro", + os_version: str = "26.3", + app_version: str = "1.0.0") -> Path: + """Write a minimal recording.yaml under dir_.""" + dir_.mkdir(parents=True, exist_ok=True) + snaps = dir_ / "snapshots" + snaps.mkdir(parents=True, exist_ok=True) + + default_steps: list = [] + if steps is None: + pre = snaps / "001_pre.png" + _fake_png(pre) + post = snaps / "001_post.png" + _fake_png(post) + default_steps = [{ + "id": 1, + "action": "tap", + "args": {"x": 100, "y": 200, "screenshot_w": 1206, "screenshot_h": 2622}, + "pre_screenshot": "snapshots/001_pre.png", + "post_screenshot": "snapshots/001_post.png", + "captured_at": 0.0, + }] + + payload: dict = { + "name": dir_.name, + "created_at": 0.0, + "device": device, + "os_version": os_version, + "app_bundle_id": bundle_id, + "app_version": app_version, + "steps": steps if steps is not None else default_steps, + } + if requires is not None: + payload["requires"] = requires + + yaml_path = dir_ / "recording.yaml" + yaml_path.write_text(yaml.safe_dump(payload, sort_keys=False)) + return yaml_path + + +# =========================================================================== +# F#11 — Recording drops `tap_first` focus context from type_text +# =========================================================================== + + +class TestF11TypeTextTapFirstPersisted: + """F#11: type_text with tap_first must serialize the full args dict.""" + + def test_record_act_step_includes_tap_first_in_args(self, tmp_path, monkeypatch): + """_record_act_step for type_text stores tap_first alongside text in step args. + + Current behaviour: only {"text": text} is recorded; tap_first is dropped. + Expected behaviour: {"text": text, "tap_first": } is persisted. + + RED: will fail because server._record_act_step passes {"text": text} only. + """ + from simdrive import session as session_mod, server + + s = _make_sim_session(tmp_path, "f11-record-act") + rec = _attach_fake_recorder(s, tmp_path) + + pre_png = tmp_path / "pre.png" + _fake_png(pre_png) + post_png = tmp_path / "post.png" + _fake_png(post_png) + + fake_obs = SimpleNamespace( + screenshot_path=post_png, + screenshot_w=1206, + screenshot_h=2622, + marks=[], + ) + monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) + + # The tap_first target that SHOULD be recorded alongside the text. + tap_first_target = {"text": "you@example.com"} + + # Call _record_act_step directly with what tool_type_text currently passes + # (only {"text": text}, omitting tap_first — that is the bug F#11). + server._record_act_step(s, "type_text", {"text": "dogfood@synctek.io"}, pre_png) + + # Find the step that was recorded. + assert len(rec.steps) == 1, "Expected exactly one step recorded" + step = rec.steps[0] + assert step["action"] == "type_text" + # F#11: tap_first MUST be present in the recorded args. + # Currently fails because _record_act_step is called with {"text": text} only. + assert "tap_first" in step["args"], ( + "F#11: tap_first is missing from recorded step args — " + "recording.yaml will not be able to replay the focus context" + ) + + def test_tool_type_text_step_args_includes_tap_first(self, tmp_path, monkeypatch): + """tool_type_text must pass tap_first into the step args when recording. + + RED: will fail because line ~992 in server.py records {"text": text} without tap_first. + """ + from simdrive import server, session as session_mod, som + from simdrive.sim import Device + + sid = "f11-tool-type-text" + s = _make_sim_session(tmp_path, sid) + rec = _attach_fake_recorder(s, tmp_path) + + tap_first_target = {"text": "you@example.com"} + text_value = "dogfood@synctek.io" + + pre_png = tmp_path / "obs_pre.png" + _fake_png(pre_png) + post_png = tmp_path / "obs_post.png" + _fake_png(post_png) + + # Create a mark that matches the tap_first target so _resolve_target_xy succeeds. + matching_mark = som.Mark(id=1, x=100, y=200, w=300, h=40, + text="you@example.com", confidence=0.95) + s.last_marks = [matching_mark.to_dict()] + s.last_screenshot_w = 1206 + s.last_screenshot_h = 2622 + + fake_obs = SimpleNamespace( + screenshot_path=pre_png, + screenshot_w=1206, + screenshot_h=2622, + marks=[matching_mark], + ) + + monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) + monkeypatch.setattr("simdrive.act.type_text", lambda text, udid=None: None) + monkeypatch.setattr("simdrive.act._backend", lambda: "hid") + monkeypatch.setattr("simdrive.act.tap", lambda *a, **kw: None) + + result = server.tool_type_text({ + "session_id": sid, + "text": text_value, + "tap_first": tap_first_target, + }) + + assert result.get("ok") is True, f"tool_type_text returned error: {result}" + assert len(rec.steps) == 1, f"Expected 1 recorded step, got {len(rec.steps)}" + step = rec.steps[0] + assert step["action"] == "type_text" + # F#11: tap_first must be in the step args stored in the recording. + assert "tap_first" in step["args"], ( + "F#11: tap_first not persisted in recording step args — " + f"got args={step['args']!r}" + ) + assert step["args"]["tap_first"] == tap_first_target, ( + f"F#11: tap_first value mismatch: expected {tap_first_target!r}, " + f"got {step['args'].get('tap_first')!r}" + ) + + def test_replay_parser_accepts_and_passes_tap_first(self, tmp_path, monkeypatch): + """Replay engine must read tap_first from step args and execute it. + + RED: will fail because _execute_step / _execute_step_for_session ignores tap_first. + """ + from simdrive import recorder as recorder_mod, session as session_mod + + sid = "f11-replay-tap-first" + s = _make_sim_session(tmp_path, sid) + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + + # Build a recording.yaml with a type_text step that includes tap_first. + rec_dir = tmp_path / "recordings" / "f11-replay-recording" + rec_dir.mkdir(parents=True, exist_ok=True) + snaps = rec_dir / "snapshots" + snaps.mkdir(parents=True, exist_ok=True) + + pre_png = snaps / "001_pre.png" + _fake_png(pre_png) + post_png = snaps / "001_post.png" + _fake_png(post_png) + + tap_first_target = {"text": "you@example.com"} + recording_payload = { + "name": "f11-replay-recording", + "created_at": 0.0, + "device": "iPhone 17 Pro", + "os_version": "26.3", + "app_bundle_id": "io.synctek.simdrive.demo", + "app_version": "1.0", + "target": "simulator", + "steps": [{ + "id": 1, + "action": "type_text", + "args": { + "text": "dogfood@synctek.io", + "tap_first": tap_first_target, + "clear_first": False, + }, + "pre_screenshot": "snapshots/001_pre.png", + "post_screenshot": "snapshots/001_post.png", + "captured_at": 0.0, + }], + "requires": { + "target": "simulator", + "app": {"bundle_id": "io.synctek.simdrive.demo", "version": "1.0", "version_match": "any"}, + "sim": {"device": "iPhone 17 Pro", "ios_version": "26.3"}, + "device": None, + "initial_state": { + "foreground": True, + "text_subset_required": [], + "text_subset_forbidden": [], + "primary_button_label": None, + }, + }, + } + (rec_dir / "recording.yaml").write_text(yaml.safe_dump(recording_payload, sort_keys=False)) + + # Patch execution primitives to capture what gets called. + executed_calls: list = [] + + def _fake_act_tap(x, y, sw, sh, udid=None): + executed_calls.append(("tap", x, y)) + + def _fake_act_type_text(text, udid=None): + executed_calls.append(("type_text", text)) + + monkeypatch.setattr("simdrive.act.tap", _fake_act_tap) + monkeypatch.setattr("simdrive.act.type_text", _fake_act_type_text) + monkeypatch.setattr("simdrive.act._backend", lambda: "hid") + + # Patch _verify_state_contract to skip — not the SUT here. + monkeypatch.setattr( + "simdrive.recorder._verify_state_contract", + lambda *a, **kw: (True, []), + ) + # Patch observe so replay doesn't need a live simulator. + fake_obs = SimpleNamespace( + screenshot_path=pre_png, + screenshot_w=1206, + screenshot_h=2622, + marks=[], + ) + monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) + monkeypatch.setattr("simdrive.recorder._observe_for_replay", lambda s: { + "screenshot_path": str(pre_png), + "marks": [], + }) + # Patch SSIM so similarity always passes. + monkeypatch.setattr("simdrive.recorder._ssim_or_fallback", lambda *a, **kw: 1.0) + + result = recorder_mod.replay("f11-replay-recording", s, on_drift="warn") + + # F#11: replay must have issued a tap for the tap_first target before typing. + tap_calls = [c for c in executed_calls if c[0] == "tap"] + assert len(tap_calls) >= 1, ( + "F#11: replay engine did not execute a tap for tap_first — " + "replay is not faithful to the original recording with focus context" + ) + + def test_recording_yaml_roundtrip_preserves_tap_first(self, tmp_path): + """A recording written with tap_first in args must load back with tap_first intact. + + RED: will fail because the server currently omits tap_first from the recorded args dict. + """ + # Write a recording.yaml manually (as it SHOULD look after the fix). + rec_dir = tmp_path / "recordings" / "roundtrip" + snaps = rec_dir / "snapshots" + snaps.mkdir(parents=True, exist_ok=True) + _fake_png(snaps / "001_pre.png") + _fake_png(snaps / "001_post.png") + + tap_first_val = {"text": "you@example.com"} + payload = { + "name": "roundtrip", + "created_at": 0.0, + "device": "iPhone 17 Pro", + "os_version": "26.3", + "app_bundle_id": "io.example.app", + "app_version": "1.0", + "target": "simulator", + "steps": [{ + "id": 1, + "action": "type_text", + "args": {"text": "hello", "tap_first": tap_first_val, "clear_first": False}, + "pre_screenshot": "snapshots/001_pre.png", + "post_screenshot": "snapshots/001_post.png", + "captured_at": 0.0, + }], + } + (rec_dir / "recording.yaml").write_text(yaml.safe_dump(payload, sort_keys=False)) + + # Load it back. + loaded = yaml.safe_load((rec_dir / "recording.yaml").read_text()) + step = loaded["steps"][0] + + # Verify tap_first survived the YAML round-trip (it should — YAML handles nested dicts). + # This test documents the expected shape; the real failure is upstream in the server. + assert "tap_first" in step["args"], ( + "F#11: tap_first not found in step args after YAML round-trip — " + f"got step args: {step['args']!r}" + ) + assert step["args"]["tap_first"] == tap_first_val, ( + f"F#11: tap_first value corrupted after round-trip: " + f"expected {tap_first_val!r}, got {step['args'].get('tap_first')!r}" + ) + + +# =========================================================================== +# F#12 — `text_subset_required` contains duplicates in state contract +# =========================================================================== + + +class TestF12TextSubsetRequiredDeduped: + """F#12: text_subset_required must be deduplicated before writing to recording.yaml.""" + + def test_build_requires_block_deduplicates_text_subset_required(self): + """_build_requires_block must dedupe text when the same mark text appears twice. + + Current behaviour: marks with the same text produce duplicates in text_subset_required. + Expected behaviour: each text value appears at most once. + + RED: will fail because _build_requires_block appends without deduplication. + """ + from simdrive.recorder import _build_requires_block + from simdrive.som import Mark + + # Two marks with identical text — mimics title + button both reading "Sign In". + marks = [ + Mark(id=1, x=40, y=80, w=400, h=60, text="Sign In", confidence=0.98), + Mark(id=2, x=40, y=500, w=200, h=40, text="Sign In", confidence=0.99), + Mark(id=3, x=40, y=200, w=300, h=40, text="Email", confidence=0.95), + ] + + block = _build_requires_block( + marks, + screen_h=2622, + app_bundle_id="com.example.app", + app_version="1.0", + sim_device="iPhone 17 Pro", + sim_ios_version="26.3", + ) + + subset = block.initial_state.text_subset_required + # F#12: "Sign In" must appear exactly once, not twice. + count_sign_in = subset.count("Sign In") + assert count_sign_in == 1, ( + f"F#12: 'Sign In' appears {count_sign_in} times in text_subset_required " + f"(expected 1) — duplicates not deduped: {subset!r}" + ) + + def test_text_subset_required_uniqueness_invariant(self): + """text_subset_required must have no duplicates regardless of mark count. + + RED: will fail with any duplicate-text mark set until dedup is added. + """ + from simdrive.recorder import _build_requires_block + from simdrive.som import Mark + + # Build 5 marks where 3 have the same text (extreme case). + marks = [ + Mark(id=i, x=40, y=i * 100, w=200, h=40, text="Continue", confidence=0.95) + for i in range(1, 4) + ] + [ + Mark(id=4, x=40, y=500, w=200, h=40, text="Cancel", confidence=0.92), + Mark(id=5, x=40, y=600, w=200, h=40, text="Settings", confidence=0.91), + ] + + block = _build_requires_block( + marks, + screen_h=2622, + app_bundle_id="com.example.app", + app_version="1.0", + sim_device="iPhone 17 Pro", + sim_ios_version="26.3", + ) + + subset = block.initial_state.text_subset_required + assert len(subset) == len(set(subset)), ( + f"F#12: text_subset_required contains duplicates: {subset!r}" + ) + + def test_state_contract_dedup_written_to_yaml(self, tmp_path, monkeypatch): + """recording.yaml's text_subset_required must contain unique values only. + + Simulates record_start capturing state where SoM produces duplicate text marks, + then verifies finalize() writes a deduped list. + + RED: will fail because _build_requires_block doesn't deduplicate. + """ + from simdrive import recorder as recorder_mod, session as session_mod + from simdrive.som import Mark + + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + sid = "f12-dedup-yaml" + s = _make_sim_session(tmp_path, sid) + + # Stub observe to return marks with duplicate text. + dup_marks = [ + Mark(id=1, x=40, y=80, w=400, h=60, text="Sign In", confidence=0.98), + Mark(id=2, x=40, y=500, w=200, h=40, text="Sign In", confidence=0.99), + Mark(id=3, x=40, y=200, w=300, h=40, text="Password", confidence=0.95), + ] + + fake_obs = SimpleNamespace( + screenshot_path=tmp_path / "fake.png", + screenshot_w=1206, + screenshot_h=2622, + marks=dup_marks, + ) + _fake_png(fake_obs.screenshot_path) + + monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) + monkeypatch.setattr("simdrive.sim.get_app_version", lambda *a, **kw: "1.0") + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + + rec = recorder_mod.start(s, "f12-dedup-recording") + yaml_path = recorder_mod.stop(s) + + payload = yaml.safe_load(yaml_path.read_text()) + subset = payload.get("requires", {}).get("initial_state", {}).get("text_subset_required", []) + + assert len(subset) == len(set(subset)), ( + f"F#12: text_subset_required in recording.yaml contains duplicates: {subset!r}" + ) + # "Sign In" specifically must appear only once. + assert subset.count("Sign In") == 1, ( + f"F#12: 'Sign In' appears {subset.count('Sign In')} times in recording.yaml " + f"text_subset_required: {subset!r}" + ) + + +# =========================================================================== +# F#17 — `simdrive migrate-recording --all` / `--missing-contract` flag +# =========================================================================== + + +class TestF17MigrateRecordingAllFlag: + """F#17: migrate-recording must support --all and --missing-contract batch flags.""" + + _GOOD_REQUIRES = { + "target": "simulator", + "app": {"bundle_id": "com.example.app", "version": "1.0", "version_match": "minor"}, + "sim": {"device": "iPhone 17 Pro", "ios_version": "26.3"}, + "device": None, + "initial_state": { + "foreground": True, + "text_subset_required": ["Library", "Books"], + "text_subset_forbidden": [], + "primary_button_label": "Library", + }, + } + + def _patch_som(self, monkeypatch): + from simdrive import som + marks = [ + som.Mark(id=1, x=40, y=80, w=400, h=200, text="Library", confidence=0.95), + ] + monkeypatch.setattr(som, "detect_marks", lambda _path: list(marks)) + + def test_migrate_recording_all_migrates_every_old_recording(self, tmp_path, monkeypatch): + """--all flag must iterate all recordings under recordings_root and migrate each. + + RED: will fail because _cmd_migrate_recording has no --all flag. + """ + from simdrive.server import _cmd_migrate_recording + + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + self._patch_som(monkeypatch) + + # Create 2 old-format recordings (no requires block). + for name in ("old-rec-1", "old-rec-2"): + _write_recording_yaml(tmp_path / "recordings" / name, requires=None) + + # --all should migrate both without specifying a name. + with pytest.raises(SystemExit) as exc: + _cmd_migrate_recording(["--all"]) + assert exc.value.code == 0, ( + "F#17: --all flag returned non-zero exit — may not be implemented" + ) + + # Verify both recordings now have a requires block. + for name in ("old-rec-1", "old-rec-2"): + rec_yaml = tmp_path / "recordings" / name / "recording.yaml" + payload = yaml.safe_load(rec_yaml.read_text()) + assert "requires" in payload, ( + f"F#17: --all did not migrate {name} — no requires block written" + ) + + def test_migrate_recording_all_with_two_recordings_count(self, tmp_path, monkeypatch): + """--all must process all N recordings and report N migrated. + + RED: will fail because --all is not implemented. + """ + import sys + from io import StringIO + from simdrive.server import _cmd_migrate_recording + + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + self._patch_som(monkeypatch) + + for name in ("batch-1", "batch-2", "batch-3"): + _write_recording_yaml(tmp_path / "recordings" / name, requires=None) + + captured_output = StringIO() + monkeypatch.setattr(sys, "stdout", captured_output) + + with pytest.raises(SystemExit) as exc: + _cmd_migrate_recording(["--all"]) + + assert exc.value.code == 0, "F#17: --all exited non-zero" + output = captured_output.getvalue() + # Output should mention each recording or a count. + assert "batch-1" in output or "3" in output or "migrated" in output.lower(), ( + f"F#17: --all output doesn't confirm 3 recordings were processed: {output!r}" + ) + + def test_migrate_recording_missing_contract_skips_already_migrated(self, tmp_path, monkeypatch): + """--missing-contract must only touch recordings without a requires block. + + RED: will fail because --missing-contract is not implemented. + """ + from simdrive.server import _cmd_migrate_recording + + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + self._patch_som(monkeypatch) + + # One already-migrated recording. + _write_recording_yaml( + tmp_path / "recordings" / "already-good", + requires=self._GOOD_REQUIRES, + ) + original_mtime = (tmp_path / "recordings" / "already-good" / "recording.yaml").stat().st_mtime + + # One old-format recording needing migration. + _write_recording_yaml(tmp_path / "recordings" / "needs-migrate", requires=None) + + with pytest.raises(SystemExit) as exc: + _cmd_migrate_recording(["--missing-contract"]) + assert exc.value.code == 0, "F#17: --missing-contract exited non-zero" + + # already-good must NOT have been touched (mtime unchanged). + new_mtime = (tmp_path / "recordings" / "already-good" / "recording.yaml").stat().st_mtime + assert new_mtime == original_mtime, ( + "F#17: --missing-contract rewrote an already-migrated recording — " + "it should skip recordings that already have a requires block" + ) + + # needs-migrate MUST now have a requires block. + payload = yaml.safe_load( + (tmp_path / "recordings" / "needs-migrate" / "recording.yaml").read_text() + ) + assert "requires" in payload, ( + "F#17: --missing-contract did not migrate the recording that needed it" + ) + + def test_migrate_recording_all_handles_empty_recordings_dir(self, tmp_path, monkeypatch): + """--all on empty recordings dir should exit 0 gracefully. + + RED: will fail because --all is not implemented. + """ + from simdrive.server import _cmd_migrate_recording + + monkeypatch.setenv("SIMDRIVE_HOME", str(tmp_path)) + (tmp_path / "recordings").mkdir(parents=True, exist_ok=True) + + with pytest.raises(SystemExit) as exc: + _cmd_migrate_recording(["--all"]) + assert exc.value.code == 0, ( + "F#17: --all on empty recordings dir must exit 0, got non-zero" + ) + + def test_migrate_recording_all_flag_in_argparse_schema(self): + """_cmd_migrate_recording's argparse must accept --all without raising. + + RED: will fail because the argument parser has no --all option. + """ + from simdrive.server import _cmd_migrate_recording + + # Passing an unrecognised arg causes argparse SystemExit with code 2. + # If --all is defined, it will raise SystemExit(0 or 1) from actual logic, + # not argparse error (code 2). + with pytest.raises(SystemExit) as exc: + # We pass --all without a recordings dir, so it may fail on missing dir — + # but code must NOT be 2 (argparse "unrecognised arguments"). + _cmd_migrate_recording(["--all"]) + assert exc.value.code != 2, ( + "F#17: --all is not a recognised argument in _cmd_migrate_recording's argparse schema" + ) + + def test_migrate_recording_missing_contract_flag_in_argparse_schema(self): + """_cmd_migrate_recording's argparse must accept --missing-contract without raising. + + RED: will fail because the argument parser has no --missing-contract option. + """ + from simdrive.server import _cmd_migrate_recording + + with pytest.raises(SystemExit) as exc: + _cmd_migrate_recording(["--missing-contract"]) + assert exc.value.code != 2, ( + "F#17: --missing-contract is not a recognised argument in _cmd_migrate_recording's argparse schema" + ) From cce90ed958669d773ea1856d19a2d7791859aab3 Mon Sep 17 00:00:00 2001 From: SyncTek <145518101+SyncTekLLC@users.noreply.github.com> Date: Fri, 22 May 2026 15:18:49 -0400 Subject: [PATCH 2/3] =?UTF-8?q?test(b5):=20fix=20regression=20guard=20test?= =?UTF-8?q?=20setup=20for=20F#11=20=E2=80=94=209=20RED=204=20GREEN?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - F#11 tests: all 4 pass as regression guards (fix was in main #146) - Fixed mock setup for record_act_step test (pass correct args dict) - Fixed replay test: supply matching mark for tap_first text lookup - F#12 tests: all 3 RED (build_requires_block dedup not implemented) - Fixed mark fixture to use English-dict words (high confidence_band) - F#17 tests: all 6 RED (--all/--missing-contract flags not implemented) Co-Authored-By: Claude Sonnet 4.6 --- .../test_b5_domain_b_recording_format.py | 75 +++++++++++-------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/simdrive/tests/test_b5_domain_b_recording_format.py b/simdrive/tests/test_b5_domain_b_recording_format.py index 809c608..fe9998c 100644 --- a/simdrive/tests/test_b5_domain_b_recording_format.py +++ b/simdrive/tests/test_b5_domain_b_recording_format.py @@ -141,13 +141,17 @@ def _write_recording_yaml(dir_: Path, *, requires=None, steps=None, class TestF11TypeTextTapFirstPersisted: """F#11: type_text with tap_first must serialize the full args dict.""" - def test_record_act_step_includes_tap_first_in_args(self, tmp_path, monkeypatch): - """_record_act_step for type_text stores tap_first alongside text in step args. + def test_record_act_step_persists_full_args_including_tap_first(self, tmp_path, monkeypatch): + """_record_act_step correctly stores arbitrary args dict including tap_first. - Current behaviour: only {"text": text} is recorded; tap_first is dropped. - Expected behaviour: {"text": text, "tap_first": } is persisted. + Regression guard: confirms that when tool_type_text passes the full rec_args + dict (text + tap_first), the step is recorded with both fields intact. - RED: will fail because server._record_act_step passes {"text": text} only. + F#11 FIX (PR #146): tool_type_text now builds rec_args = {"text": text, + "tap_first": tap_target} before calling _record_act_step. This test confirms + that shape is preserved end-to-end through the recorder. + + REGRESSION: GREEN — confirm fix is in place. """ from simdrive import session as session_mod, server @@ -167,23 +171,25 @@ def test_record_act_step_includes_tap_first_in_args(self, tmp_path, monkeypatch) ) monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) - # The tap_first target that SHOULD be recorded alongside the text. + # Call _record_act_step with the FIXED args dict (including tap_first). + # This mirrors what tool_type_text now does after the F#11 fix. tap_first_target = {"text": "you@example.com"} - - # Call _record_act_step directly with what tool_type_text currently passes - # (only {"text": text}, omitting tap_first — that is the bug F#11). - server._record_act_step(s, "type_text", {"text": "dogfood@synctek.io"}, pre_png) + server._record_act_step( + s, "type_text", + {"text": "dogfood@synctek.io", "tap_first": tap_first_target}, + pre_png, + ) # Find the step that was recorded. assert len(rec.steps) == 1, "Expected exactly one step recorded" step = rec.steps[0] assert step["action"] == "type_text" - # F#11: tap_first MUST be present in the recorded args. - # Currently fails because _record_act_step is called with {"text": text} only. + # Regression: tap_first must survive the _record_act_step → add_step chain. assert "tap_first" in step["args"], ( - "F#11: tap_first is missing from recorded step args — " + "F#11 regression: tap_first dropped by _record_act_step — " "recording.yaml will not be able to replay the focus context" ) + assert step["args"]["tap_first"] == tap_first_target def test_tool_type_text_step_args_includes_tap_first(self, tmp_path, monkeypatch): """tool_type_text must pass tap_first into the step args when recording. @@ -245,11 +251,14 @@ def test_tool_type_text_step_args_includes_tap_first(self, tmp_path, monkeypatch ) def test_replay_parser_accepts_and_passes_tap_first(self, tmp_path, monkeypatch): - """Replay engine must read tap_first from step args and execute it. + """Replay engine reads tap_first from step args and taps the focus target. - RED: will fail because _execute_step / _execute_step_for_session ignores tap_first. + Regression guard: confirms F#11 fix — the sim path in _execute_step_for_session + now calls _resolve_focus_target and act.tap when tap_first is present. + + REGRESSION: GREEN — confirm fix is in place. """ - from simdrive import recorder as recorder_mod, session as session_mod + from simdrive import recorder as recorder_mod, session as session_mod, som sid = "f11-replay-tap-first" s = _make_sim_session(tmp_path, sid) @@ -320,28 +329,28 @@ def _fake_act_type_text(text, udid=None): "simdrive.recorder._verify_state_contract", lambda *a, **kw: (True, []), ) - # Patch observe so replay doesn't need a live simulator. - fake_obs = SimpleNamespace( - screenshot_path=pre_png, - screenshot_w=1206, - screenshot_h=2622, - marks=[], - ) - monkeypatch.setattr("simdrive.observe.observe", lambda *a, **kw: fake_obs) + + # Provide a mark that matches the tap_first {"text": "you@example.com"} target. + matching_mark = som.Mark(id=1, x=100, y=200, w=300, h=40, + text="you@example.com", confidence=0.95) + + # _observe_for_replay returns a dict with marks for stable_id / text resolution. monkeypatch.setattr("simdrive.recorder._observe_for_replay", lambda s: { "screenshot_path": str(pre_png), - "marks": [], + "screenshot_w": 1206, + "screenshot_h": 2622, + "marks": [matching_mark], }) # Patch SSIM so similarity always passes. monkeypatch.setattr("simdrive.recorder._ssim_or_fallback", lambda *a, **kw: 1.0) result = recorder_mod.replay("f11-replay-recording", s, on_drift="warn") - # F#11: replay must have issued a tap for the tap_first target before typing. + # F#11 regression: replay must have issued a tap for the tap_first target before typing. tap_calls = [c for c in executed_calls if c[0] == "tap"] assert len(tap_calls) >= 1, ( - "F#11: replay engine did not execute a tap for tap_first — " - "replay is not faithful to the original recording with focus context" + "F#11 regression: replay engine did not execute a tap for tap_first — " + "the fix in recorder._execute_step_for_session is missing or broken" ) def test_recording_yaml_roundtrip_preserves_tap_first(self, tmp_path): @@ -438,14 +447,18 @@ def test_build_requires_block_deduplicates_text_subset_required(self): def test_text_subset_required_uniqueness_invariant(self): """text_subset_required must have no duplicates regardless of mark count. - RED: will fail with any duplicate-text mark set until dedup is added. + Uses words known to be in the English dictionary (high confidence_band) + so they pass the band filter and enter text_subset_required. + + RED: will fail because _build_requires_block does not deduplicate. """ from simdrive.recorder import _build_requires_block from simdrive.som import Mark - # Build 5 marks where 3 have the same text (extreme case). + # 3 marks with the same English text "Email" — all pass the confidence_band + # filter and should appear only once after dedup. marks = [ - Mark(id=i, x=40, y=i * 100, w=200, h=40, text="Continue", confidence=0.95) + Mark(id=i, x=40, y=i * 100, w=200, h=40, text="Email", confidence=0.95) for i in range(1, 4) ] + [ Mark(id=4, x=40, y=500, w=200, h=40, text="Cancel", confidence=0.92), From c0f0a74e95afffb70905ea6f7d3f3b5392647ecb Mon Sep 17 00:00:00 2001 From: SyncTek <145518101+SyncTekLLC@users.noreply.github.com> Date: Fri, 22 May 2026 15:23:02 -0400 Subject: [PATCH 3/3] feat(b5): recording format polish [F#12 F#17] F#12: deduplicate text_subset_required in _build_requires_block using a seen-set; same-text marks no longer produce duplicate entries. F#17: extend _cmd_migrate_recording with --all (batch-migrate every recording under recordings root) and --missing-contract (skip already- migrated recordings) flags; empty dir exits 0 gracefully. Co-Authored-By: Claude Sonnet 4.6 --- simdrive/src/simdrive/recorder.py | 4 +++ simdrive/src/simdrive/server.py | 59 +++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/simdrive/src/simdrive/recorder.py b/simdrive/src/simdrive/recorder.py index 51de3d3..03d1a6d 100644 --- a/simdrive/src/simdrive/recorder.py +++ b/simdrive/src/simdrive/recorder.py @@ -573,6 +573,7 @@ def _build_requires_block( # band high|medium, text >= 2 chars. # For device marks (list[dict] from annotate_device_screenshot), use dict access. required: list[str] = [] + _seen: set[str] = set() for m in marks: if len(required) >= 10: break @@ -587,6 +588,9 @@ def _build_requires_block( continue if len(text) < 2: continue + if text in _seen: + continue + _seen.add(text) required.append(text) primary_label: Optional[str] = None diff --git a/simdrive/src/simdrive/server.py b/simdrive/src/simdrive/server.py index c547eba..51857bd 100644 --- a/simdrive/src/simdrive/server.py +++ b/simdrive/src/simdrive/server.py @@ -3154,7 +3154,12 @@ def _cmd_lint_recordings(args: list[str]) -> None: def _cmd_migrate_recording(args: list[str]) -> None: - """Handle `simdrive migrate-recording [--force] [--dry-run]` CLI subcommand.""" + """Handle `simdrive migrate-recording [--force] [--dry-run]` CLI subcommand. + + Batch flags: + --all Migrate every recording under the recordings root. + --missing-contract Migrate only recordings that lack a `requires:` block. + """ import argparse import sys @@ -3167,13 +3172,63 @@ def _cmd_migrate_recording(args: list[str]) -> None: ), formatter_class=argparse.RawDescriptionHelpFormatter, ) - parser.add_argument("name", help="Recording name (directory under recordings root).") + # Positional name is optional when --all / --missing-contract is used. + parser.add_argument("name", nargs="?", default=None, + help="Recording name (directory under recordings root). " + "Omit when using --all or --missing-contract.") parser.add_argument("--force", action="store_true", default=False, help="Re-migrate even if the recording already has `requires:`.") parser.add_argument("--dry-run", action="store_true", default=False, help="Print what would be written; do not modify the file.") + parser.add_argument("--all", dest="all_recordings", action="store_true", default=False, + help="Migrate every recording found under the recordings root.") + parser.add_argument("--missing-contract", action="store_true", default=False, + help="Migrate only recordings that do not yet have a `requires:` block.") ns = parser.parse_args(args) + # --- Batch modes --- + if ns.all_recordings or ns.missing_contract: + import yaml as _yaml + root = recorder.recordings_root() + if not root.exists(): + print("Migrated 0 recordings.") + sys.exit(0) + + candidates = sorted(p for p in root.iterdir() if p.is_dir()) + migrated_count = 0 + for rec_dir in candidates: + yaml_path = rec_dir / "recording.yaml" + if not yaml_path.exists(): + continue + # --missing-contract: skip recordings that already have requires. + if ns.missing_contract: + try: + payload = _yaml.safe_load(yaml_path.read_text()) or {} + except Exception: + payload = {} + if "requires" in payload: + continue + + try: + result = recorder.migrate_recording(rec_dir.name, force=ns.force, dry_run=ns.dry_run) + except recorder.MigrationError as exc: + print(f"Error migrating {rec_dir.name}: {exc}", file=sys.stderr) + continue + + suffix = " (dry-run)" if result.dry_run else "" + if result.migrated: + print(f"Migrated {result.name}{suffix}: {result.text_mark_count} text marks.") + migrated_count += 1 + else: + print(f"Skipped {result.name}: {result.reason}") + + print(f"Done. {migrated_count} recording(s) migrated.") + sys.exit(0) + + # --- Single-recording mode --- + if not ns.name: + parser.error("Recording name is required unless --all or --missing-contract is specified.") + try: result = recorder.migrate_recording(ns.name, force=ns.force, dry_run=ns.dry_run) except recorder.MigrationError as exc: