diff --git a/.coverage b/.coverage deleted file mode 100644 index ea753f6..0000000 Binary files a/.coverage and /dev/null differ diff --git a/.github/workflows/ba-merge-check.yml b/.github/workflows/ba-merge-check.yml deleted file mode 100644 index 0a94125..0000000 --- a/.github/workflows/ba-merge-check.yml +++ /dev/null @@ -1,78 +0,0 @@ -name: BA Merge Check (Governance Memo Format) - -on: - pull_request: - types: [ready_for_review, synchronize] - -permissions: - contents: read - pull-requests: read - -jobs: - governance-memo-check: - name: Check governance memo sections - runs-on: ubuntu-latest - timeout-minutes: 10 - - steps: - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Validate INIT reference (required) - # No ba CLI dependency — governance check is inline Python only - id: init_check - env: - PR_TITLE: ${{ github.event.pull_request.title }} - PR_BODY: ${{ github.event.pull_request.body }} - PR_BRANCH: ${{ github.head_ref }} - run: | - python3 - <<'PYEOF' - import re, os, sys - - INIT_PATTERN = re.compile(r'INIT-\d{4}-\d{3,}', re.IGNORECASE) - - title = os.environ.get("PR_TITLE", "") - body = os.environ.get("PR_BODY", "") - branch = os.environ.get("PR_BRANCH", "") - - sources = {"title": title, "body": body, "branch": branch} - found = None - - for src, text in sources.items(): - m = INIT_PATTERN.search(text) - if m: - found = m.group(0).upper() - break - - if found: - print(f"PASS: Initiative reference found: {found}") - else: - print("FAIL: No INIT-YYYY-NNN reference found.") - sys.exit(1) - PYEOF - - - name: Check governance memo sections (warning only) - env: - PR_BODY: ${{ github.event.pull_request.body }} - run: | - python3 - <<'PYEOF' - import os, sys - - body = os.environ.get("PR_BODY", "") - - MEMO_SECTIONS = ["Gate:", "DoD:", "Evidence:"] - found_sections = [s for s in MEMO_SECTIONS if s in body] - - if found_sections: - print(f"Governance memo sections present: {', '.join(found_sections)}") 
- else: - # Warning annotation — not a hard fail - print("::warning title=Governance Memo Missing::PR body does not contain any of: Gate:, DoD:, Evidence:. " - "Consider adding a governance memo section before merge. " - "See BusinessAtlas prompts/CEO_GOVERNANCE.md for expected format.") - print("Governance memo check: no sections found — warning issued, not blocking.") - - sys.exit(0) - PYEOF diff --git a/.github/workflows/ba-pr-validate.yml b/.github/workflows/ba-pr-validate.yml deleted file mode 100644 index 41fa944..0000000 --- a/.github/workflows/ba-pr-validate.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: BA PR Validate (Initiative Reference) - -on: - pull_request: - types: [opened, synchronize, reopened] - -permissions: - contents: read - pull-requests: read - -jobs: - validate-initiative-ref: - name: Require INIT reference - runs-on: ubuntu-latest - timeout-minutes: 10 - - steps: - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Extract and validate INIT reference - # No ba CLI dependency — governance check is inline Python only - env: - PR_TITLE: ${{ github.event.pull_request.title }} - PR_BODY: ${{ github.event.pull_request.body }} - PR_BRANCH: ${{ github.head_ref }} - run: | - python3 - <<'PYEOF' - import re, os, sys - - INIT_PATTERN = re.compile(r'INIT-\d{4}-\d{3,}', re.IGNORECASE) - - title = os.environ.get("PR_TITLE", "") - body = os.environ.get("PR_BODY", "") - branch = os.environ.get("PR_BRANCH", "") - - sources = {"title": title, "body": body, "branch": branch} - found = None - found_in = None - - for src, text in sources.items(): - m = INIT_PATTERN.search(text) - if m: - found = m.group(0).upper() - found_in = src - break - - if found: - print(f"PASS: Found initiative reference {found} in PR {found_in}") - print(f"Every specterqa-ios PR must be bound to a BusinessAtlas initiative.") - sys.exit(0) - else: - print("FAIL: No INIT-YYYY-NNN reference found in PR title, body, or branch name.") - 
print("Every specterqa-ios PR must reference a BusinessAtlas initiative.") - print(" - Add 'INIT-YYYY-NNN' to the PR title, OR") - print(" - Include it in the PR body, OR") - print(" - Use a branch name like feat/INIT-2026-NNN-description") - sys.exit(1) - PYEOF diff --git a/.github/workflows/cleanroom-install.yml b/.github/workflows/cleanroom-install.yml index e7ff483..cf84b02 100644 --- a/.github/workflows/cleanroom-install.yml +++ b/.github/workflows/cleanroom-install.yml @@ -1,6 +1,6 @@ name: Cleanroom Install Smoke -# INIT-2026-549 — added after the b2/b3 fastapi regression (F-B3-007). +# [internal-tracker] — added after the b2/b3 fastapi regression (F-B3-007). # # The `simdrive-ci.yml` job installs `[dev]` extras to run the test suite. That # means every CI run has fastapi + sqlalchemy + the full cloud-side dep graph diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 44c2e75..c2d1289 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,6 +1,6 @@ name: CodeQL (Python) -# INIT-2026-549 cleanup: salvaged — switched to build-mode: none and scoped +# [internal-tracker] cleanup: salvaged — switched to build-mode: none and scoped # analysis to simdrive/src/. Original failure cause: autobuild ran `pip install` # from repo root where no pyproject.toml exists (metadata lives at # simdrive/pyproject.toml). Python doesn't need a build for CodeQL extraction, diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index e31e90b..0c72246 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -1,6 +1,6 @@ name: Security baseline (pip-audit) -# INIT-2026-549 W1: security CI baseline. +# [internal-tracker]: security CI baseline. # - pip-audit: blocks on HIGH-severity vulns in the resolved dep graph. # CodeQL lives in .github/workflows/codeql.yml (separate workflow for the # distinct GitHub Code Scanning permissions model). 
@@ -17,7 +17,7 @@ name: Security baseline (pip-audit) # wire up gitleaks with a license — see docs/security/secret-scanning.md # if/when that's added. # -# INIT-2026-549 cleanup (review): paths verified post repo restructure. +# [internal-tracker] cleanup (review): paths verified post repo restructure. # pip-audit consumes simdrive/requirements.lock (exists; pinned in PR #113). # If this workflow is red on main, the cause is a real CVE finding in # simdrive/requirements.lock — investigate and bump pin, do not mask. @@ -38,7 +38,7 @@ permissions: jobs: pip-audit: name: pip-audit (HIGH blocks) - # INIT-2026-549 cleanup: must be macOS — requirements.lock pins pyobjc-core, + # [internal-tracker] cleanup: must be macOS — requirements.lock pins pyobjc-core, # which has a source-build dep on macOS frameworks (Quartz/Vision). On # ubuntu-latest the audit step fails at the install phase with "PyObjC # requires macOS to build" before pip-audit can even read the lock. @@ -64,7 +64,7 @@ jobs: # responsibility, not the library's). No fix version available. # simdrive does not use pyjwt directly — it's a transitive dep # of `mcp` for its own auth surface, which is not in our control. - # [INIT-2026-549] + # # PYSEC-2026-161 / GHSA-86qp-5c8j-p5mr — starlette Host header path # injection advisory (published 2026-05-22). Fix version is 1.0.1 # which is not yet available on PyPI as of 2026-05-22. Awaiting @@ -72,7 +72,7 @@ jobs: # not exposed to external HTTP requests, so exploitability is # effectively zero in our threat model. Re-enable the block once # starlette>=1.0.1 is published and requirements.lock is updated. 
- # Tracked: [INIT-2026-549] + # Tracked: run: | pip-audit -r simdrive/requirements.lock \ --vulnerability-service osv \ diff --git a/.github/workflows/simdrive-ci.yml b/.github/workflows/simdrive-ci.yml index 54b6ad5..2fa5093 100644 --- a/.github/workflows/simdrive-ci.yml +++ b/.github/workflows/simdrive-ci.yml @@ -1,6 +1,6 @@ name: simdrive CI -# INIT-2026-549 W1: expanded test gate. +# [internal-tracker]: expanded test gate. # - Runs `pytest simdrive/tests -m "not live"` (was: tests/test_unit.py only). # - Enforces a coverage ratchet floor on the hot-path modules (see simdrive/docs/COVERAGE_RATCHET.md). # - Installs simdrive[dev] (which already pins pytest-cov, moto[s3], hypothesis). @@ -68,7 +68,7 @@ jobs: --cov=simdrive.device \ --cov-report=xml \ --cov-report=term-missing \ - --cov-fail-under=90 # Ratchet floor — INIT-2026-549 W4 push to 85% reached 92% on hot-path modules (server.py 70%->94%); floor set 2pp below measured per flake-safety policy. + --cov-fail-under=90 # Ratchet floor — [internal-tracker] push to 85% reached 92% on hot-path modules (server.py 70%->94%); floor set 2pp below measured per flake-safety policy. - name: Upload coverage artifact if: always() diff --git a/.github/workflows/specterqa-ios-publish.yml b/.github/workflows/specterqa-ios-publish.yml index ea1e9e4..32500ea 100644 --- a/.github/workflows/specterqa-ios-publish.yml +++ b/.github/workflows/specterqa-ios-publish.yml @@ -1,6 +1,6 @@ name: simdrive Publish to PyPI -# INIT-2026-549 W1: tag scheme migrated from `specterqa-ios-v*` to `simdrive-v*`. +# [internal-tracker]: tag scheme migrated from `specterqa-ios-v*` to `simdrive-v*`. # Three pre-publish gates run before the build, in order: # 1. version-match — git tag == simdrive-v # 2. 
CHANGELOG head — first `## [X.Y.Z]` heading == pyproject version diff --git a/.gitignore b/.gitignore index 93f5081..259d802 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ venv/ .mypy_cache/ .pytest_cache/ .ruff_cache/ +.hypothesis/ +.coverage +coverage.xml *.ips # LapsApp — Xcode user-specific state (project.pbxproj IS committed) diff --git a/.hypothesis/constants/02570021c68fcd20 b/.hypothesis/constants/02570021c68fcd20 deleted file mode 100644 index 17f4f28..0000000 --- a/.hypothesis/constants/02570021c68fcd20 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/errors.py -# hypothesis_version: 6.141.1 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/0281062a24aa6724 b/.hypothesis/constants/0281062a24aa6724 deleted file mode 100644 index 1ed592c..0000000 --- a/.hypothesis/constants/0281062a24aa6724 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/app.py -# hypothesis_version: 6.152.4 - -['/health', '/v1', 'SELECT 1', 'SimDrive Cloud API', 'db_reachable', 'ok', 'simdrive', 'simdrive_r2_', 'sqlite://', 'status', 'storage_backend', 'unknown', 'version'] \ No newline at end of file diff --git a/.hypothesis/constants/02f6e1d4984e19a3 b/.hypothesis/constants/02f6e1d4984e19a3 deleted file mode 100644 index bee7a81..0000000 --- a/.hypothesis/constants/02f6e1d4984e19a3 +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/errors.py -# hypothesis_version: 6.141.1 - -['LicenseError', 'SimdriveError', 'act_tool_failed', 'action', 'already_recording', 'available', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_error', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'license_expired', 'license_invalid', 'license_not_found', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'trial_already_used', 'trial_rate_limited', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/043d863f84e06096 b/.hypothesis/constants/043d863f84e06096 deleted file mode 100644 index 58e17af..0000000 --- a/.hypothesis/constants/043d863f84e06096 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/criteria.py -# hypothesis_version: 6.152.4 - -[200, '; ', 'cpu_pct', 'error', 'marks', 'memory_mb', 'no crashes detected', 'no_crash', 'perf_under', 'rss_mb', 'screen_matches', 'stable_id', 'text', 'text_visible', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/075a4b5b4ee059d7 b/.hypothesis/constants/075a4b5b4ee059d7 deleted file mode 100644 index 3f8ae85..0000000 --- a/.hypothesis/constants/075a4b5b4ee059d7 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/storage/r2_stub.py -# hypothesis_version: 6.141.1 - -['*', '/'] \ No newline at end of file diff --git a/.hypothesis/constants/0fb8be2e26406269 b/.hypothesis/constants/0fb8be2e26406269 deleted file mode 100644 index 65d24f7..0000000 --- 
a/.hypothesis/constants/0fb8be2e26406269 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/errors.py -# hypothesis_version: 6.141.1 - -['act_tool_failed', 'attempt', 'cap_usd', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cost_usd', 'criterion_type', 'inner_code', 'inner_message', 'journey_name', 'journeys_dir', 'llm_calls', 'path', 'persona_slug', 'personas_dir', 'reason', 'seconds', 'steps', 'supported', 'tag_filter', 'tool_name', 'version'] \ No newline at end of file diff --git a/.hypothesis/constants/1009759aed581c2e b/.hypothesis/constants/1009759aed581c2e deleted file mode 100644 index 78d166a..0000000 --- a/.hypothesis/constants/1009759aed581c2e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/som_device.py -# hypothesis_version: 6.141.1 - -[0.7, 1.0, '_annotated.png', 'height', 'label', 'name', 'true', 'type', 'value', 'visible', 'width', 'x', 'y'] \ No newline at end of file diff --git a/.hypothesis/constants/109f607f44d61338 b/.hypothesis/constants/109f607f44d61338 deleted file mode 100644 index 9c54ab0..0000000 --- a/.hypothesis/constants/109f607f44d61338 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/10c17a0af179e228 
b/.hypothesis/constants/10c17a0af179e228 deleted file mode 100644 index 8a005b9..0000000 --- a/.hypothesis/constants/10c17a0af179e228 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/entitlement.py -# hypothesis_version: 6.152.4 - -[250, 1000, '.simdrive', 'customer_email', 'enterprise', 'expires_at', 'license.json', 'license_key', 'pro', 'seats', 'solo', 'subject', 'team', 'tier', 'trial'] \ No newline at end of file diff --git a/.hypothesis/constants/10e546164e67d0e4 b/.hypothesis/constants/10e546164e67d0e4 deleted file mode 100644 index 5202cc2..0000000 --- a/.hypothesis/constants/10e546164e67d0e4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/recordings.py -# hypothesis_version: 6.141.1 - -[3600, '/recordings', 'customer_email', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/12aa14e099ddb4d1 b/.hypothesis/constants/12aa14e099ddb4d1 deleted file mode 100644 index 829607a..0000000 --- a/.hypothesis/constants/12aa14e099ddb4d1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/middleware/quotas.py -# hypothesis_version: 6.152.4 - -[0.0, 100.0, 'Authorization', 'Bearer ', 'Retry-After', 'customer_email', 'percent_used', 'period_end', 'period_start', 'runs_limit', 'runs_used', 'solo', 'tier', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/13f1ae2012612e40 b/.hypothesis/constants/13f1ae2012612e40 deleted file mode 100644 index 151ea05..0000000 --- a/.hypothesis/constants/13f1ae2012612e40 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/claude_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 0.5, 15.0, 75.0, 200, 1024, 1000000, '.', 'ANTHROPIC_API_KEY', 'args', 'base64', 'claude-opus-4-7', 'confidence', 'content', 'data', 'fail', 'image', 'image/jpeg', 'image/png', 'jpeg', 
'jpg', 'media_type', 'rationale', 'rb', 'role', 'source', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/.hypothesis/constants/14a6af3593242d6b b/.hypothesis/constants/14a6af3593242d6b deleted file mode 100644 index 34c6b9a..0000000 --- a/.hypothesis/constants/14a6af3593242d6b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/prompt.py -# hypothesis_version: 6.152.4 - -['(none)', ',', ':', 'marks', 'recent_logs', 'screenshot_path', 'stable_id', 'text'] \ No newline at end of file diff --git a/.hypothesis/constants/167948095805dd43 b/.hypothesis/constants/167948095805dd43 deleted file mode 100644 index 18c6de9..0000000 --- a/.hypothesis/constants/167948095805dd43 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.152.4 - -['1.0.0a7'] \ No newline at end of file diff --git a/.hypothesis/constants/16ff52bd8d9a2863 b/.hypothesis/constants/16ff52bd8d9a2863 deleted file mode 100644 index 5b57f7d..0000000 --- a/.hypothesis/constants/16ff52bd8d9a2863 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/registry.py -# hypothesis_version: 6.141.1 - -['.simdrive', 'WDA_REGISTRY_DIR', 'utf-8', 'wda'] \ No newline at end of file diff --git a/.hypothesis/constants/196b22e20f5e8b16 b/.hypothesis/constants/196b22e20f5e8b16 deleted file mode 100644 index aecfbc8..0000000 --- a/.hypothesis/constants/196b22e20f5e8b16 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/trials.py -# hypothesis_version: 6.152.4 - -[86400, '/trials', 'trial', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/19dc7bb8054baa6c b/.hypothesis/constants/19dc7bb8054baa6c deleted file mode 100644 index aa8727a..0000000 --- a/.hypothesis/constants/19dc7bb8054baa6c +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'launch_failed', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/1b6703a3f5aac0a8 b/.hypothesis/constants/1b6703a3f5aac0a8 deleted file mode 100644 index ef395e0..0000000 --- a/.hypothesis/constants/1b6703a3f5aac0a8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/logger.py -# hypothesis_version: 6.141.1 - -['%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '1', 'SIMDRIVE_DEBUG', 'args', 'created', 'exc_info', 'exc_text', 'filename', 'funcName', 'level', 'levelname', 'levelno', 'lineno', 'message', 'module', 'msecs', 'msg', 'name', 'pathname', 'process', 'processName', 'relativeCreated', 'simdrive', 'stack_info', 'thread', 'threadName', 'timestamp'] \ No newline at end of file diff --git a/.hypothesis/constants/1c003d066e512803 b/.hypothesis/constants/1c003d066e512803 deleted file mode 100644 index 75c7bd7..0000000 --- a/.hypothesis/constants/1c003d066e512803 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/licenses.py -# hypothesis_version: 6.141.1 - -[365, 86400, '/licenses/activate', '/licenses/status', 'enterprise', 'expires_at', 'pro', 'solo', 'team', 'tier', 'trial'] \ No newline at end of file diff --git a/.hypothesis/constants/1dc607b436d40921 b/.hypothesis/constants/1dc607b436d40921 deleted file mode 100644 index 9c75d31..0000000 --- a/.hypothesis/constants/1dc607b436d40921 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/validator.py -# hypothesis_version: 6.152.4 - -[86400, '.', '=', 'ascii', 'customer_email', 
'dev-trial', 'expires_at', 'license expired', 'license valid', 'subject', 'tier'] \ No newline at end of file diff --git a/.hypothesis/constants/1e5328fceff4cc23 b/.hypothesis/constants/1e5328fceff4cc23 deleted file mode 100644 index 6a80e32..0000000 --- a/.hypothesis/constants/1e5328fceff4cc23 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.152.4 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/2119d2c527806756 b/.hypothesis/constants/2119d2c527806756 deleted file mode 100644 index 57ec1a8..0000000 --- a/.hypothesis/constants/2119d2c527806756 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/db/usage.py -# hypothesis_version: 6.141.1 - -[250, 255, 1000, 999999999, '%Y-%m', 'enterprise', 'month_bucket', 'pro', 'solo', 'team', 'trial', 'uq_usage_lfp_month', 'usage_counters'] \ No newline at end of file diff --git a/.hypothesis/constants/21a49b19608ec52b b/.hypothesis/constants/21a49b19608ec52b deleted file mode 100644 index 19713a1..0000000 --- a/.hypothesis/constants/21a49b19608ec52b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/som.py -# hypothesis_version: 6.141.1 - -[0.0, 0.3, 0.5, 0.6, 0.85, 1.0, 255, '+', "-'.,!?:", '<', '>', 'RGB', '\\s+', 'a', 'about', 'accept', 'account', 'active', 'add', 'address', 'aeiouAEIOU', 'again', 'agree', 
'air', 'alert', 'all', 'allow', 'am', 'an', 'and', 'any', 'april', 'are', 'as', 'at', 'audio', 'audiobook', 'august', 'author', 'back', 'bbox', 'be', 'because', 'been', 'being', 'book', 'books', 'borrow', 'bottom', 'boy', 'but', 'by', 'cancel', 'cart', 'case', 'catalog', 'center', 'change', 'chapter', 'child', 'close', 'comment', 'company', 'complete', 'completed', 'confidence', 'confidence_band', 'confirm', 'content', 'copy', 'cut', 'dance', 'date', 'day', 'december', 'decline', 'delete', 'deny', 'description', 'details', 'did', 'do', 'does', 'doing', 'done', 'down', 'downloaded', 'ebook', 'edit', 'eight', 'email', 'error', 'every', 'fact', 'failed', 'false', 'father', 'favorites', 'february', 'few', 'film', 'filter', 'first', 'five', 'follow', 'for', 'force', 'forward', 'four', 'free', 'friday', 'from', 'gear', 'girl', 'goodbye', 'government', 'grid', 'guy', 'had', 'hand', 'has', 'have', 'having', 'he', 'head', 'hello', 'help', 'her', 'hide', 'high', 'him', 'his', 'history', 'hold', 'home', 'house', 'how', 'i', 'id', 'if', 'in', 'inactive', 'info', 'into', 'is', 'issue', 'it', 'its', 'january', 'job', 'july', 'june', 'kind', 'last', 'left', 'less', 'library', 'life', 'like', 'list', 'loading', 'login', 'logout', 'lot', 'low', 'magazine', 'man', 'many', 'march', 'may', 'me', 'media', 'medium', 'menu', 'mine', 'moment', 'monday', 'money', 'month', 'more', 'morning', 'mother', 'movie', 'music', 'my', 'name', 'new', 'next', 'night', 'nine', 'no', 'none', 'not', 'november', 'now', 'number', 'o', 'october', 'of', 'off', 'ok', 'on', 'one', 'onto', 'open', 'or', 'our', 'ours', 'out', 'over', 'page', 'pages', 'part', 'partner', 'party', 'password', 'paste', 'pause', 'pending', 'people', 'person', 'phone', 'place', 'play', 'please', 'point', 'post', 'premium', 'previous', 'profile', 'program', 'q', 'q.', 'q/', 'q\\', 'question', 'raw_confidence', 'read', 'reading', 'ready', 'reason', 'receive', 'redo', 'refresh', 'register', 'reload', 'remove', 'research', 'result', 
'resume', 'retry', 'return', 'right', 'saturday', 'save', 'search', 'select', 'send', 'september', 'settings', 'seven', 'share', 'she', 'shelf', 'shelves', 'show', 'side', 'sign', 'signin', 'signup', 'since', 'six', 'skip', 'so', 'some', 'sort', 'stable_id', 'stable_id_loose', 'start', 'stop', 'story', 'study', 'submit', 'subscribe', 'subscription', 'subtitle', 'success', 'summary', 'sunday', 'support', 'system', 'tab', 'tale', 'ten', 'text', 'than', 'that', 'the', 'their', 'them', 'then', 'these', 'they', 'thing', 'this', 'those', 'three', 'thursday', 'time', 'title', 'to', 'today', 'tomorrow', 'top', 'trial', 'true', 'try', 'tuesday', 'two', 'under', 'undo', 'unfollow', 'up', 'upgrade', 'us', 'username', 'utf-8', 'video', 'view', 'war', 'warning', 'was', 'way', 'we', 'wednesday', 'week', 'welcome', 'were', 'what', 'when', 'where', 'which', 'while', 'who', 'why', 'with', 'without', 'woman', 'word', 'work', 'world', 'x', 'year', 'yes', 'yesterday', 'you', 'your', 'yours', '‹', '›', '≡', '☰', '⚙', '✕', '✖'] \ No newline at end of file diff --git a/.hypothesis/constants/22b97680d3066b11 b/.hypothesis/constants/22b97680d3066b11 deleted file mode 100644 index 19076d7..0000000 --- a/.hypothesis/constants/22b97680d3066b11 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/signer.py -# hypothesis_version: 6.141.1 - -[b'=', ',', ':', 'ascii', 'customer_email', 'enterprise', 'expires_at', 'issued_at', 'pro', 'seats', 'solo', 'team', 'tier', 'trial', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/24f84c1a540fc92d b/.hypothesis/constants/24f84c1a540fc92d deleted file mode 100644 index 841fa4b..0000000 --- a/.hypothesis/constants/24f84c1a540fc92d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 
'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/27a521e124dddf17 b/.hypothesis/constants/27a521e124dddf17 deleted file mode 100644 index a3f69f0..0000000 --- a/.hypothesis/constants/27a521e124dddf17 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.141.1 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/29cc4be1ebfcb2de b/.hypothesis/constants/29cc4be1ebfcb2de deleted file mode 100644 index 1c5d4c3..0000000 --- a/.hypothesis/constants/29cc4be1ebfcb2de +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/licenses.py -# hypothesis_version: 6.141.1 - -[0.0, 100.0, 365, 86400, '/licenses/activate', '/licenses/status', '/licenses/usage', 'customer_email', 'enterprise', 'expires_at', 'pro', 'solo', 'team', 'tier', 'trial', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/2a94bf2bdc14a19d b/.hypothesis/constants/2a94bf2bdc14a19d deleted file mode 100644 index ba34a23..0000000 --- a/.hypothesis/constants/2a94bf2bdc14a19d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 
5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 
'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/2ac5a50af3eca496 b/.hypothesis/constants/2ac5a50af3eca496 deleted file mode 100644 index 16e6830..0000000 --- a/.hypothesis/constants/2ac5a50af3eca496 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# 
hypothesis_version: 6.152.4 - -[0.85, 128, 300, '!=', '.', '.simdrive', '; ', '<', '<=', '==', '>', '>=', 'AppRequires', 'InitialStateRequires', 'L', 'RequiresBlock', 'SIMDRIVE_HOME', 'SimRequires', '_capture', '_simdrive_warning', '` to capture one.', 'action', 'actual', 'allow', 'any', 'app', 'app.bundle_id', 'app.version', 'app_bundle_id', 'app_version', 'args', 'bundle_id', 'cancel', 'captured_at', 'confidence_band', 'created_at', 'created_by_session', 'device', "don't allow", 'dont allow', 'drift', 'drifted', 'duration_ms', 'error', 'exact', 'execute_error', 'executed', 'expected', 'fail', 'force', 'foreground', 'h', 'halt', 'halt_reason', 'halted_at', 'high', 'id', 'inf', 'initial_state', 'ios_version', 'key', 'major', 'medium', 'minor', 'name', 'ok', 'os_version', 'path', 'post_screenshot', 'pre_screenshot', 'press_key', 'primary_button_label', 'reason', 'reasons', 'recording finalized', 'recording started', 'recording stopping', 'recording.yaml', 'recording_name', 'recordings', 'remedy', 'replay', 'requires', 'screenshot_h', 'screenshot_w', 'session_id', 'sim', 'sim.device', 'sim.ios_version', 'sim_device', 'simdrive.recorder', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'status', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'text_mark_count', 'text_subset_required', 'threshold', 'type_text', 'version', 'version_match', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/2be61240a0c72d41 b/.hypothesis/constants/2be61240a0c72d41 deleted file mode 100644 index aa8727a..0000000 --- a/.hypothesis/constants/2be61240a0c72d41 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 
'hardware_udid', 'host', 'ip', 'launch_failed', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/3003084528636ffb b/.hypothesis/constants/3003084528636ffb deleted file mode 100644 index 1658dfb..0000000 --- a/.hypothesis/constants/3003084528636ffb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/recordings.py -# hypothesis_version: 6.152.4 - -[204, 3600, '/recordings', 'customer_email', 'download_url', 'enterprise', 'journey_slug', 'pro', 'recording_id', 'screenshot_count', 'size_bytes', 'solo', 'team', 'tier', 'trial', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/30eeb792d2063a6e b/.hypothesis/constants/30eeb792d2063a6e deleted file mode 100644 index 1e54fab..0000000 --- a/.hypothesis/constants/30eeb792d2063a6e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/keypair.py -# hypothesis_version: 6.141.1 - -['__main__', 'generate'] \ No newline at end of file diff --git a/.hypothesis/constants/340256ea54e4efd1 b/.hypothesis/constants/340256ea54e4efd1 deleted file mode 100644 index dfc8aa4..0000000 --- a/.hypothesis/constants/340256ea54e4efd1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/claude_client.py -# hypothesis_version: 6.141.1 - -[0.0, 0.004, 0.5, 15.0, 75.0, 200, 1024, 1000000, '.', 'ANTHROPIC_API_KEY', 'args', 'base64', 'claude-opus-4-7', 'confidence', 'content', 'data', 'fail', 'image', 'image/jpeg', 'image/png', 'jpeg', 'jpg', 'media_type', 'rationale', 'rb', 'role', 'source', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/.hypothesis/constants/34244ea6e2276714 b/.hypothesis/constants/34244ea6e2276714 deleted file mode 100644 index a3f69f0..0000000 --- a/.hypothesis/constants/34244ea6e2276714 +++ /dev/null @@ -1,4 +0,0 @@ 
-# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.141.1 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/36f3595d6afa6b8e b/.hypothesis/constants/36f3595d6afa6b8e deleted file mode 100644 index c609bde..0000000 --- a/.hypothesis/constants/36f3595d6afa6b8e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/mcp_sampling_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.2, 0.3, 0.5, 0.9, 2048, '.', 'args', 'ascii', 'confidence', 'fail', 'image', 'image/jpeg', 'jpeg', 'jpg', 'rationale', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/.hypothesis/constants/37f4bacbefc32971 b/.hypothesis/constants/37f4bacbefc32971 deleted file mode 100644 index 912f709..0000000 --- a/.hypothesis/constants/37f4bacbefc32971 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/window.py -# hypothesis_version: 6.141.1 - -[5.0, ',', '-e', 'no_process', 'no_window', 'osascript'] \ No newline at end of file diff --git a/.hypothesis/constants/3815b5dc89a5bdf4 b/.hypothesis/constants/3815b5dc89a5bdf4 deleted file mode 100644 index 5d5c7eb..0000000 --- a/.hypothesis/constants/3815b5dc89a5bdf4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/routes/trials.py -# hypothesis_version: 6.141.1 - -[86400, '/trials', 'trial', 
'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/3837c0081e7e3d6d b/.hypothesis/constants/3837c0081e7e3d6d deleted file mode 100644 index 3472e40..0000000 --- a/.hypothesis/constants/3837c0081e7e3d6d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/robustness.py -# hypothesis_version: 6.152.4 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'app_bundle_id', 'app_version', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'created_by_session', 'dark', 'deny', 'device', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'os_version', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'requires', 'settings', 'simctl', 'simdrive_version', 'siri', 'speech', 'ssim_masks', 'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/38a9e1032e5b467c b/.hypothesis/constants/38a9e1032e5b467c deleted file mode 100644 index e082be6..0000000 --- a/.hypothesis/constants/38a9e1032e5b467c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/trial.py -# hypothesis_version: 6.152.4 - -['.simdrive', 'email', 'expires_at', 'installed_at', 'last_server_check', 'license.json', 'license_key'] \ No newline at end of file diff --git a/.hypothesis/constants/3ab583c5fe8654bd b/.hypothesis/constants/3ab583c5fe8654bd deleted file mode 100644 index 4b58940..0000000 --- a/.hypothesis/constants/3ab583c5fe8654bd +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/errors.py -# hypothesis_version: 6.141.1 - -['Library', 'MobileDevice', 'body', 'http_status', 'identities', 'last_seen_at', 'log_path', 'missing', 'profiles_dir', 'stderr', 'team_id', 'tool', 'udid', 'wda_build_failed', 'wda_device_locked', 'wda_device_not_ready', 'wda_install_failed', 'wda_not_bootstrapped', 'wda_session_lost', 'wda_smoke_failed', 'xct_code'] \ No newline at end of file diff --git a/.hypothesis/constants/3bf1b2b555f1cd26 b/.hypothesis/constants/3bf1b2b555f1cd26 deleted file mode 100644 index 8130b0d..0000000 --- a/.hypothesis/constants/3bf1b2b555f1cd26 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git a/.hypothesis/constants/3eaa4ebb789e065e b/.hypothesis/constants/3eaa4ebb789e065e deleted file mode 100644 index 0ae164f..0000000 --- a/.hypothesis/constants/3eaa4ebb789e065e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/config.py -# hypothesis_version: 6.141.1 - -[100, 1024, 8080, '/tmp/simdrive-cloud', '0.0.0.0', 'CloudConfig', 'enterprise', 'pro', 'solo', 'team', 'trial'] \ No newline at end of file diff --git a/.hypothesis/constants/3fe834e9e7ea58bb b/.hypothesis/constants/3fe834e9e7ea58bb deleted file mode 100644 index 9ec5fcb..0000000 --- a/.hypothesis/constants/3fe834e9e7ea58bb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py 
-# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 
'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/4360b98b57bdca2c b/.hypothesis/constants/4360b98b57bdca2c deleted file mode 100644 index 97f08ba..0000000 --- a/.hypothesis/constants/4360b98b57bdca2c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/routes/recordings.py -# hypothesis_version: 6.141.1 - -[3600, '/recordings', 'customer_email', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/47e278142c9b12b4 b/.hypothesis/constants/47e278142c9b12b4 deleted file mode 100644 index d4de539..0000000 --- 
a/.hypothesis/constants/47e278142c9b12b4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/__init__.py -# hypothesis_version: 6.141.1 - -['MetricsRegistry', 'Span', 'TraceContext', 'configure_logging', 'dump_metrics', 'get_logger', 'get_registry', 'increment_counter', 'record_histogram', 'start_span'] \ No newline at end of file diff --git a/.hypothesis/constants/4852d1ecf2140227 b/.hypothesis/constants/4852d1ecf2140227 deleted file mode 100644 index b678eb4..0000000 --- a/.hypothesis/constants/4852d1ecf2140227 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.152.4 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/49fe5af9676d019a b/.hypothesis/constants/49fe5af9676d019a deleted file mode 100644 index 2d2f103..0000000 --- a/.hypothesis/constants/49fe5af9676d019a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 
'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/4a8f9aa36e5b7ee8 b/.hypothesis/constants/4a8f9aa36e5b7ee8 deleted file mode 100644 index 01304ec..0000000 --- a/.hypothesis/constants/4a8f9aa36e5b7ee8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/storage/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/4c8f8b18e57c3e1e b/.hypothesis/constants/4c8f8b18e57c3e1e deleted file mode 100644 index ce1bdb7..0000000 --- a/.hypothesis/constants/4c8f8b18e57c3e1e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# hypothesis_version: 6.152.4 - -[0.85, 128, 300, '!=', '.', '.simdrive', '; ', '<', '<=', '==', '>', '>=', 'AppRequires', 'InitialStateRequires', 'L', 'RequiresBlock', 'SIMDRIVE_HOME', 'SimRequires', '_capture', '_simdrive_warning', '` to capture one.', 'action', 'actual', 'allow', 'any', 'app', 'app.bundle_id', 'app.version', 'app_bundle_id', 'app_version', 'args', 'bundle_id', 'cancel', 'captured_at', 'confidence_band', 'created_at', 'created_by_session', 'device', "don't allow", 'dont allow', 'drift', 'drifted', 'duration_ms', 'error', 'exact', 'execute_error', 'executed', 'expected', 'force', 'foreground', 'h', 'halt', 'halt_reason', 'halted_at', 'high', 'id', 'inf', 'initial_state', 'ios_version', 'key', 'major', 'medium', 'minor', 'name', 'ok', 'os_version', 'post_screenshot', 'pre_screenshot', 'press_key', 'primary_button_label', 'reasons', 'recording finalized', 'recording started', 'recording stopping', 'recording.yaml', 'recording_name', 'recordings', 'remedy', 
'replay', 'requires', 'screenshot_h', 'screenshot_w', 'session_id', 'sim', 'sim.device', 'sim.ios_version', 'simdrive.recorder', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'text_subset_required', 'threshold', 'type_text', 'version', 'version_match', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/4d708f828bb70984 b/.hypothesis/constants/4d708f828bb70984 deleted file mode 100644 index 1ed592c..0000000 --- a/.hypothesis/constants/4d708f828bb70984 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/app.py -# hypothesis_version: 6.152.4 - -['/health', '/v1', 'SELECT 1', 'SimDrive Cloud API', 'db_reachable', 'ok', 'simdrive', 'simdrive_r2_', 'sqlite://', 'status', 'storage_backend', 'unknown', 'version'] \ No newline at end of file diff --git a/.hypothesis/constants/50e134c034c2f268 b/.hypothesis/constants/50e134c034c2f268 deleted file mode 100644 index a8bfa6c..0000000 --- a/.hypothesis/constants/50e134c034c2f268 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.141.1 - -[2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--match', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 
'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/52fd13a2cac6fcc1 b/.hypothesis/constants/52fd13a2cac6fcc1 deleted file mode 100644 index 2ddca8e..0000000 --- a/.hypothesis/constants/52fd13a2cac6fcc1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/auth.py -# hypothesis_version: 6.141.1 - -['Authorization', 'Bearer', 'Bearer ', 'WWW-Authenticate'] \ No newline at end of file diff --git a/.hypothesis/constants/53305e968b1d0aef b/.hypothesis/constants/53305e968b1d0aef deleted file mode 100644 index 0d58ca5..0000000 --- a/.hypothesis/constants/53305e968b1d0aef +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/hid_inject.py -# hypothesis_version: 6.152.4 - -[5.0, 15.0, 1000.0, '_bin', 'button', 'chord', 'down', 'home', 'key', 'lock', 'side', 'simdrive-input', 'siri', 'size', 'tap', 'text', 'up'] \ No newline at end of file diff --git a/.hypothesis/constants/5472a5f98a7c3888 b/.hypothesis/constants/5472a5f98a7c3888 deleted file mode 100644 index 1f65ec2..0000000 --- a/.hypothesis/constants/5472a5f98a7c3888 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/routes/licenses.py -# hypothesis_version: 6.141.1 - -[365, 86400, '/licenses/activate', '/licenses/status', 'enterprise', 'expires_at', 'pro', 'solo', 'team', 'tier', 'trial'] \ No newline at end of file diff --git a/.hypothesis/constants/54c34bb108d033f4 b/.hypothesis/constants/54c34bb108d033f4 deleted file mode 100644 index 9ebac7b..0000000 --- a/.hypothesis/constants/54c34bb108d033f4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/diagnostics.py -# hypothesis_version: 6.152.4 - -[0.0, 5.0, 10.0, 15.0, 200, '-', '--json', '-convert', '-o', '-p', '.ips', 'CFBundleDisplayName', 'CFBundleName', 
'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'backtrace', 'booted', 'bug_type', 'bundleID', 'bundle_id', 'checks', 'crashing_thread', 'detail', 'devices', 'exception', 'foreground', 'frames', 'hid_helper', 'id', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'r', 'replace', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/54e3be75cdd4f73a b/.hypothesis/constants/54e3be75cdd4f73a deleted file mode 100644 index 2fb00d5..0000000 --- a/.hypothesis/constants/54e3be75cdd4f73a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/schema.py -# hypothesis_version: 6.141.1 - -[180, 'DeviceSelector', 'SuccessCriterion', 'after', 'background', 'device', 'foreground', 'name', 'not_running', 'persona', 'schema_version', 'simulator'] \ No newline at end of file diff --git a/.hypothesis/constants/5b37d8fedd61eda4 b/.hypothesis/constants/5b37d8fedd61eda4 deleted file mode 100644 index f53bf68..0000000 --- a/.hypothesis/constants/5b37d8fedd61eda4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.152.4 - -[1.0, 2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 
'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/5c37177911cd9b90 b/.hypothesis/constants/5c37177911cd9b90 deleted file mode 100644 index b5d2cb0..0000000 --- a/.hypothesis/constants/5c37177911cd9b90 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', '/window/size', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'height', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'width', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/5d20f7f5263cbc0f b/.hypothesis/constants/5d20f7f5263cbc0f deleted file mode 100644 index 57278f6..0000000 --- a/.hypothesis/constants/5d20f7f5263cbc0f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/5f75b35de93f8f47 b/.hypothesis/constants/5f75b35de93f8f47 deleted file mode 100644 index 3260b0c..0000000 --- a/.hypothesis/constants/5f75b35de93f8f47 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/6246e208e1423c81 b/.hypothesis/constants/6246e208e1423c81 
deleted file mode 100644 index ad806da..0000000 --- a/.hypothesis/constants/6246e208e1423c81 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/.venv/bin/pytest -# hypothesis_version: 6.152.4 - -['.exe', '__main__'] \ No newline at end of file diff --git a/.hypothesis/constants/63c2634c01b703d5 b/.hypothesis/constants/63c2634c01b703d5 deleted file mode 100644 index 99e5bd9..0000000 --- a/.hypothesis/constants/63c2634c01b703d5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 
'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 
'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/643df37c90f8ceec b/.hypothesis/constants/643df37c90f8ceec deleted file mode 100644 index 3506402..0000000 --- a/.hypothesis/constants/643df37c90f8ceec +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/db/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/68192632ea959a39 b/.hypothesis/constants/68192632ea959a39 deleted file mode 100644 index ba4891c..0000000 --- a/.hypothesis/constants/68192632ea959a39 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/db/models.py -# hypothesis_version: 6.152.4 - -[255, 512, 'check_same_thread', 'connect_args', 'license_activations', 'poolclass', 'recordings', 'sqlite://', 'sqlite:///', 'trial_activations'] \ No newline at end of file diff --git a/.hypothesis/constants/6bd49704c253148b b/.hypothesis/constants/6bd49704c253148b deleted file mode 100644 index b5d2cb0..0000000 --- a/.hypothesis/constants/6bd49704c253148b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', '/window/size', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'height', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 
'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'width', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/6e849b463d4efa5f b/.hypothesis/constants/6e849b463d4efa5f deleted file mode 100644 index ba34a23..0000000 --- a/.hypothesis/constants/6e849b463d4efa5f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 
'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 
'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/732558cb17a5ade8 b/.hypothesis/constants/732558cb17a5ade8 deleted file mode 100644 index 14e7e6e..0000000 --- a/.hypothesis/constants/732558cb17a5ade8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/bootstrap.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 10.0, 15.0, 120, 8100, '"', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%b %d %H:%M:%S %Y %Z', '(\n)', '()', ', ', '-', '--bundle-id', '--device', '--json-output', '--project-directory', '-C', '-c', '-derivedDataPath', '-destination', '-noout', '-p', '-project', '-scheme', '-startdate', '-v', '-xctestrun', '.simdrive', '=', 'Apple Development', 'CSSMERR', 'Code 41', 'Code=41', 'PINNED_SHA.txt', 'Podfile', 'REPO=', 'REVOKED', 'SHA=', 'WDA READY', 'WDA_REGISTRY_DIR', 'WebDriverAgentRunner', 'XCTDaemonErrorDomain', '\\(([A-Z0-9]{10})\\)', 'alwaysMatch', 'app', 'build-for-testing', 'build.log', 'bundle', 'bundleId', 'capabilities', 'checkout', 'clone', 'clone.log', 'codesigning', 'com.apple.dt.Xcode', 'connectionProperties', 'coredevice_uuid', 'ddiServicesAvailable', 'defaults', 'derived', 'derived_data', 'details', 'developerModeStatus', 'device', 'deviceProperties', 'device_name', 'devicectl', 'enabled', 'exec', 'find-certificate', 'find-identity', 'git', 'hardwareProperties', 'hardware_udid', 'host', 'idevicepair', 'info', 'install', 'install_path', 'ios', 'ip', 'last_built_at', 'name', 'notBefore=', 'openssl', 'os', 'os_version', 'paired', 'pairingState', 'pod', 'port', 'project.pbxproj', 'r', 'read', 'ready', 'replace', 'result', 'security', 'sessionId', 'sha1', 'signing_identity', 'source', 'team_id', 'udid', 'uninstall', 'utf-8', 'value', 'version', 'w', 'wda', 
'wda_bundle_id', 'x509', 'xcodebuild', 'xcrun', 'xctestrun_path', '{\n}', '{}'] \ No newline at end of file diff --git a/.hypothesis/constants/75ca39db6beb503d b/.hypothesis/constants/75ca39db6beb503d deleted file mode 100644 index ad82866..0000000 --- a/.hypothesis/constants/75ca39db6beb503d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 
'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/769037291a3d9e87 
b/.hypothesis/constants/769037291a3d9e87 deleted file mode 100644 index ca820f0..0000000 --- a/.hypothesis/constants/769037291a3d9e87 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/storage/r2.py -# hypothesis_version: 6.152.4 - -['/tmp/simdrive-cloud', '404', 'Body', 'Bucket', 'Code', 'Contents', 'Error', 'Key', 'NoSuchKey', 'R2_ACCESS_KEY_ID', 'R2_ACCOUNT_ID', 'R2_BUCKET', 'R2_SECRET_ACCESS_KEY', 'STORAGE_BACKEND', 'auto', 'get_object', 'list_objects_v2', 's3', 'stub'] \ No newline at end of file diff --git a/.hypothesis/constants/78e393eb2b0faa48 b/.hypothesis/constants/78e393eb2b0faa48 deleted file mode 100644 index bbee9c7..0000000 --- a/.hypothesis/constants/78e393eb2b0faa48 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/prompt.py -# hypothesis_version: 6.141.1 - -['(none)', ',', ':', 'marks', 'recent_logs', 'screenshot_path', 'stable_id', 'text'] \ No newline at end of file diff --git a/.hypothesis/constants/79faeb571a5310f8 b/.hypothesis/constants/79faeb571a5310f8 deleted file mode 100644 index a768562..0000000 --- a/.hypothesis/constants/79faeb571a5310f8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.152.4 - -['action', 'already_recording', 'available', 'bundle_id', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'device_launch_failed', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/7c542dca691cb9dd 
b/.hypothesis/constants/7c542dca691cb9dd deleted file mode 100644 index b678eb4..0000000 --- a/.hypothesis/constants/7c542dca691cb9dd +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.152.4 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/7e0d8cd1196f134c b/.hypothesis/constants/7e0d8cd1196f134c deleted file mode 100644 index 16c036f..0000000 --- a/.hypothesis/constants/7e0d8cd1196f134c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/validator.py -# hypothesis_version: 6.141.1 - -[86400, '.', '=', 'ascii', 'expires_at'] \ No newline at end of file diff --git a/.hypothesis/constants/7e24bb22a865cce1 b/.hypothesis/constants/7e24bb22a865cce1 deleted file mode 100644 index 2a303eb..0000000 --- a/.hypothesis/constants/7e24bb22a865cce1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', 
'__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 
'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/7e377cc9b9c550c3 b/.hypothesis/constants/7e377cc9b9c550c3 deleted file mode 100644 index ad82866..0000000 --- a/.hypothesis/constants/7e377cc9b9c550c3 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email 
address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 
'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/809323a591ca6992 b/.hypothesis/constants/809323a591ca6992 deleted file mode 100644 index 657c5ce..0000000 --- a/.hypothesis/constants/809323a591ca6992 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['0.0.0+local', 'simdrive'] \ No newline at end of file diff --git a/.hypothesis/constants/80c4ff665d927b85 b/.hypothesis/constants/80c4ff665d927b85 deleted file mode 100644 index e98c466..0000000 --- a/.hypothesis/constants/80c4ff665d927b85 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.152.4 - -['1.0.0a2'] \ No newline at end of file diff --git a/.hypothesis/constants/8157963a9e0574ec b/.hypothesis/constants/8157963a9e0574ec deleted file mode 100644 index 6b6fc52..0000000 --- a/.hypothesis/constants/8157963a9e0574ec +++ /dev/null @@ 
-1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/errors.py -# hypothesis_version: 6.141.1 - -['act_tool_failed', 'attempt', 'cap_usd', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cost_usd', 'criterion_type', 'inner_code', 'inner_message', 'journey_name', 'journeys_dir', 'llm_calls', 'path', 'persona_slug', 'personas_dir', 'reason', 'seconds', 'steps', 'supported', 'tag_filter', 'tool_name', 'version'] \ No newline at end of file diff --git a/.hypothesis/constants/81fefad85d591123 b/.hypothesis/constants/81fefad85d591123 deleted file mode 100644 index 841fa4b..0000000 --- a/.hypothesis/constants/81fefad85d591123 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/82d4f74643a935d0 b/.hypothesis/constants/82d4f74643a935d0 deleted file mode 100644 index 645a31e..0000000 --- a/.hypothesis/constants/82d4f74643a935d0 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'PATH', 'WdaClient', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 
'background', 'baseline', 'boolean', 'bootstrap-device', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 
'store_true', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/8580b83b53138083 b/.hypothesis/constants/8580b83b53138083 deleted file mode 100644 index c3d4371..0000000 --- a/.hypothesis/constants/8580b83b53138083 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/86bd9091f6ecc8a6 b/.hypothesis/constants/86bd9091f6ecc8a6 deleted file mode 100644 index 8130b0d..0000000 --- a/.hypothesis/constants/86bd9091f6ecc8a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at 
end of file diff --git a/.hypothesis/constants/8a89bcbaa35128c1 b/.hypothesis/constants/8a89bcbaa35128c1 deleted file mode 100644 index 19efeb8..0000000 --- a/.hypothesis/constants/8a89bcbaa35128c1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/db/models.py -# hypothesis_version: 6.141.1 - -[255, 512, 'check_same_thread', 'connect_args', 'license_activations', 'poolclass', 'recordings', 'sqlite://', 'sqlite:///', 'trial_activations'] \ No newline at end of file diff --git a/.hypothesis/constants/8aa7cdd6a53cc03a b/.hypothesis/constants/8aa7cdd6a53cc03a deleted file mode 100644 index 34edfe1..0000000 --- a/.hypothesis/constants/8aa7cdd6a53cc03a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/tracing.py -# hypothesis_version: 6.141.1 - -[1000.0, 'Span', 'duration_ms', 'ended_at', 'metadata', 'name', 'parent_span_id', 'span_id', 'started_at'] \ No newline at end of file diff --git a/.hypothesis/constants/8abf18f77c2a66a6 b/.hypothesis/constants/8abf18f77c2a66a6 deleted file mode 100644 index 826021d..0000000 --- a/.hypothesis/constants/8abf18f77c2a66a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observe.py -# hypothesis_version: 6.141.1 - -[1000.0, 1000, '.json', 'annotated_path', 'captured_at', 'device', 'height', 'latency_ms', 'marks', 'marks_count', 'observe complete', 'observe_latency_ms', 'recent_logs', 'screenshot_path', 'simdrive.observe', 'simulator', 'target', 'udid', 'width', 'window_bounds_macos', 'x', 'y'] \ No newline at end of file diff --git a/.hypothesis/constants/8c0f345c08dd1f86 b/.hypothesis/constants/8c0f345c08dd1f86 deleted file mode 100644 index c3dfd3c..0000000 --- a/.hypothesis/constants/8c0f345c08dd1f86 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/bootstrap.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 
10.0, 15.0, 120, 8100, '"', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%b %d %H:%M:%S %Y %Z', '(\n)', '()', ', ', '-', '--bundle-id', '--device', '--json-output', '--project-directory', '-C', '-c', '-derivedDataPath', '-destination', '-noout', '-p', '-project', '-scheme', '-startdate', '-v', '-xctestrun', '.simdrive', '=', 'Apple Development', 'CSSMERR', 'Code=41', 'PINNED_SHA.txt', 'Podfile', 'REPO=', 'REVOKED', 'SHA=', 'WDA READY', 'WDA_REGISTRY_DIR', 'WebDriverAgentRunner', 'XCTDaemonErrorDomain', '\\(([A-Z0-9]{10})\\)', 'alwaysMatch', 'app', 'build-for-testing', 'build.log', 'bundle', 'bundleId', 'capabilities', 'checkout', 'clone', 'clone.log', 'codesigning', 'com.apple.dt.Xcode', 'connectionProperties', 'coredevice_uuid', 'ddiServicesAvailable', 'defaults', 'derived', 'derived_data', 'details', 'developerModeStatus', 'device', 'deviceProperties', 'device_name', 'devicectl', 'enabled', 'exec', 'find-certificate', 'find-identity', 'git', 'hardwareProperties', 'hardware_udid', 'host', 'idevicepair', 'info', 'install', 'install_path', 'ios', 'ip', 'last_built_at', 'name', 'notBefore=', 'openssl', 'os', 'os_version', 'paired', 'pairingState', 'pod', 'port', 'project.pbxproj', 'r', 'read', 'ready', 'replace', 'result', 'security', 'sessionId', 'sha1', 'signing_identity', 'source', 'team_id', 'udid', 'uninstall', 'utf-8', 'value', 'version', 'w', 'wda', 'wda_bundle_id', 'x509', 'xcodebuild', 'xcrun', 'xctestrun_path', '{\n}', '{}'] \ No newline at end of file diff --git a/.hypothesis/constants/8c4469a985d53ea4 b/.hypothesis/constants/8c4469a985d53ea4 deleted file mode 100644 index 07caf76..0000000 --- a/.hypothesis/constants/8c4469a985d53ea4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/runner.py -# hypothesis_version: 6.141.1 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 
'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git a/.hypothesis/constants/8d7ed2723dc643a6 b/.hypothesis/constants/8d7ed2723dc643a6 deleted file mode 100644 index 55f3cbe..0000000 --- a/.hypothesis/constants/8d7ed2723dc643a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/config.py -# hypothesis_version: 6.141.1 - -[100, 1024, 8080, '/tmp/simdrive-cloud', '0.0.0.0', 'CloudConfig', 'enterprise', 'pro', 'solo', 'team', 'trial'] \ No newline at end of file diff --git a/.hypothesis/constants/8d8d313e8fc0a9da b/.hypothesis/constants/8d8d313e8fc0a9da deleted file mode 100644 index 36fc7c3..0000000 --- a/.hypothesis/constants/8d8d313e8fc0a9da +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/8f1ab964be981a24 b/.hypothesis/constants/8f1ab964be981a24 deleted file mode 100644 index 98ecb1f..0000000 --- a/.hypothesis/constants/8f1ab964be981a24 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/perf.py -# hypothesis_version: 6.141.1 - -[0.0, 10.0, 15.0, 1024.0, '-M', '-o', '-p', '/usr/bin/footprint', 'B', 'GB', 'KB', 'MB', 'available', 'captured_at', 'clean_mb', 'cpu_pct', 'dirty_mb', 'footprint', 'footprint_mb', 'high', 'launchctl', 'list', 'low', 'medium', 'memory_rss_mb', 'pcpu=', 'phys_footprint', 'phys_footprint_peak', 'pid', 'ps', 'reason', 'reclaimable_mb', 'rss=', 'simctl', 'spawn', 'swapped_mb', 'threads', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/8f92960071c25b12 b/.hypothesis/constants/8f92960071c25b12 deleted file mode 100644 index 
841ce9c..0000000 --- a/.hypothesis/constants/8f92960071c25b12 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/act.py -# hypothesis_version: 6.141.1 - -[0.05, 0.15, 5.0, 10.0, 1000.0, 128, 300, 5000, '-e', '-w', '30', 'Action Button', 'App Switcher', 'Device', 'Home', 'Lock', 'Rotate Left', 'Rotate Right', 'Shake', 'Siri', 'Trigger Screenshot', 'action-button', 'app-switcher', 'arrow-down', 'arrow-left', 'arrow-right', 'arrow-up', 'backspace', 'cliclick', 'cmd', 'delete', 'enter', 'esc', 'escape', 'hid', 'home', 'kp:arrow-down', 'kp:arrow-left', 'kp:arrow-right', 'kp:arrow-up', 'kp:delete', 'kp:esc', 'kp:return', 'kp:space', 'kp:tab', 'latency_ms', 'lock', 'osascript', 'return', 'rotate-left', 'rotate-right', 'screenshot', 'shake', 'simdrive.act', 'siri', 'space', 't:', 'tab', 'tap dispatched (hid)', 'tap_latency_ms', 'v', 'x', 'y'] \ No newline at end of file diff --git a/.hypothesis/constants/9016ed643f19998d b/.hypothesis/constants/9016ed643f19998d deleted file mode 100644 index 8e7019c..0000000 --- a/.hypothesis/constants/9016ed643f19998d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.141.1 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git a/.hypothesis/constants/91acac7094bbafd2 b/.hypothesis/constants/91acac7094bbafd2 deleted file mode 100644 index 81a6137..0000000 --- a/.hypothesis/constants/91acac7094bbafd2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/ci.py -# 
hypothesis_version: 6.141.1 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/9296c5494f7ded4a b/.hypothesis/constants/9296c5494f7ded4a deleted file mode 100644 index bd972ea..0000000 --- a/.hypothesis/constants/9296c5494f7ded4a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/diagnostics.py -# hypothesis_version: 6.141.1 - -[0.0, 5.0, 10.0, 15.0, 200, '-', '--json', '-convert', '-o', '-p', '.ips', 'CFBundleDisplayName', 'CFBundleName', 'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'backtrace', 'booted', 'bug_type', 'bundleID', 'bundle_id', 'checks', 'crashing_thread', 'detail', 'devices', 'exception', 'foreground', 'frames', 'hid_helper', 'id', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'r', 'replace', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/95e3d80e7c026ac5 b/.hypothesis/constants/95e3d80e7c026ac5 deleted file mode 100644 index cad3f06..0000000 --- a/.hypothesis/constants/95e3d80e7c026ac5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/public_key.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/995cffeb2324f68a b/.hypothesis/constants/995cffeb2324f68a deleted file 
mode 100644 index d347bb7..0000000 --- a/.hypothesis/constants/995cffeb2324f68a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.152.4 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', '/window/size', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'height', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'width', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/9cb02b4f598a2dbb b/.hypothesis/constants/9cb02b4f598a2dbb deleted file mode 100644 index f333eb8..0000000 --- a/.hypothesis/constants/9cb02b4f598a2dbb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/errors.py -# hypothesis_version: 6.152.4 - -['cloud_unreachable', 'code', 'current', 'detail', 'details', 'email', 'error', 'expires_at', 'grace_days', 'ip', 'license_expired', 'license_invalid', 'license_not_found', 'message', 'ok', 'path', 'reason', 'required', 'trial_already_used', 'trial_rate_limited'] \ No newline at end of file diff --git a/.hypothesis/constants/9df7190349efe8fe b/.hypothesis/constants/9df7190349efe8fe deleted file mode 100644 index a036dd7..0000000 --- a/.hypothesis/constants/9df7190349efe8fe +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/app.py -# hypothesis_version: 6.141.1 - -['/v1', '1.0.0', 'SimDrive Cloud API', 'simdrive_r2_', 'sqlite://'] \ No newline at end of file diff --git a/.hypothesis/constants/9f99926cdc9d8a39 b/.hypothesis/constants/9f99926cdc9d8a39 deleted file mode 100644 index 
215e1f4..0000000 --- a/.hypothesis/constants/9f99926cdc9d8a39 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.152.4 - -['1.0.0a1'] \ No newline at end of file diff --git a/.hypothesis/constants/a1acc3ac15e9f754 b/.hypothesis/constants/a1acc3ac15e9f754 deleted file mode 100644 index ad82866..0000000 --- a/.hypothesis/constants/a1acc3ac15e9f754 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 
'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 
'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/a2e38c1b2ed3efc9 b/.hypothesis/constants/a2e38c1b2ed3efc9 deleted file mode 100644 index 67234b7..0000000 --- a/.hypothesis/constants/a2e38c1b2ed3efc9 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/runs.py -# hypothesis_version: 6.152.4 - -[0.0, 100.0, '/runs/increment', 'Authorization', 'Bearer ', 'Retry-After', 'customer_email', 'percent_used', 'period_end', 'period_start', 'runs_limit', 'runs_used', 'solo', 'tier', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/a49a5880da40cca8 b/.hypothesis/constants/a49a5880da40cca8 deleted file mode 100644 index eb64630..0000000 --- a/.hypothesis/constants/a49a5880da40cca8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--session-id', '--tag', '--version', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'PATH', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'background', 'baseline', 'boolean', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 
'handler', 'hid', 'hid_note', 'hid_supported', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/a5913a36a1589491 b/.hypothesis/constants/a5913a36a1589491 deleted file mode 100644 index 3217590..0000000 --- a/.hypothesis/constants/a5913a36a1589491 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.152.4 - -['1.0.0a8'] \ No newline 
at end of file diff --git a/.hypothesis/constants/a8d10694f14b03c1 b/.hypothesis/constants/a8d10694f14b03c1 deleted file mode 100644 index c89e386..0000000 --- a/.hypothesis/constants/a8d10694f14b03c1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/recordings.py -# hypothesis_version: 6.141.1 - -[3600, '/recordings', 'customer_email', 'download_url', 'enterprise', 'journey_slug', 'pro', 'recording_id', 'screenshot_count', 'size_bytes', 'solo', 'team', 'tier', 'trial', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/ac2f32e306c52548 b/.hypothesis/constants/ac2f32e306c52548 deleted file mode 100644 index cfb9628..0000000 --- a/.hypothesis/constants/ac2f32e306c52548 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.141.1 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/af9fb368be44a8b4 b/.hypothesis/constants/af9fb368be44a8b4 deleted file mode 100644 index baf87eb..0000000 --- a/.hypothesis/constants/af9fb368be44a8b4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/keypair.py -# hypothesis_version: 6.141.1 - -['__main__', 'generate'] \ No newline at end of file diff --git a/.hypothesis/constants/b058c20639e8b443 b/.hypothesis/constants/b058c20639e8b443 deleted file mode 100644 index 1658dfb..0000000 --- a/.hypothesis/constants/b058c20639e8b443 +++ /dev/null @@ -1,4 +0,0 @@ -# 
file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/recordings.py -# hypothesis_version: 6.152.4 - -[204, 3600, '/recordings', 'customer_email', 'download_url', 'enterprise', 'journey_slug', 'pro', 'recording_id', 'screenshot_count', 'size_bytes', 'solo', 'team', 'tier', 'trial', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/.hypothesis/constants/b4ee92d7688351df b/.hypothesis/constants/b4ee92d7688351df deleted file mode 100644 index 11b2c9d..0000000 --- a/.hypothesis/constants/b4ee92d7688351df +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/errors.py -# hypothesis_version: 6.141.1 - -['code', 'details', 'error', 'message', 'ok'] \ No newline at end of file diff --git a/.hypothesis/constants/b4f70cc04deecc6c b/.hypothesis/constants/b4f70cc04deecc6c deleted file mode 100644 index f8ec624..0000000 --- a/.hypothesis/constants/b4f70cc04deecc6c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'PATH', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 
'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'store_true', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git 
a/.hypothesis/constants/b688ef958b91f74e b/.hypothesis/constants/b688ef958b91f74e deleted file mode 100644 index fef7049..0000000 --- a/.hypothesis/constants/b688ef958b91f74e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/trials.py -# hypothesis_version: 6.141.1 - -[86400, '/trials', 'trial', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/b712f3eb2b6f8d36 b/.hypothesis/constants/b712f3eb2b6f8d36 deleted file mode 100644 index 9b77307..0000000 --- a/.hypothesis/constants/b712f3eb2b6f8d36 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/keypair.py -# hypothesis_version: 6.152.4 - -['__main__', 'generate'] \ No newline at end of file diff --git a/.hypothesis/constants/b7b69c191c17b0cf b/.hypothesis/constants/b7b69c191c17b0cf deleted file mode 100644 index 841fa4b..0000000 --- a/.hypothesis/constants/b7b69c191c17b0cf +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/b8c471195eb47827 b/.hypothesis/constants/b8c471195eb47827 deleted file mode 100644 index a377145..0000000 --- a/.hypothesis/constants/b8c471195eb47827 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--session-id', '--tag', '--version', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'PATH', '__main__', 
'_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'background', 'baseline', 'boolean', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | 
None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/ba4fa64ccf632dc5 b/.hypothesis/constants/ba4fa64ccf632dc5 deleted file mode 100644 index f694c95..0000000 --- a/.hypothesis/constants/ba4fa64ccf632dc5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 
'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'store_true', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/bc26b53257719cd4 b/.hypothesis/constants/bc26b53257719cd4 deleted file mode 100644 index 2135cf5..0000000 --- a/.hypothesis/constants/bc26b53257719cd4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/cli.py -# hypothesis_version: 6.152.4 
- -[b'=', 429, 86400, ',', '.simdrive', '1', ':', 'SIMDRIVE_OFFLINE_DEV', 'ascii', 'code', 'customer_email', 'dev-trial', 'email', 'expires_at', 'installed_at', 'issued_at', 'key', 'last_server_check', 'license.json', 'license_key', 'message', 'mode', 'no_license', 'offline', 'online', 'read_error', 'seats', 'server_time', 'subject', 'tier', 'trial', 'utf-8', 'valid'] \ No newline at end of file diff --git a/.hypothesis/constants/bd612b6fb98fb4d4 b/.hypothesis/constants/bd612b6fb98fb4d4 deleted file mode 100644 index 4ebb5f6..0000000 --- a/.hypothesis/constants/bd612b6fb98fb4d4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/act.py -# hypothesis_version: 6.141.1 - -[0.05, 0.15, 5.0, 10.0, 128, 300, 5000, '-e', '-w', '30', 'Action Button', 'App Switcher', 'Device', 'Home', 'Lock', 'Rotate Left', 'Rotate Right', 'Shake', 'Siri', 'Trigger Screenshot', 'action-button', 'app-switcher', 'arrow-down', 'arrow-left', 'arrow-right', 'arrow-up', 'backspace', 'cliclick', 'cmd', 'delete', 'enter', 'esc', 'escape', 'hid', 'home', 'kp:arrow-down', 'kp:arrow-left', 'kp:arrow-right', 'kp:arrow-up', 'kp:delete', 'kp:esc', 'kp:return', 'kp:space', 'kp:tab', 'lock', 'osascript', 'return', 'rotate-left', 'rotate-right', 'screenshot', 'shake', 'siri', 'space', 't:', 'tab', 'v'] \ No newline at end of file diff --git a/.hypothesis/constants/bd7bcd7676104675 b/.hypothesis/constants/bd7bcd7676104675 deleted file mode 100644 index 86cc608..0000000 --- a/.hypothesis/constants/bd7bcd7676104675 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/observe.py -# hypothesis_version: 6.141.1 - -[1000, '.json', 'annotated_path', 'captured_at', 'device', 'height', 'marks', 'recent_logs', 'screenshot_path', 'simulator', 'width', 'window_bounds_macos', 'x', 'y'] \ No newline at end of file diff --git a/.hypothesis/constants/c37892475535ba9b b/.hypothesis/constants/c37892475535ba9b deleted file mode 
100644 index aa21462..0000000 --- a/.hypothesis/constants/c37892475535ba9b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/errors.py -# hypothesis_version: 6.152.4 - -['act_tool_failed', 'attempt', 'cap_usd', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cost_usd', 'criterion_type', 'inner_code', 'inner_message', 'journey_name', 'journeys_dir', 'llm_calls', 'path', 'persona_slug', 'personas_dir', 'reason', 'seconds', 'steps', 'supported', 'tag_filter', 'tool_name', 'version'] \ No newline at end of file diff --git a/.hypothesis/constants/c58b7cb2228dcac6 b/.hypothesis/constants/c58b7cb2228dcac6 deleted file mode 100644 index d89cbbf..0000000 --- a/.hypothesis/constants/c58b7cb2228dcac6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/persona.py -# hypothesis_version: 6.141.1 - -['^[a-z0-9_-]+$', 'advanced', 'en-US', 'expert', 'high', 'intermediate', 'low', 'medium', 'name', 'novice', 'role', 'schema_version', 'slug'] \ No newline at end of file diff --git a/.hypothesis/constants/c66c67754bb14ccc b/.hypothesis/constants/c66c67754bb14ccc deleted file mode 100644 index d4b1002..0000000 --- a/.hypothesis/constants/c66c67754bb14ccc +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/__init__.py -# hypothesis_version: 6.141.1 - -['17.0.0a1'] \ No newline at end of file diff --git a/.hypothesis/constants/c6db4e2150c98d20 b/.hypothesis/constants/c6db4e2150c98d20 deleted file mode 100644 index 2c7a10c..0000000 --- a/.hypothesis/constants/c6db4e2150c98d20 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/errors.py -# hypothesis_version: 6.141.1 - -['code', 'current', 'details', 'email', 'error', 'expires_at', 'grace_days', 'ip', 'license_expired', 'license_invalid', 'license_not_found', 'message', 'ok', 'path', 'reason', 
'required', 'trial_already_used', 'trial_rate_limited'] \ No newline at end of file diff --git a/.hypothesis/constants/c7aca978dcad40cf b/.hypothesis/constants/c7aca978dcad40cf deleted file mode 100644 index bf4b23a..0000000 --- a/.hypothesis/constants/c7aca978dcad40cf +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/__init__.py -# hypothesis_version: 6.141.1 - -['WdaClient', 'bootstrap_device', 'registry'] \ No newline at end of file diff --git a/.hypothesis/constants/c9284ddf6ca7c775 b/.hypothesis/constants/c9284ddf6ca7c775 deleted file mode 100644 index 9e18db4..0000000 --- a/.hypothesis/constants/c9284ddf6ca7c775 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/metrics.py -# hypothesis_version: 6.152.4 - -[0.0, 0.5, 0.95, 1.0, 100.0] \ No newline at end of file diff --git a/.hypothesis/constants/c9a40e1ffca7205e b/.hypothesis/constants/c9a40e1ffca7205e deleted file mode 100644 index 408d205..0000000 --- a/.hypothesis/constants/c9a40e1ffca7205e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/loader.py -# hypothesis_version: 6.141.1 - -['*.yaml', '.simdrive/journeys', '.simdrive/personas'] \ No newline at end of file diff --git a/.hypothesis/constants/ca3ca874a46317e0 b/.hypothesis/constants/ca3ca874a46317e0 deleted file mode 100644 index ca71a23..0000000 --- a/.hypothesis/constants/ca3ca874a46317e0 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/validator.py -# hypothesis_version: 6.141.1 - -[86400, '.', '=', 'ascii', 'expires_at'] \ No newline at end of file diff --git a/.hypothesis/constants/ca70c469a3c808e3 b/.hypothesis/constants/ca70c469a3c808e3 deleted file mode 100644 index ef395e0..0000000 --- a/.hypothesis/constants/ca70c469a3c808e3 +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/logger.py -# hypothesis_version: 6.141.1 - -['%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '1', 'SIMDRIVE_DEBUG', 'args', 'created', 'exc_info', 'exc_text', 'filename', 'funcName', 'level', 'levelname', 'levelno', 'lineno', 'message', 'module', 'msecs', 'msg', 'name', 'pathname', 'process', 'processName', 'relativeCreated', 'simdrive', 'stack_info', 'thread', 'threadName', 'timestamp'] \ No newline at end of file diff --git a/.hypothesis/constants/ca772080f809314b b/.hypothesis/constants/ca772080f809314b deleted file mode 100644 index 3197fd2..0000000 --- a/.hypothesis/constants/ca772080f809314b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/runs.py -# hypothesis_version: 6.141.1 - -[0.0, 100.0, '/runs/increment', 'Authorization', 'Bearer ', 'Retry-After', 'customer_email', 'percent_used', 'period_end', 'period_start', 'runs_limit', 'runs_used', 'solo', 'tier', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/ca7b881c7c2d550f b/.hypothesis/constants/ca7b881c7c2d550f deleted file mode 100644 index 9e599b5..0000000 --- a/.hypothesis/constants/ca7b881c7c2d550f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/robustness.py -# hypothesis_version: 6.141.1 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'dark', 'deny', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'settings', 'simctl', 'simdrive_version', 'siri', 
'speech', 'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/ca9a53fea3dff8b6 b/.hypothesis/constants/ca9a53fea3dff8b6 deleted file mode 100644 index 6740434..0000000 --- a/.hypothesis/constants/ca9a53fea3dff8b6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/result.py -# hypothesis_version: 6.141.1 - -['artifact_dir', 'budget_exceeded', 'crashed', 'error', 'failed', 'observed_value', 'passed', 'steps', 'success_criteria'] \ No newline at end of file diff --git a/.hypothesis/constants/cb7a38b7ca8b4435 b/.hypothesis/constants/cb7a38b7ca8b4435 deleted file mode 100644 index 8629da7..0000000 --- a/.hypothesis/constants/cb7a38b7ca8b4435 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/diagnostics.py -# hypothesis_version: 6.152.4 - -[0.0, 5.0, 10.0, 15.0, 30.0, 200, '-', '--device', '--json', '--json-output', '--quiet', '-convert', '-o', '-p', '.ips', '.json', 'CFBundleDisplayName', 'CFBundleName', 'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'apps', 'backtrace', 'booted', 'bug_type', 'bundleID', 'bundleIdentifier', 'bundleVersion', 'bundle_id', 'checks', 'crashing_thread', 'detail', 'device', 'devicectl', 'devices', 'exception', 'executable', 'file', 'foreground', 'frames', 'hid_helper', 'id', 'info', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'processIdentifier', 'processes', 'r', 'replace', 'result', 'running', 'runningProcesses', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'url', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/cc80eec40bbc8064 b/.hypothesis/constants/cc80eec40bbc8064 deleted 
file mode 100644 index b1d40b7..0000000 --- a/.hypothesis/constants/cc80eec40bbc8064 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/auth.py -# hypothesis_version: 6.141.1 - -['Authorization', 'Bearer', 'Bearer ', 'WWW-Authenticate'] \ No newline at end of file diff --git a/.hypothesis/constants/d21c9fe1097f41c2 b/.hypothesis/constants/d21c9fe1097f41c2 deleted file mode 100644 index 2baac1a..0000000 --- a/.hypothesis/constants/d21c9fe1097f41c2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/hid_inject.py -# hypothesis_version: 6.141.1 - -[5.0, 15.0, 1000.0, '_bin', 'button', 'chord', 'down', 'home', 'key', 'lock', 'side', 'simdrive-input', 'siri', 'size', 'tap', 'text', 'up'] \ No newline at end of file diff --git a/.hypothesis/constants/d56bddd53c3d4231 b/.hypothesis/constants/d56bddd53c3d4231 deleted file mode 100644 index e3cb3bd..0000000 --- a/.hypothesis/constants/d56bddd53c3d4231 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/session.py -# hypothesis_version: 6.141.1 - -['.simdrive', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'launch_failed', 'os_version', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/d9dfc67186bc2895 b/.hypothesis/constants/d9dfc67186bc2895 deleted file mode 100644 index 67fd80f..0000000 --- a/.hypothesis/constants/d9dfc67186bc2895 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/routes/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/da39a3ee5e6b4b0d b/.hypothesis/constants/da39a3ee5e6b4b0d deleted file mode 100644 index befbce7..0000000 --- a/.hypothesis/constants/da39a3ee5e6b4b0d +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/middleware/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/.hypothesis/constants/dac3cc621f17a65c b/.hypothesis/constants/dac3cc621f17a65c deleted file mode 100644 index 157e1a5..0000000 --- a/.hypothesis/constants/dac3cc621f17a65c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# hypothesis_version: 6.141.1 - -[0.85, 128, 300, '.simdrive', 'L', 'SIMDRIVE_HOME', 'action', 'app_bundle_id', 'app_version', 'args', 'captured_at', 'created_at', 'created_by_session', 'device', 'drift', 'drifted', 'duration_ms', 'error', 'execute_error', 'executed', 'force', 'h', 'halt', 'halt_reason', 'halted_at', 'id', 'key', 'name', 'ok', 'os_version', 'post_screenshot', 'pre_screenshot', 'press_key', 'recording finalized', 'recording started', 'recording stopping', 'recording.yaml', 'recording_name', 'recordings', 'replay', 'screenshot_h', 'screenshot_w', 'session_id', 'simdrive.recorder', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'threshold', 'type_text', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/deb3ba2bdb80811f b/.hypothesis/constants/deb3ba2bdb80811f deleted file mode 100644 index 5af828a..0000000 --- a/.hypothesis/constants/deb3ba2bdb80811f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/recorder.py -# hypothesis_version: 6.141.1 - -[0.85, 128, 300, '.simdrive', 'L', 'SIMDRIVE_HOME', 'action', 'app_bundle_id', 'app_version', 'args', 'captured_at', 'created_at', 'created_by_session', 'device', 'drift', 'drifted', 'duration_ms', 'error', 'execute_error', 'executed', 'force', 'h', 'halt', 'halt_reason', 'halted_at', 'id', 'key', 'name', 'ok', 'os_version', 
'post_screenshot', 'pre_screenshot', 'press_key', 'recording.yaml', 'recordings', 'replay', 'screenshot_h', 'screenshot_w', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'threshold', 'type_text', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2'] \ No newline at end of file diff --git a/.hypothesis/constants/e07f9d7d07b9d7dd b/.hypothesis/constants/e07f9d7d07b9d7dd deleted file mode 100644 index 6e549ee..0000000 --- a/.hypothesis/constants/e07f9d7d07b9d7dd +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/robustness.py -# hypothesis_version: 6.152.4 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'dark', 'deny', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'settings', 'simctl', 'simdrive_version', 'siri', 'speech', 'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/e174c10544526fc2 b/.hypothesis/constants/e174c10544526fc2 deleted file mode 100644 index ff8105c..0000000 --- a/.hypothesis/constants/e174c10544526fc2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/sim.py -# hypothesis_version: 6.152.4 - -[5.0, 10.0, 15.0, 30.0, 60.0, 2000, '-', '--json', '--last', '--predicate', '--style', '-b', '-convert', '-o', '.', '30s', ':', 'Booted', 'CFBundleVersion', 'bootstatus', 'cliclick', 
'compact', 'devices', 'iOS-', 'io', 'isAvailable', 'json', 'launch', 'list', 'listapps', 'log', 'name', 'pbcopy', 'plutil', 'screenshot', 'show', 'shutdown', 'simctl', 'spawn', 'state', 'terminate', 'udid', 'utf-8', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/e1ba4e9f971debd2 b/.hypothesis/constants/e1ba4e9f971debd2 deleted file mode 100644 index 3265c8f..0000000 --- a/.hypothesis/constants/e1ba4e9f971debd2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 
'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 
'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/e1bf60dc2c73791e b/.hypothesis/constants/e1bf60dc2c73791e deleted file mode 100644 index ba34a23..0000000 --- a/.hypothesis/constants/e1bf60dc2c73791e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 
'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 
'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/.hypothesis/constants/ea559b291ce0e16b b/.hypothesis/constants/ea559b291ce0e16b deleted file mode 100644 index e7de040..0000000 --- a/.hypothesis/constants/ea559b291ce0e16b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/ed1da87eccf7cacb b/.hypothesis/constants/ed1da87eccf7cacb deleted file mode 100644 index 2c64bdb..0000000 --- a/.hypothesis/constants/ed1da87eccf7cacb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/sim.py -# hypothesis_version: 6.141.1 - -[5.0, 10.0, 15.0, 30.0, 60.0, 2000, '-', '--json', '--last', '--predicate', '--style', '-b', '-convert', '-o', '.', '30s', ':', 'Booted', 'CFBundleVersion', 'bootstatus', 'cliclick', 'compact', 'devices', 'iOS-', 'io', 'isAvailable', 'json', 'launch', 'list', 'listapps', 'log', 'name', 'pbcopy', 'plutil', 'screenshot', 'show', 'shutdown', 'simctl', 'spawn', 'state', 'terminate', 'udid', 'utf-8', 'xcrun'] \ No newline at end of file diff --git a/.hypothesis/constants/ed2bf403e05871e5 b/.hypothesis/constants/ed2bf403e05871e5 deleted file mode 100644 index 841fa4b..0000000 --- a/.hypothesis/constants/ed2bf403e05871e5 +++ /dev/null 
@@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/.hypothesis/constants/ed315a8a440a115f b/.hypothesis/constants/ed315a8a440a115f deleted file mode 100644 index e7bdfac..0000000 --- a/.hypothesis/constants/ed315a8a440a115f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/errors.py -# hypothesis_version: 6.141.1 - -['action', 'already_recording', 'available', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_not_found', 'replay_drift_halt', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'value', 'why'] \ No newline at end of file diff --git a/.hypothesis/constants/ef0e3629f5d7602e b/.hypothesis/constants/ef0e3629f5d7602e deleted file mode 100644 index f74d46d..0000000 --- a/.hypothesis/constants/ef0e3629f5d7602e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/cloud/app.py -# hypothesis_version: 6.141.1 - -['/v1', '1.0.0', 'SimDrive Cloud API', 'simdrive_r2_', 'sqlite://'] \ No newline at end of file diff --git a/.hypothesis/constants/f2d51d157746e804 b/.hypothesis/constants/f2d51d157746e804 deleted file mode 100644 index e9458d6..0000000 --- a/.hypothesis/constants/f2d51d157746e804 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/licenses.py -# hypothesis_version: 6.152.4 - -[0.0, 100.0, 365, 86400, '/licenses/activate', 
'/licenses/status', '/licenses/usage', 'customer_email', 'enterprise', 'expires_at', 'pro', 'solo', 'team', 'tier', 'trial', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/f5c8bdf97a704588 b/.hypothesis/constants/f5c8bdf97a704588 deleted file mode 100644 index 151ea05..0000000 --- a/.hypothesis/constants/f5c8bdf97a704588 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/claude_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 0.5, 15.0, 75.0, 200, 1024, 1000000, '.', 'ANTHROPIC_API_KEY', 'args', 'base64', 'claude-opus-4-7', 'confidence', 'content', 'data', 'fail', 'image', 'image/jpeg', 'image/png', 'jpeg', 'jpg', 'media_type', 'rationale', 'rb', 'role', 'source', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/.hypothesis/constants/f90d75b39cf3d392 b/.hypothesis/constants/f90d75b39cf3d392 deleted file mode 100644 index e17baf1..0000000 --- a/.hypothesis/constants/f90d75b39cf3d392 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/validator.py -# hypothesis_version: 6.141.1 - -[86400, '.', '=', 'ascii', 'customer_email', 'expires_at', 'license expired', 'license valid', 'tier'] \ No newline at end of file diff --git a/.hypothesis/constants/fa354dbc7e886bb8 b/.hypothesis/constants/fa354dbc7e886bb8 deleted file mode 100644 index 0222828..0000000 --- a/.hypothesis/constants/fa354dbc7e886bb8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/auth.py -# hypothesis_version: 6.141.1 - -['Authorization', 'Bearer', 'Bearer ', 'WWW-Authenticate', 'bearer auth accepted', 'bearer auth rejected', 'customer_email', 'path', 'reason', 'simdrive.cloud.auth', 'tier'] \ No newline at end of file diff --git a/.hypothesis/constants/fad9c20eea9be852 b/.hypothesis/constants/fad9c20eea9be852 deleted file mode 100644 index 58fb06b..0000000 --- 
a/.hypothesis/constants/fad9c20eea9be852 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/metrics.py -# hypothesis_version: 6.141.1 - -[0.0, 0.5, 0.95, 1.0, 100.0] \ No newline at end of file diff --git a/.hypothesis/constants/fba9d8138821b639 b/.hypothesis/constants/fba9d8138821b639 deleted file mode 100644 index c3d4371..0000000 --- a/.hypothesis/constants/fba9d8138821b639 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/.hypothesis/constants/fcbd8e4b8288bcdc b/.hypothesis/constants/fcbd8e4b8288bcdc deleted file mode 100644 index 2f09c02..0000000 --- a/.hypothesis/constants/fcbd8e4b8288bcdc +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/journey/criteria.py -# hypothesis_version: 6.141.1 - -[200, '; ', 'cpu_pct', 'error', 'marks', 'memory_mb', 'no crashes detected', 'no_crash', 'perf_under', 'rss_mb', 'screen_matches', 'stable_id', 'text', 'text_visible', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/fda344215452e92e b/.hypothesis/constants/fda344215452e92e deleted file mode 100644 index 9e2eeaf..0000000 --- a/.hypothesis/constants/fda344215452e92e +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/middleware/quotas.py -# hypothesis_version: 6.141.1 - -[0.0, 100.0, 'Authorization', 'Bearer ', 'Retry-After', 'customer_email', 'percent_used', 'period_end', 'period_start', 'runs_limit', 'runs_used', 'solo', 'tier', 'unknown'] \ No newline at end of file diff --git a/.hypothesis/constants/feba84bd30d62498 b/.hypothesis/constants/feba84bd30d62498 deleted file mode 100644 index 9ec5fcb..0000000 --- a/.hypothesis/constants/feba84bd30d62498 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 
'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at 
end of file diff --git a/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/.hypothesis/unicode_data/13.0.0/charmap.json.gz deleted file mode 100644 index f2f0fd5..0000000 Binary files a/.hypothesis/unicode_data/13.0.0/charmap.json.gz and /dev/null differ diff --git a/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz b/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz deleted file mode 100644 index 95eba22..0000000 Binary files a/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz and /dev/null differ diff --git a/.hypothesis/unicode_data/14.0.0/charmap.json.gz b/.hypothesis/unicode_data/14.0.0/charmap.json.gz deleted file mode 100644 index a6bc97e..0000000 Binary files a/.hypothesis/unicode_data/14.0.0/charmap.json.gz and /dev/null differ diff --git a/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz b/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz deleted file mode 100644 index 85eea00..0000000 Binary files a/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz and /dev/null differ diff --git a/.hypothesis/unicode_data/15.1.0/charmap.json.gz b/.hypothesis/unicode_data/15.1.0/charmap.json.gz deleted file mode 100644 index cab63fb..0000000 Binary files a/.hypothesis/unicode_data/15.1.0/charmap.json.gz and /dev/null differ diff --git a/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz b/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz deleted file mode 100644 index 77e5a98..0000000 Binary files a/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz and /dev/null differ diff --git a/.specterqa/dogfood/v15.1.0-maurice.md b/.specterqa/dogfood/v15.1.0-maurice.md deleted file mode 100644 index f10c5aa..0000000 --- a/.specterqa/dogfood/v15.1.0-maurice.md +++ /dev/null @@ -1,331 +0,0 @@ -# SpecterQA iOS 15.1.0 Dogfood Report - -**Date:** 2026-04-27 -**Reporter:** Maurice Carrier (Palace iOS, Synctek) -**Tested against:** Palace iOS Core, branch `develop` HEAD `677055467` -**SpecterQA version:** `15.1.0` (upgraded from `15.0.0` same-day via `pip3 install 
--upgrade specterqa-ios`) - -This report is a continuation of the dogfood ledger that started with 13.2.x (`~/Desktop/specterqa-v13.2-dogfood.md`) and extended through the 14.0.1 / WS-011..013 regression session on 2026-04-21 (`~/Desktop/regression-PP-WHOLESHOT/specterqa-dogfood.md`). The goal: surface every behavior that diverges from the documented contract so SpecterQA can land targeted fixes and lift the failing CI gates on the Palace pipeline. - ---- - -## 1. Environment - -| Component | Value | -|---|---| -| OS | macOS Darwin 25.0.0 (Apple Silicon) | -| Xcode | 26.3 (17C529) at `/Applications/Xcode.app/Contents/Developer` | -| Default sim SDK | `iphonesimulator26.2` (the only SDK the runner build can target without manual override) | -| Python | 3.13 (system framework) | -| `specterqa-ios` | `15.1.0` (pip release, installed today) | -| MCP transport | stdio via Claude Code | -| Sims used | `31CF5C43-…` iPhone 12 / iOS 26.0 · `DF4A2A27-…` iPhone 16 Pro / iOS 18.4 · `6C396179-…` iPhone 17 Pro / iOS 26.2 | -| App under test | `Palace.app` (137 MB Debug‑iphonesimulator), bundle id `org.thepalaceproject.palace` | - -The Palace `develop` build was produced by `xcodebuild -project Palace.xcodeproj -scheme Palace -destination 'platform=iOS Simulator,id=…' -derivedDataPath /tmp/specterqa-develop-dd build` after bridging the gitignored DRM/secrets files into a fresh worktree (`Carthage/Build` symlink, submodules `init`, `adobe-rmsdk` symlink, `APIKeys.swift`, `GoogleService-Info.plist`, `ReaderClientCert.sig`). - ---- - -## 2. Repro target — what we tried to do - -Run the **A1QA auth journey** (`a1qa-signin-return-signout.yaml`, 16 steps; preceded by `app-launch.yaml`, 3 steps) end-to-end via the MCP `ios_replay` tool. Both replays exist in `.specterqa/replays/` on `develop`. Both pass `ios_validate_replay` cleanly under 15.1.0. 
- -Auth journey is the right canary because step 4 opens a sheet-presented sign-in form — exercises the `xctest` backend path that AX cannot enumerate (`B6` from prior reports). - ---- - -## 3. Verdict - -| Capability | Status | Notes | -|---|---|---| -| `pip3 install --upgrade specterqa-ios` → 15.1.0 | ✅ | Clean upgrade from 15.0.0; ~337 KB wheel. | -| `specterqa-ios --version` reports `15.1.0` | ✅ | | -| `specterqa-ios doctor` | ✅ | All checks pass when sim booted. | -| `specterqa-ios runner build` | ✅ | Produces `iphonesimulator26.2-arm64-x86_64.xctestrun`. ~30 s. | -| `specterqa-ios runner status` | ✅ | Reports artifact path + build time. | -| `ios_get_capabilities` MCP call | ✅ | Reports `{"backends": ["xctest", "ax"], …}`. | -| `ios_doctor` MCP call | ✅ | | -| `ios_devices` / `ios_apps` | ✅ | | -| `ios_validate_replay` | ✅ | 2/2 target replays valid; structurally compatible with the new parser. | -| `ios_start_session(backend='ax')` | ✅ | Returns `device_w/h`, `sim_pid`, `target_udid`, `frontmost_udid`. | -| `ios_elements` over AX | ✅ | Returns valid element tree (verified on home screen). | -| `ios_stop_session` | ✅ (status: stopped) | But see issue **#4** about side effects on other booted sims. | -| `ios_start_session(backend='xctest')` after `runner build` | ❌ until `/mcp` reconnect — issue **#1** | | -| `ios_start_session(backend='xctest')` once MCP picks up artifact | ⚠️ → `status: ok`, but in‑sim runner dies almost immediately — issue **#2** | | -| `ios_replay` end-to-end | ❌ — issue **#3** (and depends on #2) | | - -Net: **`ios_replay` is unusable on iOS 26.x simulators with this Xcode**. AX-only driving works for live `ios_tap`/`ios_elements` interaction but cannot drive the replay subsystem because that subsystem is hardwired to the xctest runner port irrespective of the session backend (issue #3). - ---- - -## 4. 
Issue catalog - -### Issue #1 — `Requested backend 'xctest' is not available on this system.` after `runner build`, until MCP reconnect - -**Severity:** medium (workflow blocker, easy workaround). - -**Symptom:** Immediately after `specterqa-ios runner build` completes successfully, calling `ios_start_session(backend='xctest', …)` returns: - -```json -{"error": "Requested backend 'xctest' is not available on this system."} -``` - -`ios_get_capabilities` continues to advertise `"backends": ["xctest", "ax"]`. `specterqa-ios runner status` reports the artifact READY. But MCP refuses to use the xctest path until the MCP server is fully reconnected (i.e. `/mcp` in Claude Code, or restarting the stdio server). - -**Reproduction:** - -```bash -# 1. Fresh MCP server session. -specterqa-ios runner clean --yes -specterqa-ios runner build # ~30s, succeeds -specterqa-ios runner status # reports READY - -# 2. Without restarting MCP, call: -# ios_start_session(bundle_id='org.thepalaceproject.palace', -# device_id='6C396179-...', -# backend='xctest') -# → {"error": "Requested backend 'xctest' is not available on this system."} - -# 3. /mcp reconnect, then call the same ios_start_session. -# → {"status": "ok", "port": 8222, "runner_url": "http://localhost:8222", ...} -``` - -**Hypothesis:** The MCP server detects backend availability at startup (probably scanning `~/.specterqa/runner-build/Build/Products/*.xctestrun`) and caches the result. `runner build` writes a new artifact path with a fresh hash directory but the cached negative-result is not invalidated. WS-013 from the 2026-04-21 regression already noted this — it is unchanged in 15.1.0. - -**Suggested fix:** re-stat the artifact directory at session start, OR invalidate when `runner build` exits, OR remove the cache entirely (the cost is one filesystem stat per session start). 
- -**Suggested test:** - -```python -def test_runner_build_invalidates_backend_cache(): - server = start_mcp_server() - capabilities_before = server.call("ios_get_capabilities") - assert "xctest" in capabilities_before["backends"] - subprocess.run(["specterqa-ios", "runner", "clean", "--yes"], check=True) - subprocess.run(["specterqa-ios", "runner", "build"], check=True) - # WITHOUT reconnecting: - result = server.call("ios_start_session", backend="xctest", ...) - assert result.get("status") == "ok", \ - "xctest must become available after runner build without MCP restart" -``` - ---- - -### Issue #2 — XCTest runner deploys, binds :8222, serves /health, then dies abruptly on iOS 26.x - -**Severity:** **critical** (blocks all replay execution on iOS 26.x sims). - -**Symptom:** After `/mcp` reconnect, `ios_start_session(backend='xctest', …)` returns `status: ok`. The runner test process actually does start successfully — the in-sim HTTP server binds to `INADDR_ANY:8222` (`HTTPServer.swift:96`) and serves at least two `GET /health` requests successfully — then the test process is terminated abruptly. The log ends mid-stream with `*** If you believe this error represents a bug, please attach the result bundle …`. - -After termination: -- `lsof -i :8222` → empty (port unbound) -- `ios_replay …` → fails with `XCTest runner /source request failed at http://localhost:8222/source: ` for every step. -- `pgrep -f "xcodebuild test"` → **still alive** (xcodebuild thinks the test is healthy; it does not propagate the in-sim test crash). - -**This is NOT the iOS 26 sim network-isolation theory I initially suspected.** The runner's HTTPServer binds to all interfaces (`INADDR_ANY`, `runner/Sources/HTTPServer.swift:96`) and the host successfully reaches it twice. The actual failure is that the runner test process is killed shortly after entering its `CFRunLoopRunInMode` loop. 
- -**Evidence — runner stdout/stderr (full path included for repro):** - -``` -~/Library/Developer/Xcode/DerivedData/SpecterQARunner-dvifexinsfbnlyaudygypquwmoew/ - Logs/Test/Test-SpecterQARunner-2026.04.27_14-15-31--0400.xcresult/ - Staging/1_Test/Diagnostics/ - SpecterQARunnerTests-…-Configuration-Test Scheme Action-Iteration-1/ - SpecterQARunnerTests-BB038A50-4B62-477C-BDB8-3ABC60E57492/ - StandardOutputAndStandardError.txt -``` - -Relevant lines (excerpt — full file is 641 lines): - -``` -2026-04-27 14:16:05.269717 [SpecterQA] Port: 8222 Bundle: org.thepalaceproject.palace Max: 3600s -2026-04-27 14:16:07.649536 [SpecterQA] HTTP server listening on port 8222 -2026-04-27 14:16:07.652266 [SpecterQA] PID file written: /tmp/specterqa_runner.pid (PID=60414) -2026-04-27 14:16:07.652324 [SpecterQA] Runner listening on port 8222 targeting 'org.thepalaceproject.palace' (app state=4) -2026-04-27 14:16:07.652377 [SpecterQA] Endpoints: GET /health /source /screenshot /elements POST /tap /swipe … -2026-04-27 14:16:07.652411 [SpecterQA] Entering CFRunLoopRunInMode loop... -2026-04-27 14:16:07.685000 [SpecterQA] GET /health -2026-04-27 14:16:07.694551 [SpecterQA] GET /health - - -*** If you believe this error represents a bug, please attach the result bundle … -``` - -The "Max: 3600s" budget is set, the server enters its run loop, two healthchecks succeed within 50 ms, and the log dies. There is no orderly shutdown trace — no `[SpecterQA] HTTP server stopping`, no XCTest `Test Case 'testServe' passed/failed`, no Suite teardown. The `***` blurb is xcodebuild's signature for an abnormal termination. - -**Reproduction:** - -```bash -# Single-sim setup (multi-sim adds noise — see issue #4). -xcrun simctl shutdown all -xcrun simctl boot 6C396179-608C-4787-87F9-7B68F420702B # iPhone 17 Pro / iOS 26.2 -xcrun simctl install 6C396179-… /path/to/Palace.app -specterqa-ios runner clean --yes -specterqa-ios runner build -# /mcp reconnect (issue #1). 
- -# Through MCP: -ios_start_session(bundle_id='org.thepalaceproject.palace', - device_id='6C396179-608C-4787-87F9-7B68F420702B', - backend='xctest') -# → {"status": "ok", "port": 8222, ...} - -# Within ~5 seconds: -lsof -i :8222 # empty -curl http://localhost:8222/health # connection refused -ios_replay(name='app-launch', ...) -# → {"status": "failed", "failures":[{"error": "/source: Connection refused"}, ...]} - -# Inspect the latest xcresult: -ls -t ~/Library/Developer/Xcode/DerivedData/SpecterQARunner-*/Logs/Test/*.xcresult \ - | head -1 -# Open StandardOutputAndStandardError.txt, observe abrupt termination after -# two GET /health entries. -``` - -100 % reproducible across all three test sims (iOS 26.0, 26.2, and 18.4 — though 18.4 fails earlier with the SDK-mismatch in issue #5). - -**Hypotheses (in order of likelihood):** - -1. The Swift `XCTestCase` test method (`SpecterQARunnerTests.testServe`) is being terminated by XCTest's runtime issue detector. The xcresult shows `XCTRuntimeIssueDetectionManager runtimeIssueDetectionEnabled: YES` and the `XCTAutomationSupport` framework was set up earlier. A runtime issue detected during the run loop (e.g. unbalanced semaphore wait, blocked main thread) on iOS 26.x XCTest may cause early test termination. -2. iOS 26.2 simulator's `XCTest`/`testmanagerd` uses a stricter test-method timeout than 13.x/14.x. The runner enters `CFRunLoopRunInMode` indefinitely; iOS 26 may interpret that as "test method has hung" and SIGKILL it. The `Max: 3600s` budget at the SpecterQA level is unrelated to XCTest's own watchdogs. -3. Sim‑clone teardown race: the test runner runs against `Clone 1 of iPhone 17 Pro (8F9A2036-…, iOS 26.2)` per the testmanagerd log. When MCP issues a follow-up call (e.g. healthcheck transitions to "running" and another internal call fires), the clone may be torn down out from under the runner process. 
- -**Suggested fix path:** - -- Capture the runner exit reason (signal, stderr tail) and surface it in the `RunnerDeployError` instead of letting xcodebuild swallow it. The `session_manager.py:385` DEVNULL redirect (called out in WS-012) is still active and continues to mask root cause. -- Add a "ten-second post-deploy stability probe": after `healthcheck()` returns 200, schedule `_mcp_runner.healthcheck(timeout_s=2.0)` again 10 seconds later before `ios_start_session` returns. Today the Python side returns "ok" within ~3 s of the first 200 response and never re-probes. If the runner dies between deploy and first replay step, MCP reports success while the user gets connection-refused. -- Investigate whether `Routes/HealthRoute.swift` triggers any XCTest-tracked side effect (e.g. taking a screenshot, querying the AX tree) that, on iOS 26.x, ends the test method. - -**Suggested test harness assertion:** - -```python -def test_xctest_runner_stays_alive_post_deploy(): - """Runner must remain healthy for at least 60s after ios_start_session - returns ok. Without this, replays race the runner's own death.""" - server.call("ios_start_session", backend="xctest", ...) - deadline = (start := time.monotonic()) + 60.0 - while time.monotonic() < deadline: - r = httpx.get("http://localhost:8222/health", timeout=2.0) - assert r.status_code == 200, f"Runner dead at t={time.monotonic()-start}s" - time.sleep(5) -``` - -CI matrix should include iPhone 12/iOS 26.0, iPhone 17 Pro/iOS 26.2, iPhone 15 Pro/iOS 17.5, iPhone 16 Pro/iOS 18.4 to cover the iOS 26.x regression specifically. - ---- - -### Issue #3 — `ios_replay` is hardwired to the xctest runner port regardless of the active session's backend - -**Severity:** medium (limits the AX-only escape hatch when xctest is broken). 
- -**Symptom:** After `ios_start_session(backend='ax')` succeeds and `ios_elements`/`ios_tap` are confirmed working over AX, `ios_replay` still tries to fetch `/source` from `http://localhost:8222/source` (or `:8100` on some calls — observed both ports in the same session). When the xctest runner is not deployed, every step fails with: - -``` -"XCTest runner /source request failed at http://localhost:8100/source: " -``` - -**Reproduction:** - -```bash -# After the steps in issue #2 — but with backend='ax' instead of 'xctest': -ios_start_session(bundle_id='...', device_id='...', backend='ax') -# → {"status": "ok", "backend": "ax", ...} -ios_elements() # works — returns valid element tree - -ios_replay(name='app-launch', ...) -# → every step fails with /source: Connection refused at :8222 (or :8100) -``` - -**Expected:** when the active session's backend is `ax`, the replay path should use `AccessibilityTree`-based element queries (the same path `ios_elements` uses) rather than HTTP-fetching `/source` from the xctest runner. - -**Or:** if AX-replay is intentionally unsupported, return a structured error at the start of replay execution instead of N successive `Connection refused` errors: - -```json -{"error": "ios_replay requires backend='xctest'. Active session backend is 'ax'.", - "fix": "Restart with backend='xctest' or use ios_tap/ios_type directly."} -``` - -**Suggested fix:** route `ReplayExecutor` through the same backend abstraction already used by `ios_tap`/`ios_elements` (the `BackendSelector` at `mcp/server.py:849`). The replay's `tap` and `type` actions correspond 1:1 to MCP tool calls that already work cross-backend. - ---- - -### Issue #4 — `ios_stop_session` shuts down booted sims that the session never targeted - -**Severity:** low‑medium (surprise, especially when running parallel work in another sim). - -**Symptom:** Between `ios_stop_session` calls the booted-sim list shrinks unexpectedly. 
We started this dogfood with three booted sims (DF4A2A27 / 31CF5C43 / 6C396179) and ended a single MCP session lifecycle with all of them shut down — including the two we never passed to `ios_start_session`. We had to manually re-boot before each retry. - -This dovetails with the prior "B10" finding (sim shutdown on runner deploy) that 14.0.1 was supposed to have fixed. In 15.1.0 the symptom returns, but appears tied to *cleanup of cloned sims* rather than the deploy itself. - -**Suggested test:** boot three sims, start a session against one, stop the session, assert the other two sims remain booted. - ---- - -### Issue #5 — Xcode SDK pinning makes the runner unrunnable on non‑26 sims - -**Severity:** documented and unchanged from WS-012. Repeating here for completeness so it stays visible. - -`runner build` always produces `iphonesimulator{LATEST}.xctestrun`. There is no `--sdk` or `--ios-version` flag on `specterqa-ios runner build`. On a Mac whose latest installed iOS SDK is 26.2 (today's default Xcode 26.3 install), the runner simply cannot be deployed onto an iOS 18.4 sim — `xcodebuild test-without-building` exits with code 65, no readable error (DEVNULL redirect, `session_manager.py:385`). - -**Suggested fix:** add `specterqa-ios runner build --sdk iphonesimulator17.5` (or whatever runtime the user has installed). At minimum, surface the actual `xcodebuild` stderr from `runner_process.py:333-337` instead of the truncated `f"xcodebuild exited with code {rc}"`. - ---- - -## 5. Suggested SpecterQA test-harness checklist - -What I would add to the SpecterQA CI/release gate, in order of value: - -1. **Runner stability probe.** Deploy runner → wait 60 s polling `/health` every 5 s → assert all 12 polls return 200. Repro for issue #2. Run on every supported (sim model × iOS runtime) tuple. -2. **Backend cache freshness test.** `runner clean` → `runner build` → without restarting MCP, call `ios_start_session(backend='xctest')` → assert `status == "ok"`. 
Repro for issue #1. -3. **Replay backend respect test.** Start session with `backend='ax'` → call `ios_replay` on a 2-step replay → assert error message specifically tells the caller to switch to `xctest` (not `Connection refused at :8222`). Repro for issue #3. -4. **Sim-isolation test.** Boot three sims A/B/C → `ios_start_session` against A → `ios_stop_session` → `xcrun simctl list devices booted` must still contain B and C. Repro for issue #4. -5. **Multi-SDK runner build.** Add `runner build --sdk iphonesimulator17.5` → install on iPhone 15 Pro / iOS 17.5 → assert deploys. Today this path silently fails with code 65. Repro for issue #5. -6. **Surface xcodebuild stderr.** Static check (or unit test against `runner_process.py`) — the `_process` should NOT redirect stderr to `DEVNULL`. Without this, every other diagnosis lands in a black hole. Carryover from WS-012. - -For Palace specifically: until issues #1+#2 land, our CI needs to pin `specterqa-ios==14.0.1` for the auth-journey gate. We will continue to dogfood each release on the iPhone 17 Pro / iOS 26.2 target since that's the one that maps to the runner build. - ---- - -## 6. Appendix — exact command transcript (truncated to repro path) - -```bash -# 1. Upgrade -pip3 install --upgrade specterqa-ios -# Successfully installed specterqa-ios-15.1.0 - -# 2. Reset state -xcrun simctl shutdown all -specterqa-ios runner clean --yes -specterqa-ios runner build -# Artifact: ~/.specterqa/runner-build/Build/Products/SpecterQARunner_SpecterQARunner_iphonesimulator26.2-arm64-x86_64.xctestrun - -# 3. 
Sim + app -xcrun simctl boot 6C396179-608C-4787-87F9-7B68F420702B -xcrun simctl ui 6C396179-… appearance dark -xcrun simctl install 6C396179-… /tmp/specterqa-develop-dd/Build/Products/Debug-iphonesimulator/Palace.app -xcrun simctl spawn 6C396179-… defaults write org.thepalaceproject.palace showDeveloperSettings -bool true -xcrun simctl spawn 6C396179-… defaults write org.thepalaceproject.palace NYPLUseBetaLibrariesKey -bool true - -# 4. /mcp reconnect (issue #1) - -# 5. xctest session attempt (issue #2) -# → MCP returns status:ok, port:8222 -# → 5 seconds later: lsof -i :8222 empty -# → ios_replay → Connection refused on every step - -# 6. ax session attempt (issue #3) -# → MCP returns status:ok, backend:ax -# → ios_elements works -# → ios_replay still tries http://localhost:8222/source — Connection refused -``` - ---- - -## 7. Cross-references - -- 13.2.x ledger: `~/Desktop/specterqa-v13.2-dogfood.md` (B1..B7, W4 — most resolved by 14.0.1) -- 14.0.1 regression: `~/Desktop/regression-PP-WHOLESHOT/specterqa-dogfood.md` (WS-011, WS-012, WS-013 — issues #1 and #5 here are continuations of WS-013 and WS-012 respectively) -- Memory index: `~/.claude/projects/-Users-mauricework-PalaceProject-ios-core/memory/specterqa_v13_2_dogfood.md` — updated 2026-04-27 with the 15.1.0 entry - -Happy to provide xcresult bundles, PCAPs, or a longer log capture if any of the issues above need more data. The xcresult for issue #2's "abrupt termination" trace is the most useful single artifact — its full path is in §4 and it is preserved on this Mac. 
diff --git a/.specterqa/dogfood/v15.2.0-direction-proposal-maurice.md b/.specterqa/dogfood/v15.2.0-direction-proposal-maurice.md deleted file mode 100644 index 2ff927b..0000000 --- a/.specterqa/dogfood/v15.2.0-direction-proposal-maurice.md +++ /dev/null @@ -1,250 +0,0 @@ -# SpecterQA iOS — Direction Proposal: Vision-First Agent Driving - -**Date:** 2026-04-27 -**Author:** Maurice Carrier (Palace iOS, Synctek) -**Status:** strategic direction proposal, not a PR -**Companions:** -- `~/Desktop/specterqa-v15.1.0-dogfood.md` — original 15.1.0 dogfood -- `~/Desktop/specterqa-v15.2.0-runner-stability-patch.md` — tactical patch for the immediate XCTest ambiguous-match crash - -The patch document is the right *short-term* fix. This document is a longer-term proposal for **why the SpecterQA tool surface should pivot away from accessibility-tree representation toward vision-first agent driving**, and what that pivot looks like concretely. - ---- - -## 1. Thesis - -> SpecterQA today tries to *represent the iOS screen for the agent* via the accessibility tree (`ios_elements`, `ios_screenshot` annotated with bounding boxes, `findByLabel`, `findByIdentifier`, replay YAMLs that assert element existence). That representation is **lossy, brittle, slow, and crash-prone on iOS 26.x SwiftUI** — and it's doing negative work for the modern multimodal agents that are SpecterQA's primary consumers. -> -> A multimodal agent like Claude, GPT-4V, Gemini, etc. can already see the screen as well as a human can. The right primitive for those agents is **a screenshot and a coordinate-based action layer** — exactly the model that Anthropic's Computer Use, OpenAI's Computer Use, and Anthropic's Claude-in-Chrome already use successfully across thousands of integrations. -> -> SpecterQA's value to those agents is *not* a parsed element tree. 
It's: **on-device screenshot capture, coordinate-precise input injection, lifecycle plumbing (install, launch, terminate, alerts, permissions), and out-of-band telemetry (logs, perf, crashes, network).** Everything else — the AX-tree query layer, label-based selectors, replay-by-element-existence — is an abstraction the agent doesn't need and can't recover from when it breaks. - ---- - -## 2. Evidence from a single live-driving session (2026-04-27, ~3 hours) - -### Setup -- specterqa-ios 15.2.0, iPhone 17 Pro / iOS 26.2 sim, Palace iOS develop @ 677055467, ParallelizationEnabled=false patch applied per the v15.1.0 dogfood, fresh-installed before each session. - -### Outcomes by mechanism - -| Mechanism | Successes | Crashes | Notes | -|---|---|---|---| -| `ios_tap(label=...)` / `ios_tap(label=..., type=...)` | ~3 calls succeeded before something died | **3 runner deaths** | Each crash: XCTest `NSException "Find single matching element"` thrown out of `XCUIElementQuery[label]` because SwiftUI `NavigationStackHosting` propagates the same accessibility label across two `Other` wrapper containers + the actual `Button`. Even with `type=Button` the inner subscript still throws. | -| `ios_tap(identifier=...)` | 0 successful navigation taps | — | `findByIdentifier` uses the same broken subscript form. Returned "No element found" when the AX tree clearly contained the identifier. | -| `ios_tap(x=..., y=...)` | **11 navigation taps in a row, end-to-end auth journey completed** | 0 | Bypasses element resolution entirely. The "Sign in" button has no AX label/identifier at all in this build — coord tap was the *only* way to hit it. | -| `ios_type(text=...)` (no target) | Caused a focus race; user-typed input ended up in wrong field, concatenated with prior input | — | The runner can't reliably know what the agent intends without a target. 
| -| `ios_type(label="Library Card", text=...)` + `ios_type(label="Password", text=...)` | Worked first try | — | Direct element-targeted typing was OK, but it's still going through the same `findByLabel` path that crashed taps elsewhere. Lucky on this surface. | -| `ios_screenshot` (annotated PNG) | Worked every time | 0 | The base64 image is the truthful representation. | -| Native `xcrun simctl io screenshot ...png` + read it back | Worked every time | 0 | Faster, no MCP roundtrip overhead. | -| `ios_elements` | Returned trees, but trees were missing the "Sign in" button entirely | — | The AX tree doesn't reliably represent SwiftUI buttons that lack `accessibilityLabel`. Claude looking at the screenshot found "Sign in" in 200 ms; `ios_elements` would have failed forever. | - -### The single most damning data point - -I drove the auth journey using **exclusively** screenshots + coordinate taps, after the label-based path crashed three times. From "Add Library" picker → A1QA Test Library → Settings → Manage Libraries → A1QA detail → Library Card field → Password field → **Sign in** → verified "Sign out" appears. Eleven taps, two `ios_type(label=...)` calls (the only AX-tree dependency), and one `simctl io screenshot` for visual confirmation. **Zero runner crashes.** - -The same flow attempted via `ios_tap(label=...)` died at step 1. - ---- - -## 3. Why the accessibility-tree abstraction is doing negative work for vision-capable agents - -### 3.1 It's lossy -SwiftUI does not require developers to set `accessibilityLabel` / `accessibilityIdentifier`. The "Sign in" button on Palace's login form is a `Button("Sign in") { … }` with no explicit accessibility setup. Apple's framework either fails to expose it as a leaf node or exposes it without a label. **`ios_elements` returns a tree with no Sign-in button — but the button is plainly visible at (195, 337) in the screenshot.** A vision-capable agent will always find it; an AX-tree-only agent never will. 
- -### 3.2 It's brittle -SwiftUI propagates accessibility labels up to wrapper `Other` containers (verified in the verbatim xcresult sparse trees in the patch document). `XCUIElementQuery[label]` resolves to "the unique element with this label" — and throws when that's ambiguous. Animation transitions create transient duplicates. So does any view with `.accessibilityElement(children: .combine)`. Selector queries are fundamentally a poor fit for SwiftUI's tree shape. - -### 3.3 It's slow -`XCUIElementQuery.allElementsBoundByIndex` walks the entire AX tree, and on iOS 26.x the runner explicitly avoids it because it can crash on SwiftUI TextField in List cells (per `SpecterQAElementQuery.swift:228` comment). Each subscript-based lookup waits for the AX snapshot. Screenshots cost ~50–100 ms; element queries can hang for seconds during animations. - -### 3.4 It crashes the runner -The whole reason this dogfood session exists is that ambiguous AX queries throw `NSException` that propagates through `runOnMain` and kills `testServe()`. The patch doc proposes catching the throw — but the better fix is **not making the throw possible in the first place**, which means not using selector-based queries. - -### 3.5 It inherits every Apple bug forever -XCUIElement, XCUIElementQuery, the XCTest snapshot subsystem — all owned by Apple, all evolve every iOS major. iOS 26 broke the .sheet content enumeration that 13.x worked around. iOS 18.4 broke the SpringBoard alert AX. iOS 17 broke something else. SpecterQA's release cycle is currently dictated by Apple's AX regressions. **A coordinate-based primitive has zero AX dependency and zero Apple AX bugs.** - -### 3.6 It's the wrong abstraction for the consumer -Modern AI testing harnesses are not selenium. The consumer is a *vision-capable LLM* that already sees the screen better than a tree query can describe it. Forcing the agent to round-trip through "describe the tree → find the matching node → resolve to coordinates" is friction. 
Direct coordinates are the agent's native language. - ---- - -## 4. The vision-first model - -### 4.1 Core primitives (would replace ~15 current tools) - -``` -ios_observe(quality?, region?) → { - screenshot: base64 PNG, # full visual truth - device_w, device_h: numbers, # for normalization - reliable_targets: [ # ONLY widgets with explicit - {identifier, label, role, x, y, w, h} # accessibilityIdentifier - ], # — i.e. the developer - # opted in to scriptability - app_state: 'foreground'|'background'|..., - captured_at: ISO timestamp -} - -ios_act(action) → result - where action is one of: - {kind: "tap", x: float, y: float} - {kind: "tap", identifier: string} # opt-in semantic - {kind: "type", text: string, x?: float, y?: float} # injects at coord, then types - {kind: "swipe", from: [x,y], to: [x,y], duration_ms?: int} - {kind: "key", name: string} - {kind: "scroll", direction: "up"|"down"|"left"|"right", x?, y?} - {kind: "long_press", x: float, y: float, duration_s?: float} - {kind: "drag", from: [x,y], to: [x,y]} -``` - -Coordinates are **floats in device-points** by default; a `normalized: true` flag treats them as 0.0–1.0 for resolution-independent replays. `reliable_targets` only surfaces elements with developer-set `accessibilityIdentifier` — these are by-construction unique and stable, the rare cases where semantic targeting is justified. - -### 4.2 Out-of-band signals (keep — they're not UI semantics) - -Unchanged: `ios_logs`, `ios_logs_tail`, `ios_perf`, `ios_memory`, `ios_network`, `ios_crashes`, `ios_app_state`, `ios_session_status`, `ios_doctor`, `ios_devices`, `ios_apps`, `ios_get_capabilities`, `ios_app_relaunch`, `ios_dismiss_first_launch_alerts`, `ios_pre_grant_permissions`, `ios_set_appearance`. These are operational primitives that don't depend on the AX tree and don't crash on SwiftUI quirks. 
- -### 4.3 Replay rewrite (the biggest change) - -**Today** replays are YAML files asserting `expect_elements: [Settings, Libraries, A1QA Test Library, …]` — the runner takes an AX snapshot and verifies each label exists. This is exactly what's broken: SwiftUI rendering changes that don't change the visible UI can move/rename AX nodes and break the replay. - -**Proposed:** -```yaml -replay: - name: a1qa_signin - steps: - - kind: observe - capture: signin_form_state # named visual reference - - kind: act - action: {kind: tap, x: 0.503, y: 0.385} # normalized coords - - kind: assert_visual - reference: signin_form_state - threshold: 0.92 # SSIM threshold - region: [0, 0.4, 1.0, 0.6] # only diff form region -``` - -Replay validation = visual SSIM diff against captured reference, not element existence. Replays survive cosmetic AX changes (label propagation, view-hierarchy refactors) that today break them. References are stored as PNGs alongside the YAML; CI compares with image-diff tooling that already exists. - -### 4.4 Recording rewrite - -`ios_start_recording` / `ios_stop_recording` would capture: the screenshot at each step + the coordinate of the tap + (optionally) the OCR'd text near that coordinate for human readability. The output YAML is the format above. No selectors recorded. - ---- - -## 5. Reference architectures (this isn't a novel idea) - -| System | Primitive | Notes | -|---|---|---| -| **Anthropic Computer Use** (`claude-3.5-sonnet-computer-use`) | `screenshot()`, `mouse_move(x,y)`, `mouse_click(x,y)`, `type(text)`, `key(name)`, `scroll`, `wait` | The reference design. Zero DOM/AX. Multi-thousand-deployment proof that this works for general desktop automation. | -| **OpenAI Computer Use** (Operator) | Same shape | Same shape, same conclusion. | -| **Anthropic Chrome MCP** (claude-in-chrome) | DOM-aware *but always also exposes coords* — agent can fall back to vision when DOM is unhelpful | Hybrid model; even the DOM-having case lets vision win. 
| -| **Sikuli, Cucable** (legacy CV-based testing) | Pure visual, no AX | Predates LLMs; used image-template matching. The agent-vision version is just SOTA-LLM at the matching step instead of OpenCV. | -| **Selenium / XCTest selector model** | Pure selector | The model SpecterQA inherits. Was the right fit for the rule-based test era. Now mismatched. | - -The vision-first direction isn't experimental — it's the convergent design across every modern LLM-driven UI automation system. - ---- - -## 6. Concrete tool-surface comparison - -| Today | Proposed | Notes | -|---|---|---| -| `ios_screenshot` | `ios_observe` | annotated PNG + reliable_targets only | -| `ios_elements` | folded into `ios_observe.reliable_targets` | no full-tree dump | -| `ios_tap` (5 ways) | `ios_act({kind:'tap'})` | coords primary, identifier optional | -| `ios_long_press` | `ios_act({kind:'long_press'})` | coord-only | -| `ios_swipe` | `ios_act({kind:'swipe'})` | coord-only with from/to | -| `ios_type` | `ios_act({kind:'type'})` | coord-or-focus, no label | -| `ios_press_key` | `ios_act({kind:'key'})` | unchanged shape | -| `ios_dismiss_keyboard` | `ios_act({kind:'tap', x: ..., y: ...})` | composed | -| `ios_wait_idle` | `ios_observe` polled w/ visual diff | the agent waits when *the screen* says wait | -| `ios_wait_for_element` | `ios_observe` polled w/ OCR or visual reference | agent loops | -| `ios_replay` | reimplemented per §4.3 | visual diff | -| `ios_validate_replay` | reimplemented per §4.3 | check screenshot references exist | -| `ios_capture_state` | `ios_observe(include=['perf','logs'])` | folded | -| `ios_action_with_logs` | composed: `ios_act` + `ios_logs_tail` | unchanged primitives | -| `ios_start_recording` / `ios_stop_recording` / `ios_promote_session_to_test` | rewritten per §4.4 | screenshots + coords | -| `ios_logs`, `ios_logs_tail`, `ios_perf`, `ios_memory`, `ios_network`, `ios_crashes` | **unchanged** | already correct shape | -| `ios_app_state`, `ios_app_relaunch`, 
`ios_session_status`, `ios_pre_grant_permissions`, `ios_dismiss_first_launch_alerts`, `ios_dismiss_springboard_alert`, `ios_set_appearance`, `ios_doctor`, `ios_devices`, `ios_apps`, `ios_get_capabilities` | **unchanged** | operational, not UI | -| `findByLabel`, `findByIdentifier`, `waitForElement`, all internal AX-tree query code | **deleted** | This is where the runner crashes live. | - -Net: **47 tools → ~22 tools.** Smaller surface, smaller maintenance burden, smaller bug surface. - ---- - -## 7. Migration path (staged, non-breaking) - -### Phase 1 — Harden the coordinate primitive (1–2 days) -- Document `ios_tap(x, y)` / `ios_type(x, y, text)` as the *recommended* path in the agent guidance bundle. -- Audit `TapRoute` / `TypeRoute` / `SwipeRoute` to ensure coord-only paths never touch `findByLabel` / `findByIdentifier` / any `XCUIElementQuery`. Verify the runner stays alive across 100 coord-only actions in CI. -- Ship a minor release that (a) fixes the ambiguous-match crashes per the patch document and (b) re-orders documentation so coords-first is the obvious default. - -### Phase 2 — Add `ios_observe` (1 week) -- Combine `ios_screenshot` + a *filtered* `ios_elements` (only elements with explicit `accessibilityIdentifier`) into a single tool. -- Make `ios_observe` the recommended "first call in any flow." -- Mark `ios_elements` as legacy. - -### Phase 3 — Replay rewrite (2–3 weeks) -- Define the new visual-reference YAML schema. -- Ship `ios_replay_v2` with visual-diff validation. Run alongside legacy `ios_replay`. -- Migration tool: convert existing replays by running them once, capturing screenshots, replacing `expect_elements` with `assert_visual` references. - -### Phase 4 — Deprecate selector-based query layer (1 month after Phase 3) -- Mark `findByLabel`, `findByIdentifier`, `waitForElement` deprecated. Keep them working. -- Remove from agent guidance. Let consumers migrate at their own pace. 
- -### Phase 5 — Remove (next major) -- 16.0.0: delete the AX-tree query layer entirely. Wheel size drops, runner stability climbs, iOS 27 SwiftUI changes don't matter. - ---- - -## 8. Tradeoffs (honest) - -### What's harder - -1. **Resolution independence.** Coords differ across iPhone SE / 16 / 17 Pro Max. Mitigation: normalized 0.0–1.0 coords as a first-class option (already standard in Computer Use). - -2. **Non-vision agents.** Scripts, older LLMs, deterministic tests. Mitigation: the `reliable_targets` field gives them developer-set identifiers when present. Or layer an opt-in semantic helper *on top of* the coord primitive — never instead of it. - -3. **Visual-diff fragility.** SSIM thresholds need tuning. Mitigation: per-step `region` mask + `threshold` override. CI flake budget. The same flake budget today is consumed by AX-tree races, which are *less* tunable. - -4. **OCR cost when "find text on screen" is needed.** Mitigation: lazy — only when `ios_observe` is called with `include_ocr=True`. macOS Vision framework does this offline at ~50 ms. - -### What gets easier - -1. **iOS 27 / 28 SwiftUI changes don't break the runner** — there's no AX query to break. -2. **Replays survive view-hierarchy refactors** that move accessibility labels but keep the visual UI. -3. **The runner stops crashing** — there's no `XCUIElementQuery.subscript` to throw. -4. **Wheel size goes down** — ~30% of `Sources/` is AX-query infrastructure that disappears. -5. **Documentation gets shorter.** Agent guidance becomes "observe, then act on coordinates." -6. **Cross-platform** — the same model maps cleanly to Android (no AX abstraction lock-in). -7. **Interoperability** — a Claude / GPT-4V / Gemini agent that already knows Computer Use needs zero retraining for SpecterQA. - ---- - -## 9. What this unlocks for Palace specifically (and any other consumer) - -- The auth journey we drove today succeeded only because we manually fell back to coord taps. 
Under a vision-first SpecterQA, that fall-back IS the primary path — the journey is robust by construction. -- The "Sign in" button being invisible to AX is a Palace bug we'll fix. But on dozens of other SwiftUI surfaces in the app, similar AX gaps exist. Each one is an unfixed bug that a vision-first SpecterQA wouldn't notice. -- Replay flake from "AX label changed but UI didn't" goes to zero. -- The existing patch from `~/Desktop/specterqa-v15.2.0-runner-stability-patch.md` becomes redundant once the AX-query path is gone — runner doesn't throw because runner doesn't query. - ---- - -## 10. Open questions for SpecterQA - -1. Is there a class of consumer that depends on AX-tree querying for non-vision-capable harnesses (e.g. CI scripts using `specterqa-ios run` from shell)? If so, how heavy is that audience? — they're the cohort the migration path needs to keep working. -2. Are there test scenarios that *can't* be expressed with coords + visual diff? (Pure data assertions like "the third row in this list contains the string 'Acorn'" come to mind — but those are arguably better as direct API tests, not UI tests.) -3. Is SpecterQA's commercial positioning tied to "AX-tree-aware iOS testing"? If yes, the rebrand to "AI-native iOS testing" is the broader story. -4. Would you accept a Palace-side proof-of-concept where we fork the runner, strip the AX-query layer, and run our auth journey on the stripped-down build? Self-fund a week of engineering to prove the direction works. - ---- - -## 11. Companion artifacts - -- `~/Desktop/specterqa-v15.1.0-dogfood.md` — the v15.1.0 dogfood that surfaced the `CFRunLoopRunInMode` / SIGKILL issue (already fixed in 15.2.0). -- `~/Desktop/specterqa-v15.2.0-runner-stability-patch.md` — the tactical 2-file patch for the ambiguous-match crashes. This is the right *short-term* fix while the strategic direction in this document is debated. 
-- xcresult bundles for the three runner crashes preserved at the paths listed in the patch document. - -The patch is the right thing to ship in 15.2.1 / 15.3.0. This proposal is for what 16.0 looks like. - -Happy to drive the proof-of-concept, support the migration, or discuss further. The Palace iOS test pipeline is genuinely blocked on the current trajectory — every iOS major brings new SwiftUI/AX incompatibilities — and the vision-first path is the one that makes the runner *less* coupled to Apple's AX subsystem over time, not more. diff --git a/.specterqa/dogfood/v15.2.0-runner-stability-patch-maurice.md b/.specterqa/dogfood/v15.2.0-runner-stability-patch-maurice.md deleted file mode 100644 index 6836051..0000000 --- a/.specterqa/dogfood/v15.2.0-runner-stability-patch-maurice.md +++ /dev/null @@ -1,382 +0,0 @@ -# SpecterQA iOS 15.2.0 — Runner Stability Patch - -**Date:** 2026-04-27 -**Reporter:** Maurice Carrier (Palace iOS, Synctek) -**Target version:** `specterqa-ios 15.2.0` (current PyPI release, runner sources at `runner/Sources/`) -**Companion to:** `~/Desktop/specterqa-v15.1.0-dogfood.md` (the original 15.1.0 dogfood that 15.2.0 partially fixed) - ---- - -## TL;DR - -In a single live-driving session against Palace iOS develop @ `677055467` (iOS 26.2 sim, parallelization disabled per the v15.1.0 dogfood workaround), the in-sim runner died **3 times after only 1–3 routine interactions per restart**. Each death made the in-sim HTTP server stop accepting connections and forced a manual `xcodebuild test-without-building` restart, losing the recording buffer. - -Root cause is a single uncaught XCTest exception path. The runner's element-query layer uses `XCUIElementQuery` subscripts that **throw an ObjC `NSException` when multiple elements match the same label**, which is the default for SwiftUI nav bars, segmented controls, and modal-presentation overlays during animation. 
The exception isn't caught by `HTTPServer.runOnMain`, propagates up through `CFRunLoopPerformBlock`, crashes the runloop that `testServe()` is parked on (`XCTWaiter.wait`), and kills the test method. - -This document proposes a **two-layer fix**: - -1. **`findByLabel` should never throw** on ambiguous matches — switch the subscript form to `.matching(predicate).firstMatch`. Single-file Swift change. -2. **`runOnMain` should swallow ObjC exceptions** as defense-in-depth — adds a small ObjC bridge so any future uncaught XCTest throw returns an HTTP 500 instead of killing the runner. - ---- - -## 1. Reproduction - -### Repro environment - -| Component | Value | -|---|---| -| OS | macOS 26.0 / Darwin 25.0.0 (Apple Silicon) | -| Xcode | 26.3 (17C529) | -| Sim | iPhone 17 Pro (`6C396179-…`) iOS 26.2, single sim booted, fresh `simctl install` | -| Runner | `~/.specterqa/runner-build/Build/Products/SpecterQARunner_SpecterQARunner_iphonesimulator26.2-arm64-x86_64.xctestrun` with `ParallelizationEnabled` set to `false` (per v15.1.0 dogfood). | -| Launch | manual `xcodebuild test-without-building` (MCP `ios_start_session(backend='xctest')` is unrelated and works in 15.2.0). | - -### Three crashes — common signature - -All three followed the same pattern: `lsof -i :8222` → empty, `curl /health` → connection refused, in-sim runner test process gone, `pgrep -lf "xcodebuild test"` → empty, MCP/HTTP calls → `XCTest runner unavailable at http://localhost:8222: [Errno 61] Connection refused`. The xcresult bundle for each contained `failureText: "Failed to get matching snapshot: Find single matching element. Multiple matching elements found for "`. - -| # | Action sequence (max ~3 calls) | Sparse-tree match (from xcresult) | -|---|---|---| -| **A** | `ios_tap label="Settings" type="Button"` → `ios_tap label="Manage Libraries"` | `Settings` matches both the bottom-tab `Button` and the nav-title `StaticText`. 
With `type="Button"` it should disambiguate, but the inner subscript still resolves before the `type` filter applies. | -| **B** | `ios_set_appearance(mode='light')` → `ios_tap label="Switch Library"` | During the `Switch Library` sheet animation, the tree contains both the trigger button (still on screen) and the destination view's title — both labelled `Switch Library`. | -| **C** | `ios_long_press(element_index=…)` → `ios_tap label="Go back"` | Captured xcresult sparse tree (verbatim): `Other 'Go back'` ⇒ `Other 'Go back'` ⇒ `Button 'Go back'`. SwiftUI `NavigationStackHosting` propagates the accessibility label up two wrapper containers. | - -### Verbatim xcresult excerpt (Crash C) - -Path: `~/Library/Developer/Xcode/DerivedData/SpecterQARunner-hjopbdsmrpumseaeowrtvmpzsjpw/Logs/Test/Test-SpecterQARunner-2026.04.27_23-13-12--0400.xcresult` - -``` -"failureText" : "Failed to get matching snapshot: Find single matching element. -Multiple matching elements found for . -Sparse tree of matches: -→Application, pid: 77926, label: 'Palace' - ↳Window (Main), {{0.0, 0.0}, {402.0, 874.0}} - … - ↳Other, identifier: 'Catalog' - ↳NavigationBar, identifier: '_TtGC7SwiftUI32NavigationStackHosting' - ↳Other, label: 'Go back' ← match #1 - ↳Other, label: 'Go back' ← match #2 - ↳Button, label: 'Go back' ← match #3 -" -``` - ---- - -## 2. Root cause - -### File: `runner/Sources/SpecterQAElementQuery.swift`, lines 227–242 - -```swift -func findByLabel(_ label: String, type: String? = nil) -> XCUIElement? 
{ - if let typeName = type, let elementType = self.xcuiElementType(from: typeName) { - let match = self.app.descendants(matching: elementType)[label] - if match.exists { return match } - } - let match = self.app.descendants(matching: .any)[label] - if match.exists { return match } - // WebView fallback (subscript-based, safe) - for webView in self.app.webViews.allElementsBoundByIndex { - let wMatch = webView.descendants(matching: .any)[label] - if wMatch.exists { return wMatch } - } - return nil -} -``` - -`XCUIElementQuery`'s subscript `[label]` returns an `XCUIElement` that is *resolved lazily*. `match.exists` returns true if **any** match exists, not if *exactly one* exists. The ambiguity is detected only when a downstream consumer (`coordinate(withNormalizedOffset:).tap()` in `TapRoute.swift:34`) actually evaluates the element — at which point XCTest throws `NSInternalInconsistencyException` with reason `Find single matching element`. - -### File: `runner/Sources/HTTPServer.swift`, lines 326–338 - -```swift -func runOnMain(_ block: @escaping () -> Void) { - if Thread.isMainThread { - block() - } else { - let sem = DispatchSemaphore(value: 0) - CFRunLoopPerformBlock(CFRunLoopGetMain(), CFRunLoopMode.defaultMode.rawValue) { - block() - sem.signal() - } - CFRunLoopWakeUp(CFRunLoopGetMain()) - sem.wait() - } -} -``` - -`block()` is called from inside the main runloop. If `block()` raises an ObjC `NSException`, Swift cannot catch it — `do/try/catch` only catches Swift `Error`, and there is no `@try { } @catch` syntax in Swift. The exception propagates out of the runloop callback, terminating the runloop iteration. Since `testServe()` is parked on `XCTWaiter.wait(for: [stopExpectation], timeout: maxDuration)` (added in 15.2.0), an exception out of `CFRunLoop` ends the wait abnormally, the test method returns, the test runner exits, and xcodebuild reports failure. - ---- - -## 3. 
Patch - -### Patch 1 (primary) — make `findByLabel` always return a unique element - -Replace the body of `findByLabel(_:type:)` in `runner/Sources/SpecterQAElementQuery.swift` with: - -```swift -func findByLabel(_ label: String, type: String? = nil) -> XCUIElement? { - // Build a predicate that matches identifier OR label, preserving the - // existing subscript-fallback semantics. We then take .firstMatch so - // ambiguous trees (SwiftUI nav-bars, animated sheets, label-propagating - // wrapper Others) never throw `Find single matching element`. - let predicate = NSPredicate(format: "identifier == %@ OR label == %@", - label, label) - - if let typeName = type, let elementType = self.xcuiElementType(from: typeName) { - let typed = self.app.descendants(matching: elementType) - .matching(predicate) - .firstMatch - if typed.exists { return typed } - } - - let any = self.app.descendants(matching: .any) - .matching(predicate) - .firstMatch - if any.exists { return any } - - for webView in self.app.webViews.allElementsBoundByIndex { - let wAny = webView.descendants(matching: .any) - .matching(predicate) - .firstMatch - if wAny.exists { return wAny } - } - return nil -} -``` - -Apply the same `.matching(predicate).firstMatch` change to: - -- `findByLabel(_:type:index:)` (line 244) — replace the `[label]` subscripts with the predicate-based query, then take the appropriate index from `.allElementsBoundByIndex` of the filtered query (the existing iteration logic works because the filtered query is bounded). -- `findByIdentifier(_:)` (line 275) — replace `descendants(matching: .any)[identifier]` with `.matching(NSPredicate(format: "identifier == %@", identifier)).firstMatch`. Removes the same ambiguity surface for identifier-based taps. -- `waitForElement(_:type:timeout:)` (line 294) — same treatment if it's using subscript form. 
- -### Patch 2 (defense-in-depth) — catch ObjC exceptions in `runOnMain` - -Add `runner/Sources/SpecterQAObjCBridge.h`: - -```objc -// -// SpecterQAObjCBridge.h -// SpecterQA Runner -// -// Tiny Swift↔ObjC bridge for catching NSException out of XCTest calls. -// Swift cannot @try/@catch ObjC exceptions natively; XCTest throws -// NSInternalInconsistencyException on ambiguous queries, missing -// elements, etc. Without this, an XCTest throw out of a route handler -// crashes the runloop that testServe() is parked on. -// - -#import <Foundation/Foundation.h> - -NS_ASSUME_NONNULL_BEGIN - -@interface SpecterQAObjCBridge : NSObject - -/// Run @c block inside a try/catch. Returns the caught NSException -/// (if any) on the calling thread. Returns nil on success. -+ (NSException * _Nullable)tryBlock:(NS_NOESCAPE void (^)(void))block; - -@end - -NS_ASSUME_NONNULL_END -``` - -Add `runner/Sources/SpecterQAObjCBridge.m`: - -```objc -#import "SpecterQAObjCBridge.h" - -@implementation SpecterQAObjCBridge - -+ (NSException *)tryBlock:(NS_NOESCAPE void (^)(void))block { - @try { - block(); - return nil; - } - @catch (NSException *exception) { - return exception; - } -} - -@end -``` - -Add to the runner's bridging header (`SpecterQARunnerTests-Bridging-Header.h` if not present, or extend the existing one): - -```objc -#import "SpecterQAObjCBridge.h" -``` - -Modify `runOnMain` in `runner/Sources/HTTPServer.swift` (lines 326–338): - -```swift -func runOnMain(_ block: @escaping () -> Void) { - let safeBlock: () -> Void = { - if let exception = SpecterQAObjCBridge.tryBlock(block) { - NSLog("[SpecterQA] runOnMain caught NSException: %@ — %@", - exception.name.rawValue, - exception.reason ?? "") - self.addLog( - "NSException in route: \(exception.name.rawValue) — \(exception.reason ?? 
"")", - level: "error" - ) - } - } - if Thread.isMainThread { - safeBlock() - } else { - let sem = DispatchSemaphore(value: 0) - CFRunLoopPerformBlock(CFRunLoopGetMain(), CFRunLoopMode.defaultMode.rawValue) { - safeBlock() - sem.signal() - } - CFRunLoopWakeUp(CFRunLoopGetMain()) - sem.wait() - } -} -``` - -Optionally extend the route signatures to surface caught exceptions back to the HTTP caller (e.g. `runOnMainResult { … }` returning `Result`), but the minimal fix above is enough to keep the runner alive. - -### Patch 3 (cosmetic — bonus) — better error response when an element is genuinely ambiguous - -In `TapRoute.swift` lines 22–51 (and the `identifier` branch lines 53–76), when `findByLabel` returns `nil`, today the code falls through to the coordinate-tap path. After Patch 1 it never returns nil for ambiguous queries (it picks `firstMatch`), but you may prefer to *explicitly* respond with `409 Conflict` + `matches: [...]` when ambiguity is detected upstream, so callers can disambiguate in the next request. That's a nice-to-have, not load-bearing. - ---- - -## 4. Tests SpecterQA should add - -### 4.1 Element-query disambiguation harness (in-sim XCUITest) - -Add a stub UI test target that hosts a SwiftUI view with deliberate label collisions: - -```swift -struct AmbiguousLabelView: View { - var body: some View { - VStack { - Text("Go back") // wrapper label propagates - Button("Go back") {} // actual button - HStack { - Text("Go back") // segmented title - } - } - .accessibilityLabel("Go back") // outer container also labels - } -} -``` - -Then assert: - -```swift -func test_findByLabel_doesNotThrowOnAmbiguousLabels() { - let app = XCUIApplication() - app.launchEnvironment = ["DEMO_VIEW": "AmbiguousLabel"] - app.launch() - - let query = SpecterQAElementQuery(app: app) - // Pre-Patch 1, this throws and the test crashes. - // Post-Patch 1, returns the firstMatch — a valid element. 
- let element = query.findByLabel("Go back") - XCTAssertNotNil(element) - XCTAssertTrue(element?.exists ?? false) -} -``` - -Run this on iOS 26.2 + iOS 18.5 + iOS 17.5 in CI. Without Patch 1, it crashes the runner. With Patch 1, it passes. - -### 4.2 Route-handler exception isolation - -```swift -func test_runOnMain_swallowsNSExceptionAndKeepsServerAlive() { - let server = HTTPServer(port: 0, routes: [], elementQuery: nil, ...) - server.start() - - server.runOnMain { - NSException(name: .internalInconsistencyException, - reason: "synthetic test exception", - userInfo: nil).raise() - } - - // After the throw, the server should still be alive and accept connections. - XCTAssertTrue(server.isRunning) - let resp = try? URLSession.shared.dataTask(with: URL(string: "http://localhost:\(server.port)/health")!) - XCTAssertNotNil(resp) -} -``` - -Pre-Patch 2: this test crashes the test process. Post-Patch 2: it passes, and the server logs `runOnMain caught NSException: NSInternalInconsistencyException — synthetic test exception`. - -### 4.3 Live-driving smoke test (stability over N actions) - -```swift -func test_runner_survives_typical_navigation_session() { - // Drive 50 alternating tap/swipe/back actions through the HTTP API - // against a SwiftUI demo app with deliberate ambiguous-label nav bars. - // Pre-Patches: dies within ~3 actions on iOS 26 due to NavigationStackHosting. - // Post-Patches: completes all 50 with healthy /health throughout. -} -``` - -Add this as a CI gate on Apple Silicon iPhone 17 Pro / iOS 26.2. - ---- - -## 5. Why these specific patches - -| Alternative considered | Why rejected | -|---|---| -| **Always use `firstMatch` everywhere, drop subscript form entirely** | Loses the existing identifier-vs-label fallback semantics that callers depend on. Predicate-based query + `firstMatch` keeps both behaviors. 
| -| **Wrap `block()` in `Result.init { try block() }` instead of ObjC bridge** | Swift's `try/catch` only catches Swift `Error`; XCTest's `Find single matching element` throw is an ObjC NSException. The Swift catch never fires. ObjC bridge is required. | -| **Add a 30 s watchdog timer in `runOnMain` that respawns the runner if a route hangs** | Doesn't address the actual failure (sync exception out of runloop callback). Adds restart latency. The proposed patches keep flows intact. | -| **Document "always pass `type=` and `identifier=`" as a workaround** | Already partially documented; doesn't help when the AX tree contains transient duplicates during animation. The fix needs to live in the runner so callers don't have to think about it. | -| **Catch via `XCTestCase.continueAfterFailure`** | `continueAfterFailure` only affects `XCTAssert*` failures, not unrelated NSException throws. | - ---- - -## 6. Backwards compatibility - -- Patch 1 changes the *internal* behavior of `findByLabel` only. The public method signature is unchanged, the return type is unchanged, the existing nil-on-not-found contract is preserved. Callers that today rely on "throws if multiple matches" are dependent on a documented bug — there are no such callers in the SpecterQA codebase (verified by `grep -rn findByLabel`). -- Patch 2 adds two files (`SpecterQAObjCBridge.h/.m`) and changes the body of one method. The existing `runOnMain` callers see no API change; they just stop crashing. -- Wheel size impact: ~1 KB of compiled ObjC. Negligible. - ---- - -## 7. 
Files to ship in the patch - -``` -runner/Sources/SpecterQAElementQuery.swift # modified -runner/Sources/HTTPServer.swift # modified -runner/Sources/SpecterQAObjCBridge.h # NEW -runner/Sources/SpecterQAObjCBridge.m # NEW -runner/SpecterQARunner.xcodeproj/project.pbxproj # add the two new source files -runner/SpecterQARunnerTests-Bridging-Header.h # add #import "SpecterQAObjCBridge.h" -``` - -If SpecterQA already has a bridging header with a different name, just `#import` there — no new bridging header file is needed. - ---- - -## 8. Suggested release labelling - -If shipped in `15.2.1` or `15.3.0`, the changelog entry might read: - -> **15.3.0 — Runner stability** -> -> - Element-query layer (`findByLabel`, `findByIdentifier`, `waitForElement`) now uses `XCUIElementQuery.matching(predicate).firstMatch` internally. SwiftUI nav-bars, segmented controls, and animated sheet transitions whose AX trees contain duplicate labels no longer crash the runner with `Find single matching element`. Reported by Maurice Carrier (Palace iOS) — see Palace dogfood ledger 2026-04-27. -> - `HTTPServer.runOnMain` wraps the dispatched block in an ObjC `@try`/`@catch` shim so any uncaught XCTest `NSException` becomes an HTTP 500 + structured log entry instead of killing the test method. Defense-in-depth. - ---- - -## 9. Open questions for SpecterQA - -1. Is `SpecterQAElementQuery` intentionally tolerant of identifier-or-label matching via subscript fallback, or did the predicate switch lose desired behavior? (We preserved both via `OR` predicate.) -2. Is there a use case where callers *want* `findByLabel` to fail loudly on ambiguity (i.e. assert "exactly one match exists")? If yes, expose it as a separate method (`findUniqueByLabel`) — but the current default of "tap whatever matches first" is consistent with how human users interact with the screen. -3. 
Would you accept a follow-up PR adding a `--strict-element-resolution` test flag that turns Patch 1 back into an explicit error (for harnesses that *want* to catch ambiguity early)? - -Happy to provide xcresult bundles for Crashes A/B/C if helpful — they're preserved at: - -``` -~/Library/Developer/Xcode/DerivedData/SpecterQARunner-hjopbdsmrpumseaeowrtvmpzsjpw/Logs/Test/Test-SpecterQARunner-2026.04.27_23-13-12--0400.xcresult -~/Library/Developer/Xcode/DerivedData/SpecterQARunner-aoumsytgktcjfmcyvilwjvzogdvn/Logs/Test/Test-SpecterQARunner-2026.04.27_23-10-29--0400.xcresult -``` diff --git a/.specterqa/dogfood/v16.0.0a1-maurice.md b/.specterqa/dogfood/v16.0.0a1-maurice.md deleted file mode 100644 index 3451846..0000000 --- a/.specterqa/dogfood/v16.0.0a1-maurice.md +++ /dev/null @@ -1,407 +0,0 @@ -# SpecterQA iOS 16.0.0a1 — Dogfood Report (Vision-First Alpha) - -**Date:** 2026-04-28 -**Reporter:** Maurice Carrier (Palace iOS, Synctek) -**Tested against:** Palace iOS develop @ `677055467` (built 2026-04-27, vendored DRM/secrets), iPhone 17 Pro / iOS 26.2 sim, Xcode 26.3 (17C529), macOS Darwin 25.0.0 -**SpecterQA version:** `16.0.0a1` (PyPI pre-release, installed via `pip3 install --upgrade --pre`) - -**Companions:** -- `~/Desktop/specterqa-v15.1.0-dogfood.md` — original 15.1.0 dogfood (CFRunLoop watchdog issue, fixed in 15.2.0). -- `~/Desktop/specterqa-v15.2.0-runner-stability-patch.md` — tactical patch for ambiguous-match runner crashes (Patch 2 shipped in 16.0.0a1; Patch 1 still outstanding but irrelevant since labels are now deleted). -- `~/Desktop/specterqa-direction-proposal-vision-first.md` — strategic direction proposal that 16.0.0a1 implements. 
- ---- - -## TL;DR - -**The vision-first direction landed.** 16.0.0a1 ships exactly the design proposed last night: `ios_observe` returning a screenshot + `reliable_targets` (only elements with explicit `accessibilityIdentifier`) + `app_state`, and `ios_act` as a unified action dispatcher with coordinate-primary semantics, optional `identifier`, optional `normalized=true`, and a documented refusal to support label-based selectors ("they're the v15.x crash class that v16 deletes" — verbatim from `ios_act` schema). 47 tools shrank to 35; 14 selector-based tools were deleted in one major. - -**The execution has three integration bugs in this alpha that block end-to-end MCP-driven usage.** None are design problems; all are plumbing. Specifically: - -1. **`RunnerProcess` FAILED state is terminal.** Once an xctest deploy hits the 90 s healthcheck timeout, every subsequent `ios_start_session` immediately re-throws the cached error without retrying. `ios_stop_session` does not reset the state. Only an MCP server restart clears it. Single-line fix. -2. **`ios_dismiss_first_launch_alerts` is paywalled at indie tier in 16.0+.** It was free in 15.x. iOS 26.2's simctl cannot pre-grant `notifications`, and `ios_act` cannot reach SpringBoard alert windows (they're outside the target app's coord scope) — so trial users on 16.0+ have no path to clear the notifications prompt that fires on every fresh install. The xctest runner's interruption monitors *would* auto-dismiss it, but they're upstream of the alert-tier gate and depend on the runner deploying successfully (see #3). -3. **XCTest's `XCUIApplication(bundleIdentifier:)` returns error 10100 "Unknown application" against `org.thepalaceproject.palace` on iOS 26.2** even when Palace is installed AND pre-launched (verified via `launchctl list`). The runner test method enters a 60-second `isApplicationStateKnown` wait that times out, the test exits, the in-sim HTTP server never binds, and `/health` never goes 200. 
Manual `xcodebuild test-without-building` reaches a working `cg_event_direct` tap path — `/tap` works! — but `/elements` only ever returns the host simulator's hardware-button chrome (Action, Volume Up, Sleep/Wake), not Palace's view tree. - -I drove the auth journey halfway (Add Library picker → A1QA selected → catalog loaded with real books) by **bypassing MCP entirely and POSTing directly to `localhost:8222/tap` on the manually-deployed runner**, using coordinates I read off `xcrun simctl io screenshot` PNGs. The vision-first model itself is sound — every coordinate tap I dispatched landed precisely where I expected and Palace navigated as recorded. The bugs are in the connective tissue between MCP and the runner. - ---- - -## 1. What 16.0.0a1 ships (the good news) - -### 1.1 The direction proposal is in production - -```python -# /usr/local/.../specterqa/ios/mcp/server.py — header docstring, verbatim -"""SpecterQA iOS MCP Server — Native primitives for Claude Code. - -Claude Code IS the reasoning engine. This server exposes direct -simulator control primitives — no Claude API calls, no SoM pipeline, -no orchestration loops. Claude sees annotated screenshots and decides -what to do. - -Tools (35 total — v16.0.0a1 vision-first surface): - ios_observe Vision-first observation: screenshot + reliable_targets - ios_act Unified action verb: tap/type/swipe/key/scroll/long_press/drag - ... - -v16.0.0a1 deletes the v15.x AX-tree selector layer (ios_screenshot, -ios_tap, ios_elements, ios_long_press, ios_swipe, ios_swipe_back, -ios_type, ios_press_key, ios_dismiss_keyboard, ios_wait, -ios_wait_for_element, ios_wait_idle, ios_capture_state, -ios_action_with_logs) — replaced by ios_observe + ios_act. -""" -``` - -That is the direction proposal almost verbatim. Thank you. 
- -### 1.2 `ios_observe` returns the proposed shape - -Single live call against Palace's Add Library screen returned: - -```json -{ - "screenshot": "<50884-char base64 PNG>", - "device_w": 390, - "device_h": 844, - "reliable_targets": [], - "app_state": {"state": "foreground", "details": "..."}, - "captured_at": "2026-04-28T13:45:04.286353+00:00" -} -``` - -`reliable_targets: []` is the *correct* answer — Palace doesn't set `accessibilityIdentifier` on its widgets. The proposal anticipated this exact case: do not synthesize fake "reliable" identifiers from labels; only surface explicitly-set ones. 16.0.0a1 honors that. - -### 1.3 `ios_act` schema is exactly the proposed verb - -``` -{kind: 'tap', x, y} -{kind: 'tap', identifier} # opt-in semantic -{kind: 'long_press', x, y, duration_s?} -{kind: 'long_press', identifier, duration_s?} -{kind: 'type', text, x?, y?} -{kind: 'swipe', from: [x,y], to: [x,y], duration_ms?} -{kind: 'drag', from: [x,y], to: [x,y], duration_ms?} -{kind: 'key', name} -{kind: 'scroll', direction: up|down|left|right, x?, y?} -``` - -`normalized=true` for 0.0–1.0 fractional coords ships as proposed. Label-based selectors are explicitly rejected at schema level. This is the right surface. - -### 1.4 Patch 2 from the v15.2.0 stability doc shipped - -`runner/Sources/SpecterQAObjCBridge.h` and `.m` are present in the wheel. The runner stayed alive across 30+ HTTP calls in this session — a striking contrast to the v15.x baseline where it died within 1–3 actions because XCTest `NSException` propagated out of `runOnMain`. Whatever route caused those crashes in 15.x is now caught and returned as an HTTP error instead. - -(Patch 1 — `findByLabel.firstMatch` — was not shipped, but is moot in 16.0 since the entire selector-based query layer is deleted.) - ---- - -## 2. Tool surface validation - -| Tool | Tested? 
| Result | -|---|---|---| -| `ios_get_capabilities` | ✅ | `{"version": "16.0.0a1", "tool_count": 35, "backends": ["xctest", "ax"]}` | -| `ios_observe` (ax backend) | ✅ | Correct shape, screenshot legible, `reliable_targets: []` honest | -| `ios_act` (tap, x/y) | ✅ delivered | Tap reaches root-level UI but not SwiftUI list cells under AX backend | -| `ios_act` (tap, normalized) | ✅ delivered | Same | -| `ios_app_state` | ✅ | reports foreground correctly | -| `ios_pre_grant_permissions` | ✅ | granted location/camera/microphone/photos. **`notifications` failed** — same iOS-26 restriction documented in 14.x | -| `ios_dismiss_first_launch_alerts` | ❌ paywalled | `{"error": "tier_required", "required_tier": "indie", "current_tier": "trial"}` | -| `ios_start_session(backend='xctest')` | ❌ | Goes into FAILED state; never recovers without MCP restart | -| `ios_start_session(backend='ax')` | ✅ | Works, returns `target_udid == frontmost_udid` | -| `ios_stop_session` | ⚠️ | Returns `{"status": "stopped"}` but does **not** clear the FAILED `RunnerProcess` registry entry | - ---- - -## 3. Three integration bugs - -### 3.1 `RunnerProcess` FAILED state is terminal — single-line patch - -#### Symptom - -After the first failed xctest deploy (which times out at 90 s waiting for `/health` because of bug #3), every subsequent `ios_start_session(backend='xctest', …)` returns: - -``` -RunnerDeployError: RunnerProcess is in FAILED state: -Runner did not become healthy within 90.0s -``` - -Reproduced by calling `ios_start_session` four times with intervening `ios_stop_session`. State persists. Only `/mcp` reconnect clears it. - -#### Root cause — `runner_process.py:233-239` - -```python -if self._state == RunnerState.FAILED: - raise RunnerDeployError( - f"RunnerProcess is in FAILED state: {self._last_error}", - udid=self._udid, - port=self._port, - ) -``` - -There is no path back from FAILED. The state machine has FAILED as a terminal sink. 
`stop()` (line 367) does call `_transition(RunnerState.STOPPED)` and `_registry.pop(...)`, which would let a future `acquire()` create a fresh IDLE instance — but the MCP server's `ios_stop_session` only invokes `RunnerProcess.stop()` along the *successful-deploy* path; FAILED instances are never stopped, just left in the registry forever. - -#### Suggested patch - -```python -# runner_process.py — replace lines 233-239 with auto-recovery -if self._state == RunnerState.FAILED: - # Auto-recover from a prior failed deploy. Clean up any stale - # process and previous error, then fall through to the normal - # IDLE → BUILDING → DEPLOYED → RUNNING flow. ios_stop_session - # only fires on success paths today, so without this we'd be - # stuck in FAILED until MCP restart. - logger.info( - "RunnerProcess.deploy: clearing FAILED state from previous " - "deploy attempt; last_error=%s", self._last_error - ) - if self._process is not None: - try: - self._process.kill() - self._process.wait(timeout=5) - except Exception: - pass - self._process = None - self._last_error = None - self._transition(RunnerState.IDLE) - # fall through to the IDLE-state deploy logic below -``` - -Alternatively, fix it at the top of `ios_stop_session`: always invoke `RunnerProcess.stop()` regardless of current state, so the registry is cleaned up on the FAILED path too. - -#### Test - -```python -def test_failed_deploy_does_not_block_subsequent_attempts(): - # Force a failed deploy (e.g. by booting a sim without the app installed - # and using a bundle id XCTest cannot resolve). - with pytest.raises(RunnerDeployError): - runner.deploy(bundle_id="com.invalid.unknown") - assert runner.state == RunnerState.FAILED - - # A second deploy with a valid bundle id should auto-recover, not - # re-raise the cached failure. 
- runner.deploy(bundle_id="com.example.real-app") - assert runner.state in (RunnerState.RUNNING, RunnerState.DEPLOYED) -``` - ---- - -### 3.2 `ios_dismiss_first_launch_alerts` paywalling is a regression for trial users on iOS 26.x - -#### Symptom - -```json -{ - "error": "tier_required", - "required_tier": "indie", - "current_tier": "trial", - "tool_name": "ios_dismiss_first_launch_alerts", - "message": "'ios_dismiss_first_launch_alerts' requires a indie license or higher.", - "upgrade_url": "https://synctek.io/specterqa#pricing" -} -``` - -#### Why this matters more in 16.0+ than it did in 15.x - -In 15.x, three independent paths could clear the iOS notifications permission alert that fires on every fresh install: - -1. **`ios_pre_grant_permissions`** (free in all tiers) — but `notifications` is OS-restricted on iOS 18.4 *and now iOS 26.2*; verified today: `granted: ['location', 'camera', 'microphone', 'photos'], failed: [{'service': 'notifications', 'error': 'Operation not permitted'}]`. -2. **`ios_dismiss_first_launch_alerts`** (free in 15.x, paywalled in 16.0+) — walks all sim AX windows including SpringBoard's. The only path that actually reaches the SpringBoard alert window. -3. **`ios_tap(label='Allow')` / `ios_tap(label="Don't Allow")`** (deleted in 16.0) — the *coordinate fallback* of the v15.x label-based tap could sometimes hit the alert if SpringBoard's window happened to be frontmost. - -In 16.0+, paths 1 and 3 don't work for `notifications`, leaving #2 as the only non-runner path — and it's now paywalled. Trial users have no way to dismiss the alert without an XCTest runner that auto-handles it via interruption monitors. The XCTest runner depends on `ios_start_session(backend='xctest')` succeeding, which depends on bug #3 being absent. 
- -So in 16.0+ on iOS 26.x, the dependency chain for trial users to even *get past first launch* is: - -``` -xctest runner deploys → registers interruption monitors → app launches → alert fires → monitor auto-dismisses -``` - -Any break in that chain (and the §3.3 "Unknown application" bug breaks it deterministically on iOS 26.2 right now) leaves the user stuck on the home screen with the alert, with no MCP-side recovery available unless they buy indie. - -#### Suggested fix - -Either: -- **(A) Move `ios_dismiss_first_launch_alerts` back to the trial tier** — at minimum for `notifications`, since that's the OS-restricted case where pre-grant cannot work. It's not really a "premium" capability so much as a workaround for an Apple limitation, and the paywall converts that limitation into a hard block on first-time evaluation. Bad first-run experience. -- **(B) Add `ios_act({kind: 'tap', target: 'springboard_window', label: 'Allow'})`** to the unified action — explicitly scoped to system overlays. Free tier. Solves the underlying need. -- **(C) Document a free workaround** — `xcrun simctl notifyPost` or modifying TCC.db before launch. Currently neither works for "dismiss the alert that's already on screen." - -Operational workaround we used today: `xcrun simctl uninstall && xcrun simctl install` to clear the pending alert state, then rely on the xctest runner's interruption monitors when the runner can be deployed. - ---- - -### 3.3 XCTest "Unknown application: 'org.thepalaceproject.palace'" on iOS 26.2 - -#### Symptom - -Verbatim from runner stdout (`Test-SpecterQARunner-2026.04.28_09-36-14.xcresult`): - -``` -2026-04-28 09:36:16.500 [SpecterQA] Registered 7 interruption monitors. 
-2026-04-28 09:36:16.500 [SpecterQA-XCTDebug] - Error (com.apple.dt.xctest.ui-testing.error) 10100, userInfo keys: (null): - Unknown application: 'org.thepalaceproject.palace' -2026-04-28 09:36:16.500 [SpecterQA-XCTDebug] - XCUIApplicationImplDepot: Creating XCUIApplicationImpl for identifier: - org.thepalaceproject.palace -2026-04-28 09:36:16.500 [SpecterQA-XCTDebug] - KVO change: checked status of Expect value of 'isApplicationStateKnown' - of Application 'org.thepalaceproject.palace' to be '1': fulfill=NO -2026-04-28 09:36:16.500 [SpecterQA-XCTDebug] - entering wait loop for 60.00s - with expectations: `Expect value of 'isApplicationStateKnown' of - Application 'org.thepalaceproject.palace' to be '1'` -``` - -The wait times out at 60 s, the test method exits, the in-sim HTTP server never binds, the parent MCP `_wait_for_health(90.0)` times out, the deploy is marked FAILED. - -#### Verified non-causes - -- **App is installed.** `xcrun simctl get_app_container 6C396179-… org.thepalaceproject.palace` returns the container path. SHA matches the build artifact. -- **App is registered with launchd.** `xcrun simctl spawn 6C396179-… launchctl list | grep palace` returns `10497 0 UIKitApplication:org.thepalaceproject.palace[2d29][rb-legacy]`. -- **Bundle id is correct.** `/usr/libexec/PlistBuddy -c "Print :CFBundleIdentifier" Palace.app/Info.plist` returns `org.thepalaceproject.palace`, exact case match. -- **Sim is correct.** xcresult `SimDevice: iPhone 17 Pro (6C396179-…, iOS 26.2, Booted)` — the same sim the app is installed on. -- **ParallelizationEnabled is false.** Patched per the v15.1.0 dogfood. xcresult does not show "Clone N of …" — runner uses the original sim. -- **Pre-launching Palace via `xcrun simctl launch` first** does not help. The runner still reports `Unknown application` immediately. 
- -#### Suspected cause - -LaunchServices on iOS 26.2 sims may be more aggressive about purging "unverified" app registrations between sim cold starts, or XCTest's app-resolution path now requires a different signal (entitlement, provisioning, or NSApplicationCategoryType in Info.plist) that wasn't required on 18.4. - -The smoking gun: the runner's `/tap` endpoint *works* via `cg_event_direct` (low-level Core Graphics event injection) — confirmed in this session, where I drove Palace through Add Library → A1QA selection → catalog load via direct curl. But `/elements`, `/source`, and `/screenshot` (all of which need XCTest's app handle) only ever return host simulator chrome (the hardware buttons: Action, Volume Up, Sleep/Wake at negative coordinates). The runner is alive, the HTTP server is up, the test process is healthy — it just never bound to the app. - -#### What this means for the runner code - -The 60 s `XCTWaiter` for `isApplicationStateKnown` is unhelpful — XCTest will never satisfy it under this failure mode. The wait should: - -1. **Time out faster** — 60 s of dead time is awful UX. -2. **Surface the underlying error** — log `Unknown application` *as an error*, not as a debug breadcrumb buried in 100 KB of `[SpecterQA-XCTDebug]` lines. -3. **Continue starting the HTTP server anyway**, with a degraded mode marker. `cg_event_direct` taps work without app binding — let `/tap` and `/screenshot` (via `xcrun simctl io screenshot`, not `XCUIScreen.screenshot()`) serve, while marking `/elements` as unavailable. 
- -#### Suggested patch sketch - -```swift -// runner/Sources/SpecterQARunner.swift, around line 88 -let appBindTimeout: TimeInterval = 15 // shrink from 60s - -NSLog("[SpecterQA] Attempting to bind XCUIApplication to '\(bundleId)' …") -injector.app.launch() -let bound = injector.app.wait(for: .runningForeground, timeout: appBindTimeout) - -if !bound { - NSLog("[SpecterQA] WARNING: XCUIApplication never reached running state " + - "for '\(bundleId)' within \(Int(appBindTimeout))s. " + - "Likely cause: XCTest 'Unknown application' on iOS 26+. " + - "Starting HTTP server in DEGRADED mode — /tap and /screenshot " + - "via simctl will work; /elements and /source will return 503.") - server.degraded = true -} -// proceed to start HTTP server regardless -``` - -This converts an "everything is broken" outcome into "tap works, queries don't, you can still drive coord-based flows" — which is exactly the vision-first happy path anyway. - -#### Test - -CI matrix should include iOS 26.0, 26.1, 26.2 against the same Palace bundle. Today this combo is silently broken; making it loudly degraded would surface the real Apple bug to Apple via xcrun stderr logging. - ---- - -## 4. Direct-runner success: vision-first model is sound - -To prove the design isn't the problem, I drove the auth journey halfway by **bypassing MCP and POSTing directly to `localhost:8222/tap` on the manually-deployed runner**, reading coords from `xcrun simctl io screenshot` PNGs: - -| Step | Mechanism | Result | -|---|---|---| -| 1. Tap Switch Library button | `curl POST /tap {"x": 30, "y": 75}` | Picker sheet opened | -| 2. Tap Add Library | `curl POST /tap {"x": 200, "y": 720}` | Full library list opened | -| 3. Locate A1QA Test Library | `curl /elements` returned cell at `(195, 346)` (when picker was visible — `/elements` returns the SpringBoard sheet correctly even though it returns chrome-only when Palace is foreground) | Bbox extracted | -| 4. 
Tap A1QA cell | `curl POST /tap {"x": 195, "y": 346}` | A1QA Test Library activated | -| 5. Wait for catalog | `xcrun simctl io screenshot` polling | Real catalog loaded with books: Animal Farm, Private, Later, Action Bible, Les Once, Epigenética | - -11 coord-based interactions. Zero runner deaths. The vision-first model **works**. The MCP integration just doesn't surface it through `ios_act` over the AX backend reliably for SwiftUI list cells. - ---- - -## 5. Recommended fixes, priority-ordered - -| Pri | Fix | Effort | Impact | -|---|---|---|---| -| **P0** | Auto-recover `RunnerProcess` from FAILED state in `deploy()` (§3.1) | 1 hour | Unblocks every subsequent retry after a transient deploy failure. Eliminates one MCP-restart round-trip per session. | -| **P1** | Faster timeout + degraded-mode HTTP startup when `XCUIApplication` never binds (§3.3) | 1 day | Converts iOS-26.2 silent failure into a visible "tap works, query doesn't" mode. Doesn't fix Apple's bug but stops it from killing the whole MCP path. | -| **P1** | Move `ios_dismiss_first_launch_alerts` (or at least its `notifications` path) back to trial tier (§3.2) | minutes | Restores trial-user first-run on iOS 26+. Otherwise 16.0 ships with a hard regression for unpaid evaluation. | -| **P2** | Make `ios_act` over the AX backend deliver hit-testable touches to SwiftUI list cells | 2–3 days | Today AX `ios_act` works for root-level UI but not for cells inside SwiftUI Lists. Forces fall-through to xctest runner (which is the iOS-26.2 broken path). | -| **P3** | File a radar with Apple for the `XCUIApplication "Unknown application"` regression on iOS 26.2 | 1 day | Long-term fix; SpecterQA should surface enough diagnostic to make this a clean Apple bug report (sysctl `kern.osversion`, the bundle id, the LaunchServices dump). | - ---- - -## 6. 
CI tests SpecterQA should add - -```python -def test_runner_process_failed_state_auto_recovers(): - """A failed deploy must not block subsequent successful deploys.""" - rp = RunnerProcess.acquire("invalid-udid", 8222) - with pytest.raises(RunnerDeployError): - rp.deploy(bundle_id="invalid") - assert rp.state == RunnerState.FAILED - - rp2 = RunnerProcess.acquire(VALID_BOOTED_UDID, 8222) - rp2.deploy(bundle_id=KNOWN_GOOD_BUNDLE_ID) - assert rp2.state in (RunnerState.RUNNING, RunnerState.DEPLOYED) - - -def test_ios_observe_returns_proposed_shape(): - session = start_session(backend="ax") - obs = ios_observe() - assert "screenshot" in obs - assert "device_w" in obs and obs["device_w"] > 0 - assert "device_h" in obs and obs["device_h"] > 0 - assert "reliable_targets" in obs and isinstance(obs["reliable_targets"], list) - # Crucially: reliable_targets must be EMPTY for an app that doesn't - # set accessibilityIdentifier. We must not fall back to label-as-id. - if not app_uses_accessibility_identifiers(): - assert obs["reliable_targets"] == [], ( - "reliable_targets must only surface explicit " - "accessibilityIdentifier values, never synthetic labels" - ) - - -def test_ios_act_rejects_label_selectors(): - """The whole point of v16 is that label is gone.""" - with pytest.raises(SchemaError): - ios_act({"kind": "tap", "label": "Anything"}) - - -def test_xctest_runner_degrades_gracefully_when_app_bind_fails(): - """When XCTest's XCUIApplication never reports runningForeground — - e.g. iOS 26.2 'Unknown application' — the HTTP server should still - start and /tap should still work via cg_event_direct.""" - # Simulate by passing a bundle id that's installed but not registered - # with LaunchServices. 
The runner should: - # (a) log a clear WARNING (not buried in XCTDebug noise) - # (b) bind port 8222 within 30s (not hang for 60s) - # (c) serve /health 200 and /tap 200, with degraded=true in /health - # (d) return 503 with descriptive message on /elements - - -def test_ios_dismiss_first_launch_alerts_works_in_trial_tier_for_notifications(): - """Notifications cannot be pre-granted on iOS 26+, and ios_act - cannot reach SpringBoard windows. Removing the trial-tier path - blocks every fresh-install flow on iOS 26+.""" - set_license_tier("trial") - boot_fresh_sim_with_pending_notification_alert() - result = ios_dismiss_first_launch_alerts(decline=False, permissions=["notifications"]) - assert result["dismissed"] >= 1, ( - "Trial users on iOS 26+ have no other path to clear the alert" - ) -``` - ---- - -## 7. Companion artifacts on this Mac - -- `/Users/mauricework/Library/Developer/Xcode/DerivedData/SpecterQARunner-azafnfryerzziucibuetssnvxdpa/Logs/Test/Test-SpecterQARunner-2026.04.28_09-36-14--0400.xcresult` — full xcresult bundle for the `Unknown application` failure (§3.3). Contains the verbatim `[SpecterQA-XCTDebug]` log, the 60 s `XCTWaiter` trace, and the `SimDevice` confirmation that the runner targeted the right sim. -- `/tmp/v16-after-runner.png`, `/tmp/v16-a1qa-loaded.png`, `/tmp/v16-direct-tap.png` — sequential screenshots of the half-completed auth journey via direct curl. Demonstrates `cg_event_direct` taps work end-to-end through SwiftUI lists when the runner is alive (§4). -- `/tmp/v16-elems.json`, `/tmp/v16-elems2.json` — `/elements` responses showing simulator-chrome-only output (§3.3) vs the SpringBoard sheet hierarchy when Palace is backgrounded. - -Happy to do a 16.0.0a2 dogfood once §3.1 + §3.2 + §3.3 are addressed. The design is right — let's land the plumbing. 
diff --git a/.specterqa/dogfood/v16.0.0a2-maurice.md b/.specterqa/dogfood/v16.0.0a2-maurice.md deleted file mode 100644 index 11dfc99..0000000 --- a/.specterqa/dogfood/v16.0.0a2-maurice.md +++ /dev/null @@ -1,662 +0,0 @@ -# SpecterQA iOS v16.0.0a2 — Palace Dogfood Report - -**Date:** 2026-04-28 -**Reporter:** Maurice Carrier (maurice.carrier@synctek.io) -**Follow-up to:** `v16.0.0a1-maurice.md` -**Verdict:** **Hard blocker. Not dogfoodable on iOS 26.0 in current state.** -v16.0.0a2 cannot deploy a working runner on a clean machine — and the failure -is silent at every layer above the deploy. - ---- - -## TL;DR for the SpecterQA team - -1. On iOS 26.0 / iPhone 12 / Xcode 16, **`ios_start_session` cannot deploy a - working xctest runner.** xcodebuild spawns, runs briefly, exits, takes the - sim down with it, never binds `:8222`. `ios_session_status`, - `ios_start_session`, and `/health` (when an old runner is still around) - all report `ok`/`healthy` regardless. -2. **My initial smoke succeeded only because of orphan MCP daemons holding - the port from prior sessions.** As soon as the orphans were killed, every - subsequent start failed in the way described above. This is the most - important finding in this report — please skim §"Orphan piggyback" first. -3. The "vision-first" experience also has two design-level plumbing issues - independent of the deploy bug: `ios_observe` payload always exceeds the - MCP token cap, and the coord space between `ios_observe` and the runner - is inconsistent. - -I recommend **a3 ship-blockers**: -- (P0) deploy actually works on iOS 26.0 + healthcheck refuses to lie when - the runner exited -- (P0) screenshot is delivered as an MCP image content block (or a path to - read), not inline base64 in JSON -- (P0) coord space normalized - -The "vision-first" strategic redirection is sound; the runner-lifecycle and -delivery layer it depends on isn't ready. 
- ---- - -## Setup - -- specterqa-ios `16.0.0a2` (clean `pip install --pre --upgrade specterqa-ios`) -- MCP server reloaded via `/mcp` after upgrade, per - `feedback_pip_upgrade_mcp_restart.md` -- Mac: macOS 25.0 (Darwin 25), Xcode 16 -- iPhone 12, iOS 26.0, UDID `31CF5C43-DD55-4889-B3B2-9A6810B4E98F` - (Palace's preferred SpecterQA sim — chosen specifically to avoid the - iOS 26.2 LaunchServices "Unknown application" bug a2 partially fixed) -- Palace develop @ `313275b6b` (PR #880 PalaceKeychain merged), built fresh - in `.claude/worktrees/specterqa-develop` via `xcodebuild build`, - `xcrun simctl install`'d to the iPhone 12 sim -- A1QA Test Library signed in; Catalog tab visible at session start -- Smoke target: tap "My Books" tab from Catalog screen, observe page change - ---- - -## Tools attempted - -`ios_doctor`, `ios_start_session` (with `backend=xctest` and `backend=auto`, -with and without `auto_recover=true`), `ios_stop_session`, `ios_observe` -(`quality=standard` and `quality=thumbnail`), `ios_act` (tap with pixel -coords / point coords / `normalized=true`), `ios_session_status`, -`ios_app_state`, `ios_logs`. Plus direct `curl http://localhost:8222/health` -and `curl POST :8222/tap` to bypass the MCP layer. - ---- - -## Findings - -### 🔴 P0-1 — `ios_start_session` cannot deploy a working runner on iOS 26.0; failure is silent at every layer - -**This is the worst bug in the report. 
It supersedes most of what I wrote -in my a1 dogfood about "FAILED state caching" — that was a symptom; this is -the root.** - -**Repro (with clean state — single MCP daemon, freshly booted sim, fresh -Palace.app installed):** - -``` -# Initial state — clean -$ pgrep -fl specterqa-ios-mcp # 1 daemon -$ xcrun simctl shutdown all -$ xcrun simctl boot 31CF5C43-DD55-4889-B3B2-9A6810B4E98F -$ xcrun simctl list devices booted | grep 31CF5C43 - iPhone 12 (31CF5C43-DD55-4889-B3B2-9A6810B4E98F) (Booted) ✓ -$ pgrep -fl xcodebuild # nothing -$ curl -sS http://localhost:8222/health # connection refused (good — no orphan) -``` - -```python -# Now call start_session -ios_start_session(bundle_id="org.thepalaceproject.palace", - device_id="31CF5C43-...", - backend="auto") -# Returns: -{"status": "ok", "device_type": "simulator", - "target_udid": "31CF5C43-...", "port": 8222, - "runner_url": "http://localhost:8222", - "clone_udid": "31CF5C43-..."} -``` - -``` -# Within seconds: -$ pgrep -fl xcodebuild -22777 /Applications/Xcode.app/.../xcodebuild test-without-building -xctestrun - /Users/mauricework/.specterqa/runner-build/.../iphonesimulator26.2-arm64-x86_64.xctestrun - -destination id=31CF5C43-... -$ xcrun simctl list devices booted | grep 31CF5C43 # nothing — sim is dead -$ curl -sS http://localhost:8222/health # connection refused - -# 60s later, polling :8222/health every 3s with `until curl ...; do sleep 3; done`: -$ until curl -sS http://localhost:8222/health 2>/dev/null | grep -q status; \ - do sleep 3; done -# Never returns. PID 22777 has exited. Sim still shutdown. -``` - -```python -# But the daemon thinks it's healthy: -ios_session_status() -# {"status": "healthy", "deploy_id": null, "started_at": null, -# "udid": null, "elapsed_ms": 0, "error": null} -``` - -So the call chain is: - -1. `ios_start_session` returns success with a `runner_url` -2. xcodebuild spawns and runs briefly -3. 
xcodebuild exits without binding `:8222` (test target probably failed to - launch on iOS 26.0) -4. The act of running xcodebuild appears to shut down the sim — sim was - `Booted` immediately before the call, gone immediately after -5. `ios_session_status` reports `"healthy"` with all fields `null` and - `elapsed_ms: 0` -6. Any subsequent `ios_observe` / `ios_act` either errors (current daemon - has nothing to talk to) or, worse, returns success with stale state if a - prior runner is still in memory (see §"Orphan piggyback") - -**Source-level note** — I grepped the installed package: - -- `mcp/server.py:3663` has a `_ensure_sim_booted` helper that does check - `simctl list devices --json` and attempts boot. **It's wired into - `handle_app_relaunch` (line 3705), not into `handle_start_session`.** -- `mcp/server.py:1197` has F3 logic that re-boots the sim *after* - `ios_stop_session` ("keep sim Booted"). So the daemon has the building - blocks; they're just not on the start_session path. -- `session_manager._deploy_runner` calls `RunnerProcess.deploy()` per the - a2 changelog. The a2 fix was for FAILED-state caching; nothing was added - to verify the runner's HTTP server actually came up before returning - `ok`. -- `mcp/server.py:556`'s `simctl list devices --json` helper exists and is - used by other tools — `/health` could call it on every poll instead of - trusting cached `sim_pid`. - -**Suggested fixes for a3 (in priority order):** - -1. **Don't return `status:ok` from start_session until the runner is healthy.** - Either block on `_wait_for_health(timeout)` (v13.x behavior) or return - `{status: "deploying", deploy_id, ...}` and require a follow-up poll - (current docstring describes this as the `wait=False` path; default - `wait=True` should actually wait). -2. **Boot the sim if it's shutdown.** Wire `_ensure_sim_booted` into - `handle_start_session`. The capability exists. -3. 
**`/health` should re-validate the sim is booted on each call.** Replace - cached `sim_pid` with a live `simctl list devices booted | grep <udid>` - check. If the sim is dead, return `503 {"status":"sim_down","udid":.., - "suggested_fix":"xcrun simctl boot <udid> && ios_start_session"}`. The - a2 changelog promises this for `/elements`/`/source` in degraded mode - ("a3 will add an explicit 503 + degraded marker") — `/health` needs it - too. -4. **`ios_session_status` should return `error` when the deploy failed.** - Currently it returns `"healthy"` with all-null fields — distinguishable - only by checking that `udid` is null. A vision agent will not check. - Either populate the fields when healthy or set status to - `"failed"`/`"never_deployed"` when the daemon has no live runner. -5. **xcodebuild test-failure logs are silent.** The xctest deploy likely - has output explaining why it died on iOS 26.0 — capture it and surface - via `ios_session_status.error` / `RunnerDeployError.suggested_fix`. - ---- - -### 🔴 P0-1.5 — Orphan MCP daemons silently piggyback on each other's runners - -**This is what made the bug confusing during my smoke and is a related -lifecycle gap.** - -When I started the smoke, three SpecterQA MCP daemons were running: - -``` -$ ps -o pid,etime,command -p 3535,11926,68913 - PID ELAPSED COMMAND -68913 56:47 specterqa-ios-mcp -11926 05:53:26 specterqa-ios-mcp - 3535 17:57 specterqa-ios-mcp ← current (matched my /mcp reconnect) -``` - -Two of those (11926 ≈ 6 hours old, 68913 ≈ 1 hour old) were orphans from -prior Claude Code sessions / `/mcp` reconnects that didn't terminate the -old daemons. Each was holding state for a runner that had been deployed -during its own lifetime, and one of them was still bound to `:8222` with a -stale `sim_pid: 87357` (the PID was no longer alive). 
- -For the first ~30 minutes of my smoke, every `ios_start_session` / -`ios_observe` / `ios_act` looked like it worked because **the current -daemon's calls were either bypassing into the orphan's HTTP server, OR my -direct `curl` to `:8222` was talking to the orphan's runner which still -served cached frames.** I had no way to tell from inside the MCP tools. - -After I `kill 11926 68913`, every smoke call failed in the way described -in P0-1, and `:8222` went silent. - -**Root issue:** there's no reconciliation between MCP daemon instances and -the runner lifecycle. A daemon can hold a port-bound runner long after its -parent process disconnected. A new daemon will silently use the old port -with stale state. None of the tools tell the agent it's reading from a -ghost. - -**Suggested fixes for a3:** - -- **Daemon startup should adopt or evict.** When a daemon comes up and - finds `:8222` already bound, it should either probe `/health` and adopt - the runner if responsive, or force-kill and redeploy. Currently the new - daemon ignores existing port state. -- **MCP server should expose process ownership.** A `/health` field like - `daemon_pid` makes orphan-vs-current distinguishable from the agent - side. (Today the agent has to `pgrep` and reason about it, which is - what surfaced this for me.) -- **`specterqa-ios mcp restart` (or equivalent) should kill orphans.** A - `pgrep specterqa-ios-mcp | grep -v $$ | xargs kill` step at startup - would prevent silent piggyback. The Palace memory note - `feedback_pip_upgrade_mcp_restart.md` already tells users to `/mcp` after - pip upgrade — a3 could make that step idempotent. 
- ---- - -### 🔴 P0-2 — `ios_observe` returns frozen byte-identical screenshots when the runner is stale (whether dead-sim or orphan-piggyback) - -**Repro:** with the orphan-piggyback configuration described above -(orphan still bound to `:8222`, sim has since shutdown, three daemons live), -call `ios_observe(quality="thumbnail")` 8 times across 3 separate sessions -and 6+ minutes of activity (including `ios_act` taps in between): - -``` -extracted-screenshot-1.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-2.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-3.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-4.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-5.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-6.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-7.jpg md5 9de96fa9b6766b5b949badcd4f42584f -extracted-screenshot-8.jpg md5 9de96fa9b6766b5b949badcd4f42584f -``` - -All 8 byte-identical. Meanwhile `captured_at` advances correctly: - -``` -"captured_at": "2026-04-28T19:13:23.240962+00:00" -"captured_at": "2026-04-28T19:16:03.273900+00:00" -"captured_at": "2026-04-28T19:19:47.610661+00:00" -... -``` - -So the runner is **fabricating fresh capture timestamps onto a frozen -pixel buffer**. Either: -- The orphan runner's screencap path is broken when the underlying sim is - gone, and it serves the last successful frame on every call without - detecting the failure, or -- The runner Swift `/observe` route caches the frame and returns the cached - copy when subsequent screencap fails - -For a vision-first agent this is the **worst** failure mode possible. Every -observe LOOKS fresh (timestamp advances), but the pixels never change. The -agent's reasoning loop completely breaks: it dispatches an action, observes -"no change", and either gives up or escalates incorrectly. This is far -worse than a hard error — at least an error is loud. 
- -**Caveat I owe the team:** I cannot 100% rule out a third explanation — -that the app really WAS on the same Catalog screen the whole time (because -taps weren't reaching it, see P0-4) and the rendered pixels happened to be -identical. I think this is unlikely — we'd have seen at least the keyboard -appear, focus rings change, animations, etc. — but I can't isolate cleanly -without a confirmed working tap, and I never had one in this smoke. **The -fix story is the same either way:** observe should refuse to serve a -buffer when the sim is dead. - -**Suggested fix:** -- Capture screenshot live every call. If `simctl io screenshot` - fails (or the sim is shutdown), refuse with a structured error rather - than returning a stale buffer. -- If a cache layer is needed for performance, include the source frame's - capture time in the runner response and refuse to serve frames older than - e.g. 1 second. - ---- - -### 🔴 P0-3 — `ios_observe` payload exceeds the MCP token cap at *every* quality level - -**This is independent of the deploy bug — it would be a blocker even with -a perfectly working runner.** - -**Repro (orphan piggyback was alive when I measured these — the orphan WAS -returning a fresh-shaped JSON, just with stale pixels, so the size measure -is real):** - -``` -ios_observe(quality="standard") → 188,595 chars (≈188 KB) → ❌ exceeds MCP cap -ios_observe(quality="thumbnail") → 64,295 chars (≈64 KB) → ❌ exceeds MCP cap -ios_observe(quality="full") → larger; not tested -``` - -The shipped wire format is: - -```json -{"result": "{\"app_state\": {...}, - \"captured_at\": \"...\", - \"device_h\": 2622, - \"device_w\": 1206, - \"reliable_targets\": [], - \"screenshot\": \"\"}"} -``` - -Note `result` is a **stringified** JSON inside the outer envelope, so the -base64 screenshot is double-encoded effectively. JSON-encoded base64 has -~1.4× overhead vs raw bytes; the outer string-escape adds more. 
Even the -25%-scaled thumbnail (47 KB on disk → 64 KB inline) is too big. - -**Mandatory workaround for every observe call (what I had to do):** - -```bash -# Tool returns error with file path -jq -r '.result | fromjson | .screenshot' \ - /Users/.../tool-results/mcp-specterqa-ios-ios_observe-XXX.txt \ - | base64 -d > /tmp/x.jpg -# Then use Read tool on /tmp/x.jpg to actually see the image -``` - -That's two extra tool round-trips per observe (Bash + Read) vs v15's single -`ios_screenshot` saved-to-path flow. Vision-first design has a plumbing -problem — the design assumes the agent can see the screenshot, but the MCP -text-result path can't carry it at any vision-useful resolution. - -**Suggested fix (in priority order):** - -1. **Return the screenshot as an MCP image content block, not inline base64 - in `result.screenshot`.** Anthropic SDK + Claude Code clients render - image blocks natively as multimodal content; the agent gets the pixels - into its vision input directly without a Read round-trip. This is what - the `mcp__claude_ai_*` and `mcp__computer-use__screenshot` patterns do - and it's the right answer for vision-first. -2. **Failing (1):** write the screenshot to a path - (`/tmp/specterqa-observe-<id>.jpg`) and return - `{"screenshot_path": "/tmp/...", "device_w": ..., "device_h": ..., - "reliable_targets": [...]}` instead of inline bytes. Agent uses its - native Read tool on the path. This is what v15 effectively did via - `ios_screenshot`'s save path. -3. The MCP cap is around 25 KB. To fit base64 inline you'd need - `quality < 10%`, which destroys vision usefulness — not an option. 
- ---- - -### 🔴 P0-4 — Coord-space ambiguity: `ios_observe` reports JPEG pixel dims, runner reports points; `ios_act` doesn't document which one to use - -**Three different numbers for the same device:** - -| Source | `device_w × device_h` | -|---|---| -| `ios_observe` JSON | `1206 × 2622` | -| `:8222/health` | `390 × 844` | -| iPhone 12 native | `1170 × 2532` (=`390 × 844 × 3` scale) | - -The `1206 × 2622` is none of those — it's the **resized JPEG output** -dimension at the active quality. (At thumbnail 25%, the on-disk JPEG is -`301 × 655`; full-scale would be `1204 × 2620 ≈ 1206 × 2622`. The -fractional values are quality-dependent rounding artifacts.) - -`ios_act` tool description: *"the agent picks (x, y) from the ios_observe -screenshot. ... `normalized=true` treats x/y in [0.0, 1.0] as fractions of -device dimensions."* — implies the screenshot's pixel coords, but the -runner clearly hit-tests in points. - -**What I tried (against the orphan runner, before realizing it was stale):** - -```python -# Tab bar "My Books" — image (301×655) shows it at roughly (110, 620) -ios_act({"kind":"tap", "x":928, "y":512}) # screenshot pixel coords ❌ -ios_act({"kind":"tap", "x":440, "y":2480}) # device pixel coords ❌ -ios_act({"kind":"tap", "x":130, "y":804}) # logical points ❌ -ios_act({"kind":"tap", "x":0.366, "y":0.946}, normalized=True) ❌ -# Direct curl bypassing MCP: -curl POST :8222/tap '{"x":130, "y":804}' -# {"success": true, "mode": "cg_event_direct", "x": 130, "y": 804} -``` - -In every case the response was `{"status":"ok", "tapped":"", "x":..., "y":...}` -with empty `tapped`. None produced visible app state change — but I was -arguing with the orphan's stale frame, so I cannot say definitively whether -"tapped:''" reflects "no element under coords" or "tap dispatched into the -void." - -**Suggested fixes for a3:** - -- **One coord space everywhere. Pick logical points** (`390 × 844` here). 
- That's what UIKit hit-tests against and what the runner already uses - internally. Document: "all coordinates are in logical points, matching - what `:8222/health` reports." -- **`ios_observe` should report `device_w/device_h` in points.** The - resized JPEG dimensions can be a separate `screenshot_w/screenshot_h` - field with an explicit `screenshot_scale` — agents who want pixel-level - reasoning have it; agents who want to act have one obvious coord space. -- **`tapped: ""` is ambiguous** — distinguish "dispatched, no element - resolved at coords" (`tapped: null` or `hit_test: "miss"`) from - "dispatched onto an element with no `accessibilityLabel`/`identifier`" - (`tapped: { kind: "miss" }` vs `{ kind: "unidentified", frame: ... }`). - Today both look the same and both look like success. - ---- - -### 🟡 P1-1 — `ios_session_status` returns `"healthy"` with all-null fields when the daemon has no live runner - -**Already covered in P0-1 root-cause discussion, surfacing here as its own -finding because it's the single most useful diagnostic an agent has.** - -```python -ios_session_status() -# {"status": "healthy", "deploy_id": null, "started_at": null, -# "udid": null, "elapsed_ms": 0, "error": null} -``` - -I observed this: -- After `ios_stop_session`, before any subsequent start -- After a failed `ios_start_session` whose runner died silently -- On a daemon that had never deployed a runner in its life - -The schema fields exist but aren't populated, and `status: "healthy"` is -the most misleading possible value — it's literally what an agent would -poll for. - -**Suggested fix:** distinguish `idle` (clean, no deploy attempted) from -`healthy` (live runner verified) from `degraded` (runner alive but sim is -gone) from `failed` (last deploy attempt errored). Populate `udid`, -`started_at`, `elapsed_ms`, `error` accordingly. The states already exist -in the runner_process.py state machine per the a2 changelog — they just -aren't exposed. 
- ---- - -### 🟡 P1-2 — `ios_logs` returns `count: 0` on a dead sim instead of erroring - -```python -ios_logs(seconds=30) -# {"count": 0, "logs": [], "summary": {"total_entries":0, "errors_count":0, -# "by_level":{}, "by_subsystem":{}}, "source": "simctl"} -``` - -Same for `seconds=15`, with/without `pattern` filter, with/without -`level=error`. Palace logs to OSLog actively (verified in v15.x). The -underlying `simctl log show` against a shutdown sim returns no entries, -and the tool returns 0 entries silently rather than erroring. This masks -the underlying dead-sim problem instead of surfacing it. - -**Suggested fix:** when `source: "simctl"` and the target sim is shutdown, -return `{"error": "sim_shutdown", "udid": ..., "suggested_fix": -"xcrun simctl boot $UDID"}` rather than `count: 0`. The other tools -(`ios_observe`, `ios_act`) also need this same dead-sim detection — fixing -it once in a shared helper would solve P0-1, P0-2, P1-1, and this. - ---- - -### 🟡 P1-3 — `backend="xctest"` returns `"Requested backend 'xctest' is not available on this system"` while `backend="auto"` deploys xctest fine - -**Repro (single MCP daemon, same machine, same xctestrun on disk):** - -```python -ios_start_session(bundle_id=..., device_id="31CF5C43-...", backend="xctest") -# {"error": "Requested backend 'xctest' is not available on this system."} - -# Same arguments, only backend changed: -ios_start_session(bundle_id=..., device_id="31CF5C43-...", backend="auto") -# {"status": "ok", "device_type": "simulator", ..., -# "port": 8222, "runner_url": "http://localhost:8222"} -# (xcodebuild test-without-building actually spawns this time — -# it then dies, but it spawns) -``` - -So the daemon's `BackendSelector.choose()` *can* deploy xctest (when invoked -via `auto`), but the explicit `backend="xctest"` path fails the availability -check. 
Either: -- The availability check is checking something different from what `auto` - uses (different runner-build path resolution, missing capability bit, - etc.), or -- `BackendSelector.choose()` has fallback logic that the explicit path - doesn't replicate - -**Suggested fix:** unify the paths. The explicit `backend="xctest"` should -take exactly the same code path as `auto` choosing xctest. If something is -genuinely unavailable, the error message should name it. - ---- - -### 🟢 P2-1 — `reliable_targets` is always empty for Palace - -Palace devs don't set `accessibilityIdentifier` on most elements (this is -on us, not SpecterQA). Worth noting in v16's design: in real codebases the -"reliable_targets opt-in semantic helper" is essentially nonexistent. - -Vision-first **must** work without it — Phase B's promise that "the -screenshot is the truthful representation" is correct in theory, but only -works if vision actually works (see P0-2 / P0-3 / P0-4). The -`reliable_targets` shape is fine to keep as a future-proofing affordance, -but don't depend on it for any agent flow. - ---- - -### 🟢 P2-2 — `simctl status_bar override` masks freshness signals for vision agents - -Palace freezes the status bar to `2:48` (typical iOS demo override). -Vision agents comparing two screenshots can't use the time bar to detect -"did the screen actually update?" — pixels in the status bar always match. - -This isn't a SpecterQA bug per se, but it's a bad interaction with -vision-first. During my smoke I almost concluded "screen never changed -because tap didn't fire" when really the screen wouldn't have shown a time -change anyway. The `captured_at` field in `ios_observe` is the right -freshness signal — but it's in JSON metadata, not in the screenshot the -vision model sees. 
- -**Suggested fix (light touch):** -- Recorder/setup template advises NOT freezing `status_bar` for active - sessions, OR -- Render a small, stable `captured_at` overlay into the screenshot - (corner, low-contrast, well outside any tap target), so the agent has - a freshness bit it can rely on visually. - ---- - -## What did work - -- `pip install --pre --upgrade specterqa-ios` clean install: ✅ -- MCP server reload via `/mcp`: ✅ -- `ios_doctor`: ✅ — clean output, identified booted sims correctly. Note - *doctor* DID see the iPhone 12 was Booted in its checks, so the daemon - has a working sim-state probe; it's just not used elsewhere. -- `ios_stop_session`: ✅ always returns cleanly, even when there's nothing - to stop. Possibly *too* clean — see P1-1 (status:healthy after stop). -- `auto_recover=true` accepted as a parameter (no schema error). I cannot - confirm it actually triggers — never observed a recovery — but it's - wired up at the API surface. - ---- - -## What did NOT work (cumulative, for clarity) - -| | Result | -|---|---| -| `ios_start_session` deploys a working runner (xctest, iOS 26.0) | ❌ | -| `ios_start_session` blocks until runner is healthy | ❌ | -| `ios_start_session` boots the sim if it's shutdown | ❌ (capability exists but isn't wired in) | -| `ios_start_session` fails loudly when deploy fails | ❌ (returns `status:ok` with bad URL) | -| `/health` reflects current runner reality | ❌ (caches stale `sim_pid`) | -| `ios_session_status` reflects current daemon reality | ❌ (returns `healthy` with all-null fields) | -| `ios_observe` returns inline screenshot under MCP cap | ❌ (188K standard, 64K thumbnail; cap ≈ 25K) | -| `ios_observe` returns fresh frame on dead sim | ❌ (returns frozen byte-identical frame, fresh `captured_at`) | -| `ios_observe.device_w/h` matches `:8222/health` device dims | ❌ (`1206×2622` vs `390×844`) | -| `ios_act tap` produces visible state change | ❌ (uncertain — never ran on a confirmed live runner) | -| `ios_act` 
distinguishes "no element at coords" from "no label" | ❌ (`tapped:""` either way) | -| `ios_logs` errors when sim is shutdown | ❌ (returns `count:0`) | -| `backend="xctest"` works the same as `backend="auto"`'s xctest path | ❌ (explicit returns "not available") | -| Orphan MCP daemons get reaped on new daemon startup | ❌ | -| Three concurrent MCP daemons cooperate sensibly on `:8222` | ❌ (silent piggyback on stale state) | - ---- - -## What I would test next, once a3 lands - -In rough priority order: - -1. **Deploy works on iOS 26.0** — single `ios_start_session(backend=xctest)` - on a freshly-booted iPhone 12, healthy `:8222` within 30s, all - `ios_session_status` fields populated. -2. **Tap-and-observe loop on a live runner** — Catalog → My Books → Catalog, - compare MD5 of observe frames to confirm pixel diff, confirm `tapped:` - reflects the resolved element. -3. **MCP image content block delivery** — verify a vision agent gets the - screenshot into its vision input on a single tool call, no Bash + Read - round-trips. -4. **Coord-space normalization** — pick a coord, confirm - `ios_observe.device_w/h` matches `:8222/health.device_w/h`, confirm - `ios_act` accepts those coords without normalization, document the - chosen unit (points). -5. **Orphan reaping** — `pkill -KILL specterqa-ios-mcp; specterqa-ios mcp` - should leave exactly one daemon and one bound port. Spawn a second, - confirm the second adopts/evicts. -6. **Sign-in flow on a live runner** — Lyrasis Reads basic auth, dismiss - first-launch alerts via `ios_dismiss_first_launch_alerts` (a2 restored - to trial tier — verify decline path). -7. **Type-into-text-field** — search bar in Catalog, send `key=return` - submit. v13.x rule was "never `ios_press_key('return')`" because it - crashed the session — does v16's `ios_act({kind:'key',name:'return'})` - crash too? -8. **Audiobook play/pause** — `ios_act({kind:'key', name:...})` — - PalaceAudiobookToolkit reacts to volume / remote events. -9. 
**Re-run all 25 `.specterqa/journeys/`** once Phase C ships replay v2 - and a migration tool exists for pre-v16 YAMLs. - ---- - -## Direct-runner findings (parking notes) - -- The `xcodebuild test-without-building` runner happily kept serving HTTP - on `:8222` for ~10 minutes after its target sim shut down (the orphan - case). The runner Swift process should detect sim-host disappearance - (XCTRunner IDE-session probe; periodic `simctl list booted` poll; - CGEvent dispatch failure feedback) and self-terminate, freeing the port - for the next session. -- Today's only cleanup is via `pgrep -f "xcodebuild test-without-building" - | xargs -r kill -9` (Palace's CLAUDE.md kill-stale-runners step). That - works, but it's a foot-gun if multiple sessions exist (e.g. a parallel - agent on iPhone 16 Pro will lose its runner too). -- The runner has a `:8222/tap` route that responds - `{"success": true, "mode": "cg_event_direct"}` with no actual - cg_event_direct verification — Phase B's "coordinate-based `/tap` - continues to work in degraded mode" is a guarantee that depends on - CGEvent succeeding, which the current code doesn't check. - ---- - -## Strategic note (optional reading) - -The vision-first redirection in v16 is the right call — I stand by my -v15.2.0 direction proposal that motivated it. But Phase A (vision -primitives) shipped before Phase B (selector-layer demolition) was -finished AND while the runner-lifecycle layer regressed below v15.2.0's -already-shaky baseline. The result is an alpha where: - -- The strategic primitives (`ios_observe`, `ios_act`) **can't be exercised - end-to-end** because the layer beneath them (RunnerProcess deploy, - HTTP server, sim lifecycle) doesn't hold. -- The fallback path (legacy `ios_tap`/`ios_screenshot`/`ios_elements`) - was deleted from the MCP surface. -- The migration path for pre-v16 YAMLs is "Phase C, deferred." 
- -A v16.0.0a3 that **fixes the lifecycle layer first** and ships the new -primitives on top would be far more dogfoodable than what's in a2. If -that pushes Phase B/C into a4 or v16.0.1, that's fine — getting deploy + -vision delivery + coord space right is the whole bet. - ---- - -## Verdict - -**a2 is a hard blocker for any Palace dogfood work.** Three options I'm -weighing on my side: - -1. Wait for a3. -2. Roll back to v15.2.0 for actual journey runs (the AX-tree path was - stable enough with the ParallelizationEnabled patch). -3. Try iOS 18.4 (iPhone 16 Pro, harness-pool default) to see if the deploy - bug is iOS 26-specific. Quick test, useful data for SpecterQA, doesn't - unblock develop testing today. - -For now I'm staying on a2 + writing this report; not rolling back. - -— mc diff --git a/.specterqa/dogfood/v16.0.0a3-maurice-triage.md b/.specterqa/dogfood/v16.0.0a3-maurice-triage.md deleted file mode 100644 index e320998..0000000 --- a/.specterqa/dogfood/v16.0.0a3-maurice-triage.md +++ /dev/null @@ -1,666 +0,0 @@ -# SpecterQA iOS v16 — Fix List from Palace Dogfood - -**Date:** 2026-04-28 -**Author:** Maurice Carrier (maurice.carrier@synctek.io), via Palace iOS dogfood -**Inputs:** `v16.0.0a1-maurice.md`, `v16.0.0a2-maurice.md` -**Validated through:** `16.0.0a3` (Xcode 26.3, macOS 25, iPhone 12 / iOS 26.0 -and iPhone 17 Pro / iOS 26.2 simulators) - -This is a triaged engineering plan synthesized from the a1/a2 dogfood -reports and the a3 verification I just ran. It's organized by priority and -written so each item is a self-contained ticket: context, repro, suggested -fix shape, failing-test sketch, LOC estimate, and a "who should take it" -recommendation. - -I'm offering to take some of these as PRs against the SpecterQA repo if -you'd like the help — flagged inline. Items I think are better for your -team are flagged accordingly (Swift runner depth, strategic calls, or work -that needs the iOS 26.3 verification setup I don't have locally). 
- ---- - -## ✅ Confirmed fixed in a3 (closure) - -For closure, before the open items. These all came from the a2 report: - -| Finding | Status in a3 | Verified how | -|---|---|---| -| P0-1.5 — orphan MCP daemons silently piggyback | ✅ fixed | After every `/mcp` reload, `pgrep -fl specterqa-ios-mcp` shows exactly 1 daemon. | -| P1-1 — `ios_session_status` lies "healthy" with all-null fields | ✅ fixed | Now returns `degraded` with real `error` string and `daemon_pid`. Distinguishes idle / deploying / healthy / degraded / failed. | -| P0-3 — `ios_observe` payload exceeds MCP cap | ✅ fixed (per changelog; not live-verified due to deploy failure on my Mac) | `ios_observe` returns `screenshot_path` instead of inline base64. | -| P0-4 — coord-space ambiguity | ✅ fixed (per changelog; not live-verified) | `device_w/h` are logical points; pixel dims surfaced separately as `screenshot_w/h`. | - -The changelog claim for P0-3/P0-4 is convincing — I'd verify them -end-to-end on my next session once iOS 26.3 runtime is installed (see -§Environment). - ---- - -## 🔴 P0-A — `ios_start_session` split-path: default returns `status:ok` on failed deploy; only `auto_recover=true` surfaces the structured error - -**This is the highest-impact open bug.** a3 ships a perfectly good -`runner_deploy_health_timeout` structured error with the iOS-26-SDK -mismatch hint and a full xcodebuild stderr trace — but the agent only sees -it when it passes `auto_recover=true`. The default path silently lies. 
- -### Repro (verified in a3, 2026-04-28) - -```python -# Default — auto_recover defaults to False -ios_start_session(bundle_id="org.thepalaceproject.palace", - device_id="6C396179-...", - backend="xctest") -# Returns: -{"status": "ok", "device_type": "simulator", - "target_udid": "6C396179-...", "port": 8222, - "runner_url": "http://localhost:8222"} -# But: -ios_session_status() -# {"status": "degraded", -# "error": "runner /health probe failed: ConnectionError: ...", -# "daemon_pid": 6120, ...} -``` - -```python -# Same call, only auto_recover added: -ios_start_session(bundle_id="org.thepalaceproject.palace", - device_id="6C396179-...", - backend="xctest", - auto_recover=True) -# Returns: -{"error": "runner_deploy_health_timeout", - "message": "The XCTest runner did not respond to /health within 90 seconds... - common cause on iOS 26.0: SDK mismatch...rebuild the runner...", - "udid": "6C396179-...", - "port": 8222, - "retryable": false, - "underlying_error": "RunnerDeployError: xcodebuild exited with code -6 ... - ASSERTION FAILURE in DVTiPhoneSimulator.m:1856 ..."} -``` - -Same daemon, same arguments, same machine — opposite tool surface -behavior. - -### Suggested fix - -`mcp/server.py::handle_start_session`. There's almost certainly a branch -that gates the structured-error response on `auto_recover`. The structured -error should fire **unconditionally** when deploy fails. `auto_recover` -should control whether the daemon **attempts recovery** (boot sim, retry -deploy), not whether it **reports the failure honestly**. - -```python -# Pseudocode of the desired shape -def handle_start_session(..., auto_recover=False): - if not _ensure_sim_booted(udid, attempt_boot=auto_recover): - return {"error": "sim_boot_failed", ...} # already structured - try: - runner = deploy_runner(...) - return {"status": "ok", ...} - except RunnerDeployError as e: - # Currently: this branch is conditional on auto_recover. - # Fix: this branch is unconditional. 
auto_recover only changes - # whether we *retry* deploy before raising. - return { - "error": "runner_deploy_health_timeout", - "message": ..., - "underlying_error": str(e), - "retryable": False, - } -``` - -### Failing-test sketch - -```python -# tests/test_handle_start_session.py -def test_start_session_returns_structured_error_when_deploy_fails(monkeypatch): - """auto_recover=False (default) must surface RunnerDeployError, not status:ok.""" - monkeypatch.setattr( - session_manager, "_deploy_runner", - lambda *a, **kw: (_ for _ in ()).throw( - RunnerDeployError("simulated xcodebuild crash", code=-6) - ), - ) - result = handle_start_session( - bundle_id="com.test.app", - device_id="ANY-UDID", - backend="xctest", - # auto_recover NOT passed — default - ) - assert "error" in result, f"expected error, got {result}" - assert result["error"] == "runner_deploy_health_timeout" - assert "underlying_error" in result - assert result.get("retryable") is False -``` - -### Estimate - -- LOC: ~5–15 in `server.py` -- Test: ~30 LOC - -### Who - -**I'd take this.** Trivial scope, high impact, isolated to one function. - ---- - -## 🔴 P0-B — `ios_observe` may still return frozen pixels when runner is stale (deferred from a2 §P0-2; a3 changelog says "likely resolved by orphan reap") - -The a3 changelog notes: - -> P0-2 — `ios_observe` returns frozen pixels when runner is stale. Likely -> resolved by the orphan-daemon reaping (P0-1.5) — the cached-frame -> scenario depended on an orphan runner. If it persists in a3 dogfood, -> add a refuse-on-stale check to the runner Swift `/screenshot` route. - -I cannot verify in this dogfood pass because deploy never succeeded on my -Mac (see §Environment). The hypothesis — that the stale frames came -exclusively from the orphan runner serving its last successful capture — -is plausible. 
But the runner Swift `/screenshot` route should defend -against this regardless: a single daemon's runner can also be in a state -where its underlying sim has died (e.g., user power-cycles, simctl -shutdown from another process, runtime crash) but the runner Swift -process is still alive on Mac. - -### Suggested fix - -Two parts: - -**1. Runner Swift `/screenshot` route — refuse-on-stale** - -`runner/Sources/Routes/ScreenshotRoute.swift` (filename guessed from a2 -changelog's "23 per-route files under `runner/Sources/Routes/`"). - -```swift -// Pseudocode -func handle() -> Response { - let now = Date() - guard simIsResponsive(timeout: .milliseconds(500)) else { - return .json(["error": "sim_unresponsive", - "udid": targetUDID, - "last_known_state": "shutdown_or_hung"], status: 503) - } - let result = XCUIApplication().screenshot() // throws if sim is gone - let imageData = result.image.pngData() - let captureTime = Date() // actual frame time, not request time - return .json(["screenshot_path": writeToTmp(imageData), - "captured_at": captureTime.iso8601], - status: 200) -} - -func simIsResponsive(timeout: TimeInterval) -> Bool { - // Cheapest probe: try to read the sim's clock or a no-op - // accessibility query within timeout. If it fails or hangs, sim is - // gone. -} -``` - -**2. Python wrapper — propagate the 503** - -`mcp/server.py::handle_observe`. When the runner returns 503, surface it -as `{"error": "screenshot_unavailable", "reason": ..., "suggested_fix": -"restart session: ios_stop_session() then ios_start_session(...)"}` -rather than re-wrapping as a Python exception. 
- -### Failing-test sketch - -```python -# tests/test_observe_stale_sim.py -def test_observe_returns_error_after_sim_shutdown_during_session(live_sim): - """If the sim is shutdown mid-session, observe must error, not cache.""" - start_session(live_sim.udid, bundle_id="...") - obs1 = ios_observe() - assert "screenshot_path" in obs1 - - # Shutdown the sim out from under the runner - subprocess.run(["xcrun", "simctl", "shutdown", live_sim.udid], check=True) - - obs2 = ios_observe() - assert "error" in obs2, f"expected error after sim shutdown, got {obs2}" - assert obs2["error"] in ("screenshot_unavailable", "sim_unresponsive") -``` - -### Estimate - -- Swift: ~30–50 LOC -- Python wrapper: ~10–15 LOC -- Test: ~40 LOC, requires live sim - -### Who - -**Better for your team.** Swift runner depth + needs live sim -verification I can't do today (no iOS 26.3 runtime locally). I could pair -on the Python-side propagation if useful. - ---- - -## 🔴 P0-C — `runner Swift /health` should re-validate sim alive on every poll, not cache `sim_pid` - -This is the corollary to P0-B and was P0-1's root in a2. a3's Python -fix (live `/health` probe in `session_status`) papers over it from the -agent's view, but the runner-side cache is the real problem — every poll -of `:8222/health` returns `{sim_pid: 87357, ...}` long after PID 87357 is -gone. - -### Repro (verified in a2 dogfood) - -```bash -# After the runner has been alive for a while, with sim shutdown: -$ curl -sS http://localhost:8222/health -{"status": "ok", "sim_pid": 87357, "device_w": 390, "device_h": 844} -$ ps -p 87357 -# (no such process) -$ xcrun simctl list devices booted | grep $UDID -# (nothing — sim is dead) -``` - -### Suggested fix - -`runner/Sources/Routes/HealthRoute.swift` (filename guessed). 
- ```swift -func handle() -> Response { - // Cheap, frequent: launchd_sim probe - let simAlive = isSimAlive(udid: targetUDID) // kill -0 sim_pid OR - // simctl list booted check - if !simAlive { - return .json([ - "status": "sim_down", - "udid": targetUDID, - "last_sim_pid": cachedSimPid, - "suggested_fix": "xcrun simctl boot \(targetUDID); ios_start_session" - ], status: 503) - } - // re-resolve sim_pid live, don't return cached - return .json([ - "status": "ok", - "sim_pid": currentSimPid, - "device_w": ..., "device_h": ..., - ], status: 200) -} -``` - -### Failing-test sketch - -```python -def test_health_returns_503_after_sim_shutdown(live_sim): - start_session(live_sim.udid, bundle_id="...") - # Confirm /health is happy - r = requests.get("http://localhost:8222/health") - assert r.json()["status"] == "ok" - - subprocess.run(["xcrun", "simctl", "shutdown", live_sim.udid]) - - r = requests.get("http://localhost:8222/health") - assert r.status_code == 503 - assert r.json()["status"] == "sim_down" -``` - -### Estimate - -- Swift: ~20–40 LOC -- Test: ~30 LOC, live sim - -### Who - -**Your team.** Same Swift-runner / live-sim caveat as P0-B. - ---- - -## 🟡 P1-A — `ios_logs` returns `count:0` on a dead sim instead of erroring (deferred from a2 §P1-2) - -```python -ios_logs(seconds=30) -# {"count": 0, "logs": [], "summary": {...}, "source": "simctl"} -# When the target sim is shutdown. -``` - -Masks the dead-sim signal. An agent debugging a session sees "no logs -during the failure" and misses the actual failure cause. - -### Suggested fix - -`mcp/server.py::handle_logs` (or wherever `simctl log show` is invoked -from). - -```python -def handle_logs(seconds: int = 30, ...): - udid = current_session_udid() - if not _is_sim_booted(udid): - return { - "error": "sim_shutdown", - "udid": udid, - "suggested_fix": "xcrun simctl boot {} && ios_start_session(...)".format(udid), - } - # ... 
existing simctl log show invocation -``` - -`_is_sim_booted` already exists per my earlier grep -(`mcp/server.py:265, 354, 556` have `simctl list devices --json` helpers). - -### Failing-test sketch - -```python -def test_logs_errors_when_sim_shutdown(monkeypatch): - monkeypatch.setattr(server, "_is_sim_booted", lambda udid: False) - result = handle_logs(seconds=30) - assert "error" in result - assert result["error"] == "sim_shutdown" -``` - -### Estimate - -- LOC: ~10–15 -- Test: ~15 LOC - -### Who - -**I'd take this.** Trivial. Same pattern as P0-A. - ---- - -## 🟡 P1-B — `backend="xctest"` returns "not available on this system" while `backend="auto"` happily deploys xctest (deferred from a2 §P1-3) - -### Repro (verified in a2 dogfood) - -```python -ios_start_session(bundle_id=..., device_id="...", backend="xctest") -# {"error": "Requested backend 'xctest' is not available on this system."} - -# Same args, only backend changed: -ios_start_session(bundle_id=..., device_id="...", backend="auto") -# {"status": "ok", ...} -- and xctest IS the backend it picks -``` - -### Suggested fix - -Unify the code paths so `backend="xctest"` takes the same path -`BackendSelector.choose()` would take when `auto` resolves to xctest. - -The bug is probably that the `backend="xctest"` path runs an availability -check that's stricter than what `auto` does, or it hits a different -runner-build resolver. Either: -- Make the explicit path defer to `BackendSelector.choose(prefer="xctest")`, or -- Fix the availability check to match what `BackendSelector` does - -### Failing-test sketch - -```python -@pytest.mark.parametrize("backend", ["xctest", "auto"]) -def test_xctest_explicit_and_auto_yield_same_outcome(backend, live_sim): - # When auto resolves to xctest (the common case), explicit "xctest" - # must succeed too. - result = handle_start_session( - bundle_id="...", device_id=live_sim.udid, backend=backend, - ) - # Both should either succeed identically or fail identically. 
- assert "error" not in result, f"backend={backend} failed: {result}" -``` - -### Estimate - -- LOC: ~30–60 (depends on BackendSelector factoring) -- Test: ~30 LOC - -### Who - -**I could take this** with a careful read of `BackendSelector`. Risk -is low (refactoring to one code path) but blast radius covers every -session start, so a thorough test pass is needed. - ---- - -## 🟢 P2-A — `ios_act tap` response: `tapped: ""` is ambiguous (no element vs. unidentified element) - -In every smoke I ran, `ios_act({kind: "tap", x, y})` returned -`{"status": "ok", "tapped": "", "x": ..., "y": ...}`. The empty `tapped` -field could mean: -1. Coords missed every tappable element (true "miss") -2. Coords landed on an element without an `accessibilityLabel` or - `accessibilityIdentifier` (an "unidentified hit") -3. Coords landed but the runner couldn't resolve a hit-test result - -Three different error states, one identical response shape. An agent -trying to recover (e.g., search nearby coords on a "miss") can't tell -which it's looking at. - -### Suggested fix - -Change the runner's tap response to a structured shape: - -```jsonc -// Miss — no element resolved at coords -{"status": "ok", "result": "miss", "x": ..., "y": ...} - -// Hit on an unidentified element -{"status": "ok", - "result": {"kind": "unidentified", - "frame": {"x": .., "y": .., "w": .., "h": ..}, - "ui_class": "UIView"}, // optional class hint - "x": ..., "y": ...} - -// Hit on an identified element -{"status": "ok", - "result": {"kind": "identified", - "label": "My Books", // either or both - "identifier": "tab_my_books"}, - "x": ..., "y": ...} -``` - -Update `ios_act` tool docstring to document the three cases so vision -agents can branch on `result.kind` ∈ {`"miss"`, `"unidentified"`, -`"identified"`}. 
- -### Estimate - -- Swift: `runner/Sources/Routes/TapRoute.swift` — ~20–40 LOC -- Python wrapper: ~10 -- Tool docstring update: ~10 -- Test: ~50 LOC, live sim with crafted target view - -### Who - -**Your team.** Touches the tool API surface (a docstring change is -visible to every consumer) and benefits from the design eye on what -the right shape is. I'd be happy to consume whatever shape you ship. - ---- - -## 🟢 P2-B — `ios_observe.captured_at` reflects observe-call time, not actual frame capture time - -In a2 dogfood I observed 8 byte-identical screenshots (MD5 -`9de96fa9...`) with `captured_at` advancing across them -(`19:13:23 → 19:16:03 → 19:19:47`). Either: -- The runner re-captured each call but pixels happened to be identical - (unlikely — the sim was actually shutdown for some of those) -- The runner served a cached frame and slapped a fresh `captured_at` on - it at response time - -Either way, `captured_at` should reflect the **actual** wall-clock time -the underlying screenshot bytes were captured by the screencap call, not -the time the JSON response was assembled. - -### Suggested fix - -```swift -// runner/Sources/Routes/ScreenshotRoute.swift -let imageBytes = capture() -let captureTime = Date() // immediately after capture call returns -// ... -return .json(["screenshot_path": ..., - "captured_at": captureTime.iso8601]) -``` - -If frames are intentionally cached for performance, expose -`{captured_at, age_ms, max_age_ms}` so the agent knows the staleness -budget. - -### Estimate - -- Swift: ~10 -- Test: deliberate freeze + observe + verify timestamp doesn't move -- LOC: ~15 + 30 test - -### Who - -**Your team.** Likely closely related to P0-B (refuse-on-stale). - ---- - -## 🟢 P2-C — `simctl status_bar override` masks freshness signals for vision agents - -Palace freezes the status bar to `2:48` (typical iOS demo override). A -vision agent cannot use the time bar to detect "did the screen actually -update?" 
because pixels in the status bar always match. - -This is **not a SpecterQA bug** but it's a bad interaction with v16's -vision-first design. Two light-touch options: - -1. The recorder/setup template / docs advise NOT freezing `status_bar` - for active sessions. -2. Render a small, stable overlay (corner, low-contrast, well outside any - tap target) into the screenshot showing `captured_at` ms or a frame - counter. Gives the vision agent a deterministic freshness pixel even - when the underlying app is idle. - -### Estimate - -- Docs: 5 lines in setup template -- Swift overlay: ~30 LOC if pursued (low priority — docs route is fine) - -### Who - -**Your team's call.** Light docs + optional Swift work. Useful -if/when you want vision agents to detect "screen is idle vs. agent -doesn't see updates." - ---- - -## ⚙ Environmental / external - -These aren't SpecterQA bugs but they affect dogfood and hint at -runner-build / doctor improvements you might want. - -### Xcode 26.3 + iOS <26.3 sim runtime mismatch causes DVTiPhoneSimulator assertion in xcodebuild - -I have Xcode 26.3 installed but only iOS 26.0 / 26.1 / 26.2 simulator -runtimes (no 26.3). a3's verification was on iPhone 17 Pro / iOS 26.3. -On every available sim runtime I have, xcodebuild crashes during runner -deploy with: - -``` -ASSERTION FAILURE in DVTiOSFrameworks/IDEiOSSupportCore/DVTiPhoneSimulator.m:1856 -Details: (launchSession) should not be nil. -Method: -installApplicationWithLaunchSession:error: -xcodebuild test-without-building exited with code -6 during startup -``` - -This is an Apple-internals bug, not yours. But three possible -SpecterQA-side improvements: - -1. **`runner build` should warn on SDK/runtime gap.** When invoked under - Xcode N.N, check available iOS runtimes via `simctl list runtimes - --json`. If the runner-build's compiled SDK is `N.N` and no - `>=N.N` sim runtime is installed, print a warning + suggest - `Xcode → Settings → Platforms` to install the matching runtime. -2. 
**`ios_doctor` should add this check.** Currently it reports - `[OK] Xcode 26.3` and `[OK] XCTest runner built` independently — - the gap between them is invisible. -3. **The `runner_deploy_health_timeout` hint message a3 added is good - ("common cause on iOS 26.0: SDK mismatch").** Consider extending it - to detect the version pair from `simctl list runtimes` and quote it - in the message: "your runner targets iphonesimulator26.2; sim - runtime is iOS 26.0; install iOS 26.2 runtime via Xcode → - Settings → Platforms." - -### Estimate - -- LOC: ~30 in `cli/commands.py::runner_build` + `cli/commands.py::doctor` -- Tests: mock `simctl list runtimes` output, assert warning fires - -### Who - -**I could take this.** Pure CLI work, no Swift, no live sim. -Defensible test fixtures. - ---- - -## Suggested fix order - -If I were prioritizing, I'd ship in this order: - -1. **P0-A** — split-path bug. Smallest, highest visibility, blocks every - dogfood iteration's agent-side error handling. -2. **Environmental hint improvements** — runner_build SDK warning + - doctor check. Cheap, prevents future Maurice-shaped users from - hitting the same wall I did today. -3. **P1-A** — `ios_logs` dead-sim error. Trivial, debugging-quality - improvement. -4. **P1-B** — `backend=xctest` vs `auto` unification. Cleanup. -5. **P0-C** — `/health` re-validate sim alive. Foundational; once this - ships P0-B becomes a much smaller fix on top. -6. **P0-B** — refuse-on-stale `/screenshot`. Builds on P0-C. -7. **P2-A** — structured `ios_act tap` response. UX polish, useful for - the vision-first design. -8. **P2-B** — `captured_at` accuracy. Cleanup, often the same patch as - P0-B. -9. **P2-C** — status_bar / freshness overlay. Optional. - ---- - -## Items I'd like to take - -If you grant me access: - -- **P0-A** — `start_session` split-path. ~1 PR, half a day. -- **P1-A** — `ios_logs` dead-sim error. ~1 PR, 1–2 hours. -- **P1-B** — backend=xctest unification. ~1 PR, ~1 day. 
-- **Environmental hints** (`runner build` SDK warning + doctor check). - ~1 PR, ~half a day. - -That's four small PRs of low blast radius, all Python-only, each gated -on TDD + a passing test that catches the bug. I won't touch the Swift -runner without your explicit approval — that's your team's deep -expertise. - ---- - -## What I'd want before starting - -1. **Repo URL + clone access.** -2. **Branch policy.** Develop or main? PR-gated reviewer? -3. **Test invocation.** Per `runner-build/.specterqa-version` you're at - `16.0.0a3`; my a2 dogfood mentioned 519 unit tests / 31 skipped / - 0 fail. Which `pytest` invocation runs that suite, and is there a - live-sim integration test marker I should run before shipping? -4. **In-flight visibility.** What's already cooking on these? I don't - want to double-commit if you're already mid-fix on P0-A. -5. **Issue-tracker link** if you'd like me to file each as a separate - issue first. - ---- - -## What I'm NOT pitching to take - -- Swift runner work (P0-B, P0-C, P2-A right edge, P2-B). Your domain. -- Replay v2 / recording v2 (Phase C/D). Strategic. -- The Apple `DVTiPhoneSimulator` assertion itself. That's Apple's bug. -- Any work that depends on live sim verification with iOS 26.3 runtime, - until I install that runtime. - ---- - -## Closing - -a3 made real progress on the things that matter. The split-path P0-A is -the obvious next-shipped fix and I think it's a clean one to start on. -The environmental hint improvements would have saved me ~half this -dogfood session — they're underrated. - -The vision-first redirection is the right strategy. The runner-lifecycle -foundation is closer than it was on a2. P0-B and P0-C close the door on -the silent-stale-state class of failures that dominated my a2 report. - -Happy to chat or pair on any of this. 
- -— mc diff --git a/.specterqa/internal/HANDOFF.md b/.specterqa/internal/HANDOFF.md deleted file mode 100644 index c1da46e..0000000 --- a/.specterqa/internal/HANDOFF.md +++ /dev/null @@ -1,137 +0,0 @@ -# SpecterQA iOS — Session Handoff - -**Date:** 2026-04-16 -**From:** Atlas CEO session (Opus 4.6, ~18 hours across Apr 10-16) -**Branch:** `main` at `ea1ce8f` -**PyPI:** v13.0.1 (released but NOT production-ready) -**INIT:** INIT-2026-527 (dogfood fixes), INIT-2026-532 (test harness) - ---- - -## Current State: 22/40 smoke tests passing — NOT shippable - -### What works -- 29 MCP tools fully implemented -- TestKitApp with 5 tabs (Form, List, Nav, Stress, Palace patterns) -- AX backend (host-side, zero crashes, but only sees ~15 elements on SwiftUI) -- XCTest backend sees 58+ elements but crashes on UI transitions -- 40 live smoke tests against real iOS simulator -- WDA-proven crash mitigations applied (partial — see below) - -### What's broken: XCTest runner crashes on UI transitions - -**Root cause:** `[XCTRunnerIDESession logDebugMessage:]` → `NSKeyedArchiver` tries to serialize a message containing a deallocated AX element pointer during: -- Sheet/modal presentations -- Keyboard open + tab switch -- Notification cascades (borrow/download/library switch) -- `app.snapshot()` during view transitions - -**WDA mitigation attempted:** `XCSetDebugLogger` (private symbol to replace the debug logger) — **symbol not found on Xcode 26**. This was WDA's production fix but Apple removed/renamed the symbol. - -**Partial mitigations applied (in `SpecterQARunner.swift:applyCrashMitigations()`):** -- `XCTDisableRemoteQueryEvaluation = YES` ✓ -- `DisableDiagnosticScreenRecordings = YES` ✓ -- `DisableScreenshots = YES` ✓ -- `XCSetDebugLogger` replacement — **FAILED** (symbol not in Xcode 26) - -### 18 failing smoke tests (all from runner crash) - -The failures cascade — once the runner dies at test ~27%, all subsequent tests fail because the runner is dead. 
The first crash trigger is `TestKeyboardDuringTabSwitch` which opens a keyboard then switches tabs. - ---- - -## What the next session needs to do - -### Priority 1: Fix the XCTest crash on Xcode 26 - -The `XCSetDebugLogger` symbol doesn't exist in Xcode 26. Options to investigate: - -1. **Find the renamed symbol.** Run: - ```bash - nm -gU /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/Library/Frameworks/XCTest.framework/XCTest | grep -i "debug\|logger\|log" - ``` - The function may have been renamed to `_XCSetDebugLogger` or moved to a different class. - -2. **Method swizzle `XCTDefaultDebugLogHandler`** instead of replacing the logger: - ```swift - // Swizzle -[XCTDefaultDebugLogHandler logDebugMessage:] to a no-op - let original = class_getInstanceMethod(XCTDefaultDebugLogHandler.self, #selector(logDebugMessage:)) - let replacement = class_getInstanceMethod(SpecterQASafeDebugLogger.self, #selector(logDebugMessage:)) - method_exchangeImplementations(original, replacement) - ``` - -3. **Disable the XCTest observation center entirely:** - ```swift - // XCTestObservationCenter.shared.removeTestObserver(...) - // or intercept the notification that triggers the log - ``` - -4. **Add a transition guard to element queries:** - - Before `app.snapshot()`, wait 500ms - - Check `app.state == .runningForeground` - - Retry on failure instead of crashing - -5. **Check if WDA's latest code has a different approach for Xcode 16+:** - ```bash - # Clone latest WDA and search for their logger fix - git clone https://github.com/appium/WebDriverAgent.git /tmp/wda - grep -r "XCSetDebugLogger\|XCTDefaultDebugLog\|logDebugMessage" /tmp/wda/ - ``` - -### Priority 2: Make all 40 smoke tests pass - -The tests are correct — they trigger real crash scenarios. The tool must be fixed, not the tests. Each failing test represents a real user flow that crashes. 
- -### Priority 3: AX backend SwiftUI traversal - -The AX backend only sees ~15 elements on SwiftUI views because iOS 26's Simulator AX bridge flattens the tree. R&D confirmed this is a platform limitation — Accessibility Inspector uses DTXConnection (Xcode debugger protocol), not AXUIElement. - -Possible approach: hybrid backend that uses XCTest for element queries and AX/CGEvent for actions. But this requires XCTest to not crash during queries. - -### Priority 4: Palace dogfood - -Palace source is at `/Users/atlas/Downloads/ios-core-modernize-whole-shot/` but can't compile on this machine (missing Carthage artifacts). Either: -- Get a pre-built `.app` from the Palace team -- Run `carthage bootstrap` to fetch AudioEngine.xcframework -- Test on the other machine where Palace is already installed - ---- - -## Key files - -| File | Purpose | -|------|---------| -| `runner/Sources/SpecterQARunner.swift` | XCTest runner — crash mitigations at `applyCrashMitigations()` | -| `runner/Sources/HTTPServer.swift` | Runner HTTP server — all endpoint handlers | -| `runner/Sources/TouchInjector.swift` | Tap, type, key press — typeText crash mitigations | -| `src/specterqa/ios/backends/ax_backend.py` | AX backend — host-side, zero crashes, 15 elements | -| `src/specterqa/ios/backends/xctest_client.py` | XCTest HTTP client | -| `src/specterqa/ios/mcp/server.py` | MCP server — all tool handlers, session management | -| `tests/smoke/test_live_session.py` | 13 functional smoke tests | -| `tests/smoke/test_crash_patterns.py` | 27 crash pattern + Palace + mitigation tests | -| `TestKitApp/` | 5-tab test app (Form, List, Nav, Stress, Palace patterns) | - -## Key memories - -- `feedback_fix_all_test_failures.md` — Fix ALL failures before shipping -- `feedback_live_test_before_pypi.md` — Live sim test before every PyPI release -- `feedback_no_mock_tests_specterqa.md` — No mock tests, all real behavior -- `feedback_testkit_complexity.md` — TestKitApp must mirror real-world complexity -- 
`feedback_testkit_tdd_gate.md` — Smoke tests are TDD gate for all development - -## Session releases (v11.4.0 → v13.0.1) - -18 releases across 6 days. Key versions: -- v11.5.0: 12 dogfood fixes, Element Resolver v2 -- v12.0.0: Targeted ios_type (multi-field forms) -- v12.1.0: Test harness (10/10 live smoke tests) -- v12.2.0: Test architecture overhaul (deleted 19,932 lines of mock theater) -- v12.4.0: 27 crash pattern scenarios, StressTab, UIKitBridgeTab -- v12.5.0: Agent-first MCP instructions, perf workflow tools -- v12.6.0: PalacePatternTab (notification cascade, Combine progress, UIKit modal) -- v13.0.0: AXUIElement backend (zero crashes, but limited SwiftUI tree) -- v13.0.1: Default back to XCTest (AX too limited for SwiftUI) - -## Bottom line - -The tool has 29 MCP tools, comprehensive agent instructions, and a solid test harness. The blocker is the XCTest runner crash on iOS 26 when the app triggers UI transitions. The WDA fix (`XCSetDebugLogger`) doesn't work on Xcode 26 — the symbol was removed. The next session needs to find Xcode 26's equivalent or a different mitigation approach. diff --git a/.specterqa/internal/v14-design.md b/.specterqa/internal/v14-design.md deleted file mode 100644 index 7d2e019..0000000 --- a/.specterqa/internal/v14-design.md +++ /dev/null @@ -1,652 +0,0 @@ -# SpecterQA iOS v14.0.0 — Design Document - -**Status:** Draft — awaiting Chairman review -**Author:** CodeAtlas / SyncTek -**Date:** 2026-04-19 -**Initiative:** INIT-2026-525 -**Supersedes:** v13.3.0 (PyPI), v13.2.2 (last stable) - ---- - -## 1. Problem Statement - -### Root Cause: Three Parallel Deploy Paths - -v13.3.0 introduced `ios_start_runner` / `ios_stop_runner` as explicit runner lifecycle tools, adding a third implementation of "deploy XCTest runner" on top of the two that already existed: - -| Path | Location | Owns xctestrun mutation? | Owns xcodebuild process? 
| -|------|----------|--------------------------|--------------------------| -| `TestSession._deploy_runner()` | `session_manager.py:883` | Yes | Yes (self._runner_process) | -| `handle_start_session` inline | `mcp/server.py:371–405` | Yes | Yes (module-level \_runner_proc) | -| `handle_start_runner` | `mcp/server.py:2731` | Yes | Yes (\_active_runners dict) | - -All three paths call `TestSession._inject_xctestrun_env()` to mutate the same `.xctestrun` plist on disk before launching `xcodebuild test-without-building`. When any two paths are in flight simultaneously — which happens whenever an AI agent calls both `ios_start_session` and `ios_start_runner` in parallel, or when a session reconnects — they corrupt each other's plist writes and race on the same xcodebuild process launch. - -### Why the Same Bug Keeps Coming Back - -Tracing three recent regressions to the same structural cause: - -- **B9 / v13.2.0**: `ios_start_session(backend="xctest")` stopped deploying the runner because the inline deploy path in `handle_start_session` was removed during the BackendSelector refactor, but `TestSession._deploy_runner()` was not wired back as the sole path. Two paths → one disappeared → silent failure. -- **B1.x / v13.2.0+1**: `_runner_source_dir()` pointed at the dev-tree `runner/` layout. The `setup.py build_py` override copies Swift sources into `src/specterqa/ios/runner_source/` at wheel-build time, but the installed wheel has a different path. The dual-source-directory arrangement (canonical `runner/`, wheel copy `runner_source/`) created a third surface where path resolution diverged. -- **v13.3.0 sim-kill**: `ios_start_runner` launches a second `xcodebuild test-without-building` process against a sim that may already be held by a `TestSession`. When xcodebuild fails (port conflict, plist corruption), its cleanup path calls `xcrun simctl shutdown <udid>`. The session that was already running sees its sim disappear. 
The tool that was meant to be a convenience wrapper became a silent sim-killer. - -**Pattern:** Every regression traces to the same root — there is no single owner of runner process state. Any refactor that doesn't address the owner problem produces a new variant of the same class of bug. - ---- - -## 2. Goals - -v14.0.0 serves two equal-priority use cases. Both must be first-class after this release. - -### G1 — CI Replay (Record → Save → Deterministic Replay) - -User records a flow once via MCP or CLI. Saves a YAML replay artifact. CI runs `ios_replay` or `specterqa-ios replay` on every PR. The replay is a single-shot operation that may take minutes — startup cost is acceptable. Priority: determinism and zero flakiness over speed. - -**Current state:** Works end-to-end as of v13.2.2 (B3+B4 fixed, validate-replay pipeline complete). v14 must not regress any validated replay flow. - -### G2 — AI Debugging Loop (Agent → Walk → Log → Rebuild → Retest) - -Claude or Cursor drives the app iteratively: tap something, read the logs that fired, modify code, rebuild app, relaunch, repeat. Each cycle must be under 5 seconds. Current per-cycle cost is 35–50s because relaunching the app requires tearing down and reconstructing the entire session. - -**Current state:** Not viable. v14 introduces primitives that make this loop fast and observable. This is the primary motivation for the new MCP tools in Section 7. - ---- - -## 3. Non-Goals - -The following are explicitly out of scope for v14.0.0. 
They are first-class features and must continue to work without regression: - -| Tag | Feature | Status | -|-----|---------|--------| -| W1 | Maestro YAML syntax for replay files | Keep as-is | -| W2 | Shared-runner CI mode (`ios_start_session` no-clone path) | Keep as-is | -| W3 | `validate-replay` strict mode / over-eager validation | Keep as-is, no behavior change | -| W4 | Runner status table in `ios_doctor` | Keep as-is | -| W5 | All 10 discovery + observation tools | Keep as-is | -| W6 | `frontmost_udid` auto-detection | Keep as-is | - -v14 does not introduce any new Maestro YAML fields, does not change replay file schema, and does not change CLI command surface (except removing the two broken tools from MCP registration). - ---- - -## 4. Architecture — `RunnerProcess` Lifecycle Class - -### 4.1 Motivation - -The fix is ownership, not patching. One class owns the runner process lifecycle. Every path that needs a runner asks `RunnerProcess` for one. No path bypasses it. - -### 4.2 Class Location - -``` -src/specterqa/ios/runner_process.py -``` - -### 4.3 API Surface - -```python -from __future__ import annotations -from enum import Enum, auto -from pathlib import Path -from typing import Optional - - -class RunnerState(Enum): - IDLE = auto() # No process. Port unallocated. - BUILDING = auto() # xcodebuild -scheme running (runner build). - DEPLOYED = auto() # xcodebuild test-without-building launched; awaiting /health. - RUNNING = auto() # /health returned 200. Ready for requests. - STOPPED = auto() # Gracefully stopped. Port released. - FAILED = auto() # Unrecoverable. Error stored in self.last_error. - - -class RunnerProcess: - """Single owner of the XCTest runner process lifecycle. - - One instance per (udid, port) pair. All callers share the same instance - via RunnerProcess.acquire(udid, port). 
- """ - - # ── Factory ────────────────────────────────────────────────────────────── - - @classmethod - def acquire(cls, udid: str, port: int = 8222) -> "RunnerProcess": - """Return existing instance for (udid, port) or create a new IDLE one.""" - ... - - # ── Lifecycle ──────────────────────────────────────────────────────────── - - def build(self, build_dir: Path, force: bool = False) -> None: - """Build the Swift runner if sources changed (hash-gated). - - Raises RunnerBuildError with xcodebuild stderr on failure. - State: IDLE → BUILDING → IDLE (build only, no deploy). - """ - ... - - def deploy(self, bundle_id: str, port: Optional[int] = None) -> None: - """Inject env into xctestrun, launch xcodebuild test-without-building. - - Idempotent if already RUNNING on the same port. - Raises RunnerDeployError on xcodebuild failure — LOUD, no fallback. - State: IDLE → DEPLOYED → RUNNING. - """ - ... - - def stop(self, shutdown_sim: bool = False) -> None: - """Terminate xcodebuild process. Release port. - - shutdown_sim=True only when the caller is explicitly tearing down the - simulator (e.g. session cleanup). Never called by ios_stop_runner. - State: RUNNING → STOPPED. - """ - ... - - def healthcheck(self, timeout_s: float = 60.0) -> bool: - """Poll /health until 200 or timeout. Returns True on success.""" - ... - - def relaunch_app(self, bundle_id: str) -> None: - """Kill + relaunch the user's app without stopping the runner. - - Uses simctl terminate + simctl launch. Runner HTTP server stays up. - Target: < 2s. Does NOT restart xcodebuild. - State: RUNNING → RUNNING (no state change). - """ - ... - - def allocate_port(self) -> int: - """Find a free port in _PORT_RANGE. Raises RuntimeError if all busy.""" - ... - - # ── Introspection ──────────────────────────────────────────────────────── - - @property - def state(self) -> RunnerState: - ... - - @property - def port(self) -> Optional[int]: - ... - - @property - def last_error(self) -> Optional[str]: - ... 
-``` - -### 4.4 State Machine - -``` - ┌─────────────────────────────────────────────────────┐ - │ RunnerProcess │ - └─────────────────────────────────────────────────────┘ - - acquire() - │ - ▼ - IDLE ──── build() ──► BUILDING ──► IDLE (build only, no process) - │ - │ deploy() - ▼ - DEPLOYED (xcodebuild launched, /health not yet 200) - │ - │ healthcheck() → 200 - ▼ - RUNNING ◄──── relaunch_app() (loop back, no state change) - │ - │ stop() - ▼ - STOPPED - - Any state ──── xcodebuild exits non-zero ──► FAILED - (last_error populated, no retry) -``` - -### 4.5 Concurrency - -`RunnerProcess.acquire()` is protected by a per-(udid, port) `threading.Lock`. The deploy + healthcheck sequence holds the lock. Concurrent callers block on `acquire()` until the first caller reaches RUNNING, then receive the already-running instance. No double-launch possible. - -The module-level registry (`_instances: dict[tuple[str, int], RunnerProcess]`) is guarded by a separate global lock for registry mutation only. - -### 4.6 Migration of Existing Callers - -| Old caller | New call | -|-----------|----------| -| `TestSession._deploy_runner()` | `self._runner = RunnerProcess.acquire(udid, port); self._runner.deploy(bundle_id)` | -| `handle_start_session` inline deploy | same | -| `handle_start_runner` | same | -| `TestSession.stop()` xcodebuild kill | `self._runner.stop(shutdown_sim=True)` | - -All three existing parallel implementations are deleted. No `subprocess.Popen("xcodebuild", ...)` call exists outside `RunnerProcess`. - ---- - -## 5. Backend Policy - -### XCTest is Default and Only First-Class Backend - -XCTest is the backend for all production use. When `ios_start_session` is called without `backend=` argument, XCTest is selected. When XCTest deploy fails, the error is loud and actionable: - -``` -RunnerDeployError: xcodebuild test-without-building failed. - UDID: 1A2B3C4D-... - Port: 8222 - Build dir: ~/.specterqa/runner-build - xcodebuild stderr: - <captured xcodebuild stderr> - - Next steps: - 1. 
Run: specterqa-ios runner build - 2. Verify the simulator is booted: xcrun simctl list | grep Booted - 3. See docs/troubleshooting.md for known Xcode 16 / iOS 18.4 issues. -``` - -**There is NO silent fallback to AX on XCTest failure.** Silent wrong-data (AX returning stale or partial element trees while the user thinks XCTest is running) is worse than a loud failure. This policy was introduced in v13.2.0 and must be reinforced throughout the v14 refactor. Any code path that catches a `RunnerDeployError` and silently retries with AX is a bug. - -### AX Backend - -Opt-in only: `ios_start_session(backend="ax")`. Intended for environments without Xcode installed (CI machines with only Command Line Tools, remote runners). AX returns lower-fidelity element trees and does not support replay recording. It is documented as a fallback for observation-only tasks, not for recording or AI debugging loops. - ---- - -## 6. Wheel Restructure - -### Current Layout (Fragile) - -``` -runner/ ← canonical Swift sources (dev layout) - Sources/ - SpecterQARunner.xcodeproj/ - ... - -src/specterqa/ios/runner_source/ ← wheel copy (populated by setup.py build_py override) - Sources/ ← (MISSING in current runner_source — only stubs) - build.sh - launch.sh - Package.swift - __init__.py -``` - -The `setup.py build_py` override copies `runner/Sources/` into `runner_source/Sources/` at wheel-build time. This is the source of the B1 class of bugs: any divergence between `runner/` and `runner_source/` — a new Swift file, a new xcodeproj reference — produces a broken wheel. The developer doesn't see it because their dev layout finds `runner/` directly; the user sees it because the installed wheel only has `runner_source/`. 
- -### v14 Layout (Clean) - -``` -runner/ ← promoted to a proper Python package - __init__.py ← NEW: empty, makes runner/ a package - Sources/ - SpecterQARunner.xcodeproj/ - HostApp/ - build.sh - launch.sh - Package.swift -``` - -**Deletions:** - -- `src/specterqa/ios/runner_source/` — entire directory deleted -- `setup.py` — deleted (the `build_py` override is the only reason it exists) -- `[tool.setuptools.package-data]` glob list in `pyproject.toml` — deleted -- `recursive-include specterqa/ios/runner_source` in `MANIFEST.in` — deleted - -**pyproject.toml after restructure:** - -```toml -[tool.setuptools.packages.find] -where = ["src", "."] -include = ["specterqa*", "runner*"] - -[tool.setuptools.package-data] -"runner" = [ - "Sources/*.swift", - "Sources/Routes/*.swift", - "Sources/SpecterQARunner-Bridging-Header.h", - "Sources/SpecterQASwizzler.h", - "Sources/SpecterQASwizzler.m", - "HostApp/**", - "SpecterQARunner.xcodeproj/project.pbxproj", - "build.sh", - "launch.sh", - "Package.swift", -] -``` - -`_runner_source_dir()` in `session_manager.py` is updated to resolve via `importlib.resources` against the `runner` package. Both dev and installed-wheel layouts resolve to the same path. The B1 class of bugs is structurally eliminated. - -**`verify-wheel` CI job** runs `python -m build --wheel`, installs into a fresh venv, and executes `specterqa-ios runner build` against a booted simulator. This job must pass before any PyPI publish step runs. - ---- - -## 7. Five New MCP Tools — AI Debugging Loop - -These five tools reduce the AI debugging loop cycle time from 35–50s to under 5s per iteration. They are additive — they do not replace any CI replay tooling. - -### 7.1 `ios_app_relaunch` - -**Purpose:** Kill and relaunch the user's app without touching the XCTest runner. The runner HTTP server stays alive on its port. 
This is the critical path for the AI debugging loop — currently an agent must call `ios_stop_session`, then `ios_start_session` (35–50s) just to pick up a new build of the app. - -**Signature:** -```python -ios_app_relaunch(bundle_id: str) -> dict -# Returns: {"bundle_id": str, "launch_pid": int, "elapsed_ms": int} -``` - -**What it replaces:** The 4-call sequence `ios_stop_session → wait → ios_start_session → ios_wait_idle` (35–50s total). Target elapsed: < 2s. - -**Implementation:** `xcrun simctl terminate <udid> <bundle_id>` + `xcrun simctl launch <udid> <bundle_id>`. No xcodebuild involved. - -**Example:** -``` -# Agent has just rebuilt MyApp.app and copied it to the sim -ios_app_relaunch(bundle_id="com.example.MyApp") -# → {"bundle_id": "com.example.MyApp", "launch_pid": 12345, "elapsed_ms": 980} -``` - ---- - -### 7.2 `ios_logs_tail` - -**Purpose:** Return only logs that have appeared since the last call to this tool (per session). Agents currently call `ios_logs` repeatedly and manually diff the output. This produces O(n) log volume per agent turn with no correlation to the action just taken. - -**Signature:** -```python -ios_logs_tail( - since_last_call: bool = True, - level: str = "all", # "debug" | "info" | "error" | "all" - category: str | None = None, - limit: int = 200, -) -> dict -# Returns: {"entries": list[LogEntry], "count": int, "cursor_advanced": bool} -``` - -**What it replaces:** Repeated full `ios_logs` calls + manual windowing. The tool maintains a per-session monotonic cursor (log sequence number or timestamp). `since_last_call=True` is the default; `since_last_call=False` returns the full recent buffer. - -**Example:** -``` -ios_tap(label="Submit") -ios_logs_tail() -# → {"entries": [{"level": "error", "message": "NetworkError: timeout", ...}], "count": 3} -``` - ---- - -### 7.3 `ios_capture_state` - -**Purpose:** Bundle screenshot + element tree + recent logs + basic perf snapshot into one MCP return. Reduces 4 sequential tool calls (screenshot, elements, logs, perf) to 1. 
Cuts agent turn count and reduces total round-trip time. - -**Signature:** -```python -ios_capture_state( - include_screenshot: bool = True, - include_elements: bool = True, - include_logs: bool = True, - include_perf: bool = False, - log_tail_lines: int = 50, -) -> dict -# Returns: {"screenshot": base64 | None, "elements": list, "logs": list, "perf": dict | None, "captured_at": str} -``` - -**What it replaces:** 4 separate calls: `ios_screenshot`, `ios_elements`, `ios_logs`, `ios_perf`. Particularly useful at the start of each debugging iteration to get full situational awareness in one round-trip. - -**Example:** -``` -state = ios_capture_state(include_perf=True) -# → {screenshot: "...", elements: [...], logs: [...], perf: {memory_mb: 142, cpu_pct: 3.2}} -``` - ---- - -### 7.4 `ios_action_with_logs` - -**Purpose:** Execute a single interaction action and atomically return the logs that fired during that action. The agent no longer has to manually time log windows around actions. Eliminates the "did this log come from before or after the tap?" problem. - -**Signature:** -```python -ios_action_with_logs( - action: dict, # Same schema as individual action tools - log_window_ms: int = 2000, # How long to collect logs after the action - level: str = "all", -) -> dict -# Returns: {"action_result": dict, "logs": list[LogEntry], "log_count": int} -``` - -**Supported action types in `action` dict:** `tap`, `long_press`, `type`, `swipe`, `press_key`, `swipe_back`. Schema mirrors existing MCP tool arguments. - -**What it replaces:** Manual sequence of `ios_tap` + `ios_wait(1)` + `ios_logs`. Log collection is guaranteed to cover the action's response window without agent-side timing logic. 
- -**Example:** -```python -ios_action_with_logs( - action={"type": "tap", "label": "Login"}, - log_window_ms=3000 -) -# → {"action_result": {"success": true}, "logs": [{"message": "auth: token issued", ...}], "log_count": 5} -``` - ---- - -### 7.5 `ios_promote_session_to_test` — KILLER FEATURE - -**Purpose:** Save the current live debugging session as a named replay artifact — instant regression test creation. An agent that has just debugged a bug and confirmed the fix can call this tool to capture the exact interaction sequence as a replay YAML. The next CI run will exercise this exact flow as a regression test. - -This closes the loop between AI debugging and CI replay: every debugging session is one call away from becoming a permanent test. No manual YAML authoring. No `ios_start_recording` → walk flow → `ios_stop_recording` ceremony required. - -**Signature:** -```python -ios_promote_session_to_test( - name: str, # Replay file name (without .yaml) - description: str = "", # Human-readable test description - validate: bool = True, # Run ios_validate_replay immediately -) -> dict -# Returns: {"replay_path": str, "step_count": int, "validation": dict | None} -``` - -**What it replaces:** Manual `ios_stop_recording(name=...)` + reviewing the YAML + submitting it to the repo. The tool snapshots the current step buffer (same buffer `ios_stop_recording` reads), writes the YAML, optionally validates it in-place, and returns the path. - -**Implementation note:** The step buffer is not cleared by this call. The session continues. The agent can keep debugging and promote again with a different name. 
- -**Example:** -```python -# Agent has just walked through and fixed the login timeout bug -ios_promote_session_to_test( - name="login_timeout_regression", - description="Reproduces and validates fix for login timeout on slow network", - validate=True, -) -# → {"replay_path": "~/.specterqa/replays/login_timeout_regression.yaml", -# "step_count": 8, -# "validation": {"passed": true, "steps_validated": 8}} -``` - ---- - -## 8. Tool Surface Delta - -### Removals - -| Tool | Reason | Migration | -|------|--------|-----------| -| `ios_start_runner` | Sim-killer. Launches a competing xcodebuild against an already-running session. Root cause of v13.3.0 regression. | `ios_start_session` handles runner lifecycle automatically. No replacement needed. | -| `ios_stop_runner` | Paired with `ios_start_runner`. Without the start tool, the stop tool has no valid use. Also risks shutting down the runner mid-session. | `ios_stop_session` handles cleanup. No replacement needed. | -| `ios_save_replay` | Deprecated since v13.2.0. `ios_stop_recording(name=...)` is the canonical save path. `ios_promote_session_to_test` supersedes for AI debugging users. | Use `ios_stop_recording(name="my_flow")` or `ios_promote_session_to_test(name="my_flow")`. | - -### Additions - -| Tool | Purpose | -|------|---------| -| `ios_app_relaunch` | Sub-2s app restart without runner teardown | -| `ios_logs_tail` | Incremental log cursor per session | -| `ios_capture_state` | Bundle screenshot + elements + logs + perf in one call | -| `ios_action_with_logs` | Atomic action + log window | -| `ios_promote_session_to_test` | Live session → replay YAML in one call | - -### Net Count - -| Version | Count | -|---------|-------| -| v13.2.0 | 38 tools | -| v13.3.0 | 40 tools (+2 broken: start_runner, stop_runner) | -| v14.0.0 | **43 tools** (−3 removed, +5 added, net +3) | - -The MCP server's tool-count regression test must be updated to assert 43. - ---- - -## 9. 
End-to-End CI Dogfood Tests - -Two tests mirror real user workflows. Both run on every PR against main. - -### 9.1 CI Replay Dogfood - -``` -tests/e2e/test_ci_replay_dogfood.py -``` - -Steps: -1. `pip install specterqa-ios==<version> --no-cache-dir` from PyPI into a fresh venv (or from the local wheel during development) -2. Boot a named simulator (iPhone 15, iOS 17 target) -3. Call `ios_start_session` → `ios_start_recording` -4. Walk TestKitApp: tap 3 buttons, assert element states -5. Call `ios_stop_recording(name="ci_dogfood_flow")` -6. Call `ios_validate_replay(name="ci_dogfood_flow")` — assert all steps PASS -7. Call `ios_replay(name="ci_dogfood_flow")` — assert all steps PASS, elapsed < 60s -8. `ios_stop_session` - -Pass criteria: all replay steps pass, no sim shutdown during the run, replay YAML written to disk. - -### 9.2 AI Debugging Dogfood - -``` -tests/e2e/test_ai_debugging_dogfood.py -``` - -Steps: -1. `pip install` (same as above) -2. Boot sim, `ios_start_session` against TestKitApp -3. Walk through 3 screens using `ios_action_with_logs` for each action — assert logs returned are non-empty -4. `ios_capture_state()` — assert all four payload keys present -5. `ios_app_relaunch(bundle_id="com.synctek.TestKitApp")` — assert elapsed_ms < 3000, sim still booted -6. Walk 3 screens again via `ios_action_with_logs` -7. `ios_logs_tail()` — assert returns incremental entries (fewer than a full `ios_logs` call would return) -8. `ios_promote_session_to_test(name="ai_debug_dogfood", validate=True)` — assert validation passes -9. Confirm resulting YAML is a valid replay (run `ios_validate_replay`) -10. `ios_stop_session` - -Pass criteria: `ios_app_relaunch` under 3s, all new tools return non-error responses, promoted replay validates cleanly, sim never shuts down unexpectedly. - ---- - -## 10. Version Bump Justification - -v14.0.0 (not v13.4.0 or v13.3.1) for three independent reasons, any one of which would justify a major version: - -1. 
**Breaking change — MCP tool removals.** `ios_start_runner`, `ios_stop_runner`, and `ios_save_replay` are removed. Any caller depending on these tools receives a "tool not found" error after upgrading. Per SemVer, removal of public API is a major version increment. - -2. **Internal architecture overhaul.** `RunnerProcess` replaces all three parallel deploy paths. While this is not a public API change, it invalidates any integrations that monkey-patched or subclassed `TestSession._deploy_runner` or relied on the module-level `_active_runners` dict. - -3. **Wheel restructure changes internal import paths.** `specterqa.ios.runner_source` package is deleted. Any user code that imported from it (unlikely but possible for advanced integrators) will break. - -The "consolidation release" narrative is accurate: v14 is the release where the tool stabilizes its architecture. Future minor versions (14.1, 14.2) add capabilities without structural churn. - ---- - -## 11. Phased Rollout - -### Phase 1 — v14.0.0-alpha.1 - -**Scope:** -- Implement `RunnerProcess` class (`runner_process.py`) -- Refactor `TestSession._deploy_runner()` to use it -- Refactor `handle_start_session` inline deploy to use it -- Delete `handle_start_runner`, `handle_stop_runner`, `_active_runners` dict -- Remove `ios_start_runner`, `ios_stop_runner`, `ios_save_replay` MCP registrations -- All three existing parallel paths gone - -**Gate:** Maurice dogfoods v14.0.0-alpha.1 against TestKitApp. 
Requirements: -- `ios_start_session` → record flow → `ios_stop_recording` → `ios_replay` works end-to-end -- Simulator is NOT shut down at any point during the session -- `ios_doctor` reports runner healthy - -**Do NOT proceed to Phase 2 until dogfood passes.** - -### Phase 2 — v14.0.0-beta.1 - -**Scope:** -- Implement 5 new MCP tools (Section 7) -- Wheel restructure: add `runner/__init__.py`, delete `runner_source/`, delete `setup.py`, update `pyproject.toml` -- Update `_runner_source_dir()` to use `importlib.resources` against `runner` package -- `verify-wheel` CI job wired to publish.yml - -**Gate:** Maurice dogfoods AI debugging loop against TestKitApp: -- `ios_app_relaunch` cycles < 3s -- `ios_logs_tail` returns incremental entries -- `ios_promote_session_to_test` produces a valid replay YAML -- `specterqa-ios runner build` succeeds from a fresh `pip install` of the beta wheel - -**Do NOT proceed to Phase 3 until dogfood passes.** - -### Phase 3 — v14.0.0 (Final) - -**Scope:** -- Write and pass `test_ci_replay_dogfood.py` + `test_ai_debugging_dogfood.py` -- Update all docs (README tool count, llms.txt, troubleshooting.md, CHANGELOG.md) -- Run `make llms` to sync tool-surface docs; regression test must pass -- PR → QualityAtlas review → merge → tag v14.0.0 → auto-publish → post-publish PyPI verification - ---- - -## 12. 
Release Gates (Non-Negotiable) - -All three phases share the same release gate structure: - -| Gate | Requirement | -|------|-------------| -| PR + review | All code on a feature branch; PR reviewed by QualityAtlas before merge | -| Tag + auto-publish | `git tag v14.0.0-alpha.1` triggers `.github/workflows/publish.yml` | -| `verify-wheel` job | Build wheel → fresh-venv install → `specterqa-ios runner build` → must pass before PyPI upload | -| Post-publish verification | `pip install specterqa-ios==X.Y.Z --no-cache-dir` from a fresh venv on a separate machine/env; confirm `runner build` works; confirm new MCP tools exercise against a live booted simulator | -| Live simulator smoke test | Required for every phase release. Local dogfood (dev layout) does NOT satisfy this gate. | - -**Local dogfood does NOT count as a release gate.** The B1/B1.5 regression class was caused specifically by a divergence that was invisible in the dev layout and only appeared in an installed wheel. The `verify-wheel` CI job is the structural gate that catches this class of bug. It must run in CI, not locally. - ---- - -## 13. 
Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| `RunnerProcess.acquire()` lock deadlock on session teardown + concurrent start | Medium | Runner stuck in DEPLOYED forever | Implement lock with timeout (30s); FAILED state on timeout; log clearly | -| `relaunch_app` leaves app in background state instead of foreground | Medium | AI debugging loop gets element tree of previous screen | After `simctl launch`, wait for `ios_app_state` to return "foreground" (max 3s); error if not | -| Wheel restructure breaks `_runner_source_dir()` in installed wheel | High | Every fresh-install user's `runner build` fails (B1 repeat) | `verify-wheel` gate; explicit `importlib.resources.files("runner")` path resolution test in `test_packaging.py` | -| Removing `ios_save_replay` breaks a Palace flow that uses it | Low | Prod regression for an actual user | Audit `palace` repo for `ios_save_replay` calls before Phase 1 lands; add deprecation notice in v13.3.1 if needed (but note: v13.3.1 is skipped — include notice in alpha.1 error message: "ios_save_replay removed; use ios_stop_recording(name=...)") | -| `ios_promote_session_to_test` step buffer drift if session was interrupted | Medium | Promoted replay is incomplete | Check step buffer length before promote; error if < 2 steps; add `force=True` override | -| Concurrent AX + XCTest session requests after RunnerProcess lands | Low | RunnerProcess allocated for wrong backend | `RunnerProcess.acquire()` keyed on (udid, port, backend); AX sessions never touch RunnerProcess | - ---- - -## 14. Open Questions for Chairman - -The following decisions require explicit Chairman input before Phase 1 implementation begins. No code is written on these points until resolved. - -**OQ-1: RunnerProcess API shape — concurrent session semantics** - -The current design has one `RunnerProcess` instance per (udid, port). 
If two MCP clients call `ios_start_session` concurrently against the same sim, both receive the same `RunnerProcess` instance and share the runner. Is this the intended behavior? Alternative: reject the second call with "sim already in use." This affects multi-agent parallelism scenarios. - -**OQ-2: `ios_app_relaunch` — does it need to reinstall the app binary?** - -The current design uses `simctl terminate` + `simctl launch` (fast, sub-2s). The AI debugging loop case where the developer has just rebuilt the app and wants to test the new binary requires `simctl install <udid> <app_path>` first. Should `ios_app_relaunch` accept an optional `app_path` parameter to reinstall before relaunching? If yes, the sub-2s target applies only when `app_path` is None. - -**OQ-3: `ios_promote_session_to_test` — replay save location** - -Promoted replays default to `~/.specterqa/replays/<name>.yaml`. For the regression-test use case to work, the file needs to land in the repo (e.g., `tests/replays/<name>.yaml`). Should the tool accept a `save_dir` parameter? Or should it always save to `~/.specterqa/replays/` and require the user to commit the file manually? - -**OQ-4: `ios_save_replay` removal — Palace integration check** - -Before removing `ios_save_replay`, confirm no live Palace flows call it. If Palace is calling it, we need a migration window. v13.3.1 hotfix was skipped — if Palace uses `ios_save_replay`, we either (a) add a v13.3.1 deprecation-only release that prints a warning but still works, or (b) accept the break in alpha.1. Chairman decides. - -**OQ-5: Tool count target (43) — confirm net count** - -The current MCP server has tools registered in a flat list. Before Phase 3 final, confirm the actual count via `make llms` regeneration. The 43 figure does not match its own derivation: 40 (v13.3.0) − 3 removed + 5 added = 42, not 43. Recount required against the actual server registration. Locking in 43 now may require adjusting the regression test target. 
- ---- - -*End of v14.0.0 Design Document.* diff --git a/.specterqa/internal/v17-simdrive-surface.md b/.specterqa/internal/v17-simdrive-surface.md deleted file mode 100644 index 6fa9576..0000000 --- a/.specterqa/internal/v17-simdrive-surface.md +++ /dev/null @@ -1,162 +0,0 @@ -# simdrive — v0.1.0 MCP Surface - -> **Hand your simulator to your agent.** -> -> Claude-native iOS simulator driver. Self-guided. Recordable. Replayable. - -## North Star - -A Claude session opens an MCP connection, says "test the login flow on iPhone 17 Pro," and the agent drives a real iOS simulator with vision + clicks + keyboard. No XCTest. No accessibility tree. No selectors. Just **observe → act → repeat**. - -Target: 1000 paying users on Anthropic's Claude. Open Core (MIT base + private trial driver). - -## Architecture - -``` -┌──────────────────────────┐ -│ Claude (or any MCP host)│ -└────────────┬─────────────┘ - │ MCP (stdio) - ▼ -┌──────────────────────────┐ -│ simdrive MCP server │ ← this package -│ (Python 3.11+) │ -└────────────┬─────────────┘ - │ - ┌─────────┼──────────────┐ - ▼ ▼ ▼ - simctl AppleScript cliclick -(screenshot, (window (mouse + - logs, bounds, keyboard) - boot, activate) - install) - │ - ▼ - iOS Simulator - (Apple's) -``` - -**No** XCTest runner. **No** Swift package. **No** accessibility tree querying. **No** HTTP daemon. Vision-first, with fast OS-native primitives where they exist (screenshot, logs, app install) and synthetic input where they don't (cliclick). - -## MCP Tool Surface (12 tools) - -### Lifecycle - -**`session_start(device, os_version=None, app_bundle_id=None)`** -Boot sim if needed, optionally launch an app, return session id + device + window bounds. - -**`session_end(session_id)`** -Optional. Sim stays booted by default; only kills app + clears state. Leaves sim alive for next call. - -**`session_status(session_id=None)`** -Returns `{state, sim_uuid, window_bounds, current_app, last_action_at}`. State ∈ `idle|active|degraded`. 
- -### Observe - -**`observe(session_id, capture_logs=False, log_lines=50)`** -Returns `{screenshot_path, screenshot_size_pixels, device_size_points, captured_at, recent_logs?}`. Screenshot saved to a tempdir; agent reads via MCP image-block on second call. Default 50-line log tail when requested. - -### Act - -**`tap(session_id, x, y)`** -Click at logical points (0–device_w, 0–device_h). Translates points → macOS coords using AppleScript window bounds, activates Simulator, dispatches via cliclick. - -**`swipe(session_id, x1, y1, x2, y2, duration_ms=300)`** -Drag from (x1,y1) → (x2,y2). cliclick `dd` + `du` with intermediate `m` moves to control duration. - -**`type_text(session_id, text)`** -Keyboard input. cliclick `t:`. Assumes the focused field accepts plain text; agent is responsible for tapping the field first. - -**`press_key(session_id, key)`** -Hardware buttons + special keys. Supported: `home`, `lock`, `volume_up`, `volume_down`, `siri`, `screenshot` (sim hotkeys via `xcrun simctl io ... key` and AppleScript menu items where simctl gaps). - -### Recording / Replay - -**`record_start(session_id, name)`** -Begin capturing every act-tool call (tap/swipe/type/press_key) plus an `observe` snapshot before each step into `~/.simdrive/recordings/{name}.yaml`. - -**`record_stop(session_id)`** -Finalize the YAML; return path + step count. - -**`replay(name, on_drift="halt")`** -Re-execute a recording. Before each step, screenshot is compared (SSIM) to the recorded screenshot; if SSIM < 0.85 the action either halts (`halt`), warns (`warn`), or proceeds (`force`). Returns per-step pass/fail. - -### Utility - -**`logs(session_id, lines=200, predicate=None)`** -`xcrun simctl spawn booted log stream` tail. `predicate` is an NSPredicate string filter. 
- -## Recording schema (YAML) - -```yaml -name: login_flow -created_at: 2026-04-27T20:14:00Z -device: iPhone 17 Pro -os_version: "26.3" -device_size_points: [402, 874] -steps: - - id: 1 - action: tap - args: { x: 357, y: 286 } - pre_screenshot: snapshots/01_pre.png - post_screenshot: snapshots/01_post.png - captured_at: 2026-04-27T20:14:01Z - - id: 2 - action: type_text - args: { text: "maurice@synctek.io" } - pre_screenshot: snapshots/02_pre.png - post_screenshot: snapshots/02_post.png - captured_at: 2026-04-27T20:14:04Z -``` - -Self-contained — anyone with the YAML + the snapshot dir can replay. - -## What we explicitly are NOT building (v0.1.0) - -- ❌ XCTest integration. Apple's framework instability is the reason we exist; we don't depend on it. -- ❌ Accessibility tree / element selectors. Vision is the contract. -- ❌ Real-device support. Simulator only for v0.1. Real-device via `idb`/`devicectl` is a v0.2+ topic. -- ❌ Android. Maybe never; staying focused. -- ❌ A web dashboard. Recordings live as files in your repo. -- ❌ Tier gating beyond a free/license check. The OSS surface is the whole tool. - -## Pricing posture (TBD with marketing) - -- **Free / OSS**: full MCP surface against any sim. MIT-licensed. -- **Paid**: ??? — possibly hosted recordings, fleet runner, CI integration. Decide later. Ship the OSS first. - -## Migration from v16.0.0a3 (specterqa-ios) - -Hard break. Different package, different name, different repo *eventually*. -- The `specterqa-ios` PyPI package stays at `16.0.0a3`. No more releases. -- `simdrive` ships fresh as `0.1.0a1`. New install command: `pip install simdrive`. -- Anyone who installed `specterqa-ios` should `pip uninstall specterqa-ios && pip install simdrive`. -- Recordings format is incompatible — that's fine, no one had real recordings. - -## Repo strategy - -For v0.1.0a1: -- Stay in the existing `specterqa-ios` repo, branch `feat/v17-claude-native`. 
-- Add a new top-level Python package `simdrive/` next to `src/specterqa/`. -- Set up its own `pyproject.toml` either in a subdir or via the existing one with new entry points (decide during impl). -- Once stable, fork/rename repo to `simdrive` on GitHub and archive `specterqa-ios`. - -## Risks - -| Risk | Mitigation | -|---|---| -| cliclick requires Simulator window focus before each click | Wrap every act in `osascript -e 'tell application "Simulator" to activate'` (verified: works) | -| Window bounds change if user moves the sim window | Re-query AppleScript bounds on every act-tool call (cheap, ~30ms) | -| simctl `io key` has limited button coverage | Fall back to AppleScript menu commands for missing buttons | -| Multiple sims booted | session_start picks the first booted match; if ambiguous, error and ask agent to specify UDID | -| User's macOS rejects Accessibility permission for cliclick | Detect, surface a structured `permission_required` error pointing at System Settings | -| Native logs are noisy | Default to OFF; agent opts in with `capture_logs=True` | - -## Done definition for v0.1.0a1 - -1. All 12 MCP tools implemented and unit-tested -2. End-to-end live test: agent boots sim, taps Settings, types "WiFi" in search, screenshots show change -3. Record + replay round trip on a 5-step Calendar flow -4. README + quickstart explaining "drop this in .mcp.json and go" -5. Published to PyPI as `simdrive==0.1.0a1` -6. Maurice can `pip install simdrive`, add to .mcp.json, and have a working session in <5 minutes diff --git a/.specterqa/v16-handoff.md b/.specterqa/v16-handoff.md deleted file mode 100644 index fb139d5..0000000 --- a/.specterqa/v16-handoff.md +++ /dev/null @@ -1,241 +0,0 @@ -# SpecterQA v16.0.0 — Vision-First Redirection Handoff - -**Branch:** `feat/v16.0.0-vision-first` (off `main` at v15.2.0) -**Status:** Phase A landed. Phases B–F pending. WIP — no PyPI release yet. 
-**Strategic basis:** Maurice's `.specterqa/dogfood/v15.2.0-direction-proposal-maurice.md`. - -This is a **wholeshot pivot**, not a 5-phase migration. There's no real -consumer cohort outside Palace + BusinessAtlas dogfood that depends on the -v15.x AX-tree selector path. Every iOS major has shipped SwiftUI/AX changes -that broke us; the AX layer is doing negative work for vision-capable -agents who already see the screen better than the tree describes it. v16 -deletes the layer. - ---- - -## What landed in Phase A - -### Vision-first primitives (new tools, additive) - -- `ios_observe` — `handle_observe` in `src/specterqa/ios/mcp/server.py`. - Returns `{screenshot, device_w, device_h, reliable_targets, app_state, captured_at}`. - `reliable_targets` filters to elements with explicit `accessibilityIdentifier` only. -- `ios_act` — `handle_act` in same file. Single dispatcher for - `tap/type/swipe/key/scroll/long_press/drag`. Coordinate-primary; identifier - permitted on tap/long_press; `normalized=true` for resolution-independent coords. -- `UIElement.identifier` field added to `src/specterqa/ios/som_annotator.py` so - `parse_elements_from_json` populates it from the runner JSON. - -### Defense-in-depth (carry-forward from v15.2.1 patches that didn't ship) - -- `runner/Sources/SpecterQAObjCBridge.{h,m}` — Swift-callable `@try`/`@catch` shim. -- `runner/Sources/HTTPServer.swift` — `runOnMain` wraps the dispatched block in - the bridge; uncaught NSException becomes a logged error instead of killing the - test method. -- `runner/SpecterQARunner.xcodeproj/project.pbxproj` — bridge files wired into - the SpecterQARunner target. -- `runner/Sources/SpecterQARunner-Bridging-Header.h` — imports the bridge. 
- -### Folded-in v15.x work (cherry-picked from feat/mcp-tier-enforcement and feat/sec-high-005-jwt-offline-grace) - -- Tier enforcement across MCP tool surface (PR #79's content) -- SEC-HIGH-005 JWT decoder hardening (PR #78's content) -- v16's `ios_observe` and `ios_act` are tier-mapped (`trial`) - -### Tests / pins updated - -- `tests/test_mcp_tool_registration.py` _EXPECTED_TOOL_COUNT 47 → 49 -- `tests/regression/test_mcp_instructions_sync.py` server.py header 47 → 49 -- `tests/test_physical_device_optin.py` — `tool_count` field 47 → 49 -- 592 unit tests pass, 20 skipped, 0 fail. - -### Files NOT yet touched (Phase B work) - -- `runner/Sources/SpecterQAElementQuery.swift` (the throw-site file) -- Legacy MCP tools in `server.py`: `ios_screenshot`, `ios_elements`, `ios_tap`, - `ios_long_press`, `ios_swipe`, `ios_swipe_back`, `ios_type`, `ios_press_key`, - `ios_dismiss_keyboard`, `ios_wait_idle`, `ios_wait_for_element`, - `ios_capture_state`, `ios_action_with_logs` -- `runner/Sources/Routes/ElementsRoute.swift`, parts of `TapRoute.swift`, - `TypeRoute.swift`, `SwipeRoute.swift` that hit `findByLabel` / `findByIdentifier` - ---- - -## Phase B — Demolition (1 working day) - -Goal: delete the AX-tree selector layer entirely. - -**Swift side:** -1. Delete `runner/Sources/SpecterQAElementQuery.swift`. -2. Audit `runner/Sources/Routes/TapRoute.swift`, `TypeRoute.swift`, - `SwipeRoute.swift` — remove all paths that call `findByLabel` / - `findByIdentifier` / `waitForElement` / any `XCUIElementQuery` selector. - Keep ONLY the coordinate paths. -3. Delete `runner/Sources/Routes/ElementsRoute.swift` (the `/elements` HTTP - route the legacy `ios_elements` MCP tool fronted) and remove its - registration in `SpecterQARunner.swift`'s `registerRoutes(...)` call. -4. Update `runner/SpecterQARunner.xcodeproj/project.pbxproj` to drop the - PBXFileReference / PBXBuildFile entries for the deleted Swift files. -5. 
Audit `runner/Sources/AccessibilityTree.swift` — delete if it's only - used by the selector layer; keep if `ios_observe` still pulls element - metadata through it (it currently does via `som_annotator`, which uses - the runner `/source` endpoint — that endpoint stays). - -**Python side:** -1. Delete the legacy MCP tool definitions in `server.py`: - - `ios_screenshot` (replaced by `ios_observe`) - - `ios_elements` (folded into `ios_observe.reliable_targets`) - - `ios_tap`, `ios_long_press`, `ios_swipe`, `ios_swipe_back`, `ios_type`, - `ios_press_key`, `ios_dismiss_keyboard` (replaced by `ios_act`) - - `ios_wait_idle`, `ios_wait_for_element` (agent loops on `ios_observe` instead) - - `ios_capture_state` (folded into `ios_observe`; agent calls `ios_logs_tail`/`ios_perf`/etc separately) - - `ios_action_with_logs` (composed: `ios_act` + `ios_logs_tail`) -2. Delete the corresponding `handle_*` functions and helpers. -3. Update `tier_gate.py`'s `TOOL_TIER_MAP` to drop the deleted entries. -4. Bump `_EXPECTED_TOOL_COUNT` and the header count to whatever the new total is - (likely ~22). -5. Run unit sweep — many tests will fail. Delete tests that exercise the - deleted tools; preserve tests that test out-of-band telemetry (logs/perf/etc.) - and the new primitives. - -**Live verification after Phase B:** -- Build runner (it should build clean — no references to SpecterQAElementQuery). -- Live deploy + ios_observe + ios_act on iPhone 17 Pro / iOS 26.2. -- Run a coord-only auth journey (Maurice's exact flow): observe → tap → observe - → tap → ... for 11 sequential taps. Pre-v16 this required manual coord - fallback after AX crashed. Under v16 it's the only path and should be solid. - ---- - -## Phase C — Replay rewrite (3–5 working days) - -Goal: replay engine that's coord + visual-diff, not selector + element-existence. 
- -**Schema:** -```yaml -replay: - name: a1qa_signin - device: - width: 402 - height: 874 - steps: - - kind: observe - capture: signin_form_state # named visual reference - - kind: act - action: {kind: tap, x: 0.503, y: 0.385} # normalized coords - - kind: assert_visual - reference: signin_form_state - threshold: 0.92 # SSIM threshold - region: [0, 0.4, 1.0, 0.6] # only diff form region - mode: ssim # or "perceptual_hash" -``` - -**Implementation:** -1. New module `src/specterqa/ios/replay_v2.py` (don't break `replay.py` until - Phase E migration tool runs). -2. `ReplayExecutor` walks steps, calls `handle_observe` / `handle_act` for each. -3. `assert_visual` compares the captured screenshot to the named reference PNG - using SSIM. Reference PNGs live alongside the YAML (`.refs/`). -4. `ios_replay` MCP tool dispatches to v2 when YAML schema declares - `version: 2`; legacy schema returns a clear "migrate via specterqa-ios - replay migrate <path>" error. -5. Use `pillow` + scikit-image's `skimage.metrics.structural_similarity` for SSIM. - -**Threshold tuning:** ship with default 0.90, expose per-step override. Iterate -from real Palace usage. - ---- - -## Phase D — Recording rewrite (1–2 working days) - -Goal: `ios_start_recording` / `ios_stop_recording` capture screenshots + coord -taps, output v2 schema YAML. - -**Implementation:** -1. `ReplayRecorder` (existing in `src/specterqa/ios/replay.py`) gains a v2 mode. -2. On every `handle_act` call, append a step + capture a reference screenshot. -3. Optional `include_ocr=True` runs macOS Vision framework OCR on the area - around the tap coordinate to produce a human-readable comment. -4. On stop, write YAML + reference PNG directory. - ---- - -## Phase E — Tests, README, migration guide - -1. New live integration tests: `tests/integration/test_observe_act_live.py`, - `tests/integration/test_replay_v2_live.py`. Run on iPhone 17 Pro / iOS 26.2. -2. README rewrite — vision-first model section, migration table, deletion list. -3. 
Migration guide: `docs/MIGRATING-TO-V16.md` — step-by-step for any consumer - on v15.x label-based tools to translate to coord-based. -4. CHANGELOG fully populated for the v16.0.0 release entry (currently in progress). -5. CLI `specterqa-ios replay migrate <path>` — converts a v1 replay to v2 by - running it once, capturing screenshots, replacing `expect_elements` with - `assert_visual` references. - ---- - -## Phase F — Ship - -1. Live verification matrix: - - iPhone 12 / iOS 26.0 - - iPhone 17 Pro / iOS 26.2 - - iPhone 16 Pro / iOS 18.4 - Run the full Palace auth journey against each. Zero crashes required. -2. Push branch, open PR (squash-merge will collapse the v16 work into one - commit on main). -3. QualityAtlas certification — focus on: deletion completeness (no dangling - selector references), test theater check on the new live tests, breaking-change - doc clarity. -4. Chairman merge auth. -5. DeployAtlas: bump pyproject 15.2.0 → 16.0.0, tag v16.0.0, push, monitor - `publish.yml`, fresh-venv dogfood install, deployment_record at - `CompanyState/deployments/records/specterqa-ios.jsonl`. -6. Maurice / Palace re-runs A1QA on v16.0.0 — confirms the AX-crash class is - gone and `ios_act` handles his flow cleanly. - ---- - -## Open questions to revisit at Phase E/F - -1. **License tier for `ios_observe` / `ios_act`** — currently both `trial`. - Reconsider: should `ios_observe` stay trial (vision-capable agents need it) - while `ios_act` requires `indie+` (input is the revenue gate)? Maurice's - spec says no tier on the primitives; revenue gating is on persistence - (replay/recording). Probably correct. -2. **`include_legacy_elements=True` flag on `ios_observe`** — keep in v16.0.0 - for transition? Or drop immediately because we're wholeshot? Currently - kept; can drop in v16.1 once Maurice confirms he doesn't use it. -3. **Visual-diff library choice.** scikit-image is heavy; `pillow` alone gives - us pixel-diff + perceptual hash but not SSIM. 
Decide before Phase C — start - with scikit-image, profile, swap if it bloats the wheel unacceptably. - ---- - -## What changed in this session that informs Phase B - -- The ObjC bridge wraps `runOnMain` defense-in-depth. After Phase B deletes - the dominant XCTest throw site (XCUIElementQuery selector layer), the bridge - stays — XCUICoordinate / snapshot / screenshot APIs can still throw on rare - iOS bugs. Don't delete the bridge during Phase B. -- The cherry-picked tier enforcement (PR #79 content) and SEC-HIGH-005 (PR #78 - content) are orthogonal to vision-first. They land cleanly in v16 with no - changes needed. -- Dogfood docs persisted at `.specterqa/dogfood/v15.1.0-maurice.md`, - `.specterqa/dogfood/v15.2.0-direction-proposal-maurice.md`, and - `.specterqa/dogfood/v15.2.0-runner-stability-patch-maurice.md`. Phase B - agent should read both v15.2.0 docs before touching the runner. - ---- - -## Tasks tracker - -| Phase | Status | -|---|---| -| A — vision-first primitives + ObjC bridge + folded PRs + tests pinned | **DONE** | -| B — demolition | pending | -| C — replay v2 | pending | -| D — recording v2 | pending | -| E — tests + README + migration guide | pending | -| F — PR + QA + DeployAtlas | pending | - -Phase A is committed on `feat/v16.0.0-vision-first`. Continue from there. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index f5bfd23..6e877c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -339,7 +339,7 @@ former `specterqa-ios` 16.x line: PyPI distribution name reverted to - **Docs** — `OBSERVABILITY.md`, `PERFORMANCE.md`, `RECOVERY.md` ### Added — Production credentials -- **Production Ed25519 license-signing public key** injected (private key held in Chairman's secure storage; configured as `SIMDRIVE_LICENSE_PRIVATE_KEY` env var on the Railway license server) +- **Production Ed25519 license-signing public key** injected (private key held in maintainer's secure storage; configured as `SIMDRIVE_LICENSE_PRIVATE_KEY` env var on the Railway license server) ### Fixed - `recordings.py` DELETE 204 + response-model `AssertionError` at router init (introduced and fixed in Cycle 2+3) @@ -413,7 +413,7 @@ and `simdrive ci` CLI subcommands, and bumped the version. ### Pending for Atlas before Cycle 2 -- Real `SIMDRIVE_PUBLIC_KEY_HEX` keypair needs Chairman generation and injection +- Real `SIMDRIVE_PUBLIC_KEY_HEX` keypair needs maintainer generation and injection into `license/public_key.py`. Current public key is a placeholder — license signing/verification will fail in production until this is set. - Live smoke against TestKitApp deferred to Cycle 4 dogfood pass. @@ -616,7 +616,7 @@ for the tactical patches that v16 makes redundant by deletion. per step; optional OCR'd text near tap for human readability. 
- **Phase E — README rewrite, migration guide, real-sim integration tests for the new primitives.** -- **Phase F — PR, QualityAtlas certification, DeployAtlas tag/PyPI publish.** +- **Phase F — PR, QA review certification, release pipeline tag/PyPI publish.** ### Out-of-band signals (unchanged in v16) diff --git a/SECURITY.md b/SECURITY.md index 300b901..4906515 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -64,7 +64,7 @@ to make dep-confusion attacks visible in PR diff; pip-audit nightly on the lock; gitleaks blocking on every push to prevent accidentally checking in credentials that would let an attacker masquerade as a maintainer. Publishing requires human approval today (manual `git tag` push); roadmap item to require PyPI 2FA -on the maintainer account is tracked separately as a Chairman HITL item. +on the maintainer account is tracked separately as a maintainer HITL item. **(2) Local MCP server surface.** SimDrive runs on a developer's machine and drives a local iOS simulator. It does NOT open a network port for inbound @@ -89,6 +89,6 @@ that surface ships, this section is informational only. ## Related Documents - `simdrive/docs/REDACTION_SPEC.md` — screenshot + recording redaction design - (spec only as of W1; implementation tracked in W2 of INIT-2026-549). + (spec only as of W1; implementation tracked in W2 of [internal-tracker]). - `.github/workflows/security.yml` — pip-audit + gitleaks CI. - `.github/workflows/codeql.yml` — CodeQL static analysis. 
diff --git a/docs/MCP_TOOL_SURFACE.md b/docs/MCP_TOOL_SURFACE.md index 379e5d6..27cf1d6 100644 --- a/docs/MCP_TOOL_SURFACE.md +++ b/docs/MCP_TOOL_SURFACE.md @@ -81,4 +81,4 @@ assert len(server.list_tools()) == 32 | pre-2026-04 | Initial 29 tools | 29 | | 2026-04 (1.0.0a7) | + `load_journey` | 30 | | 2026-04 (1.0.0a9.1) | + `lint_recordings`, `migrate_recording` | 32 | -| 2026-05-17 | Documented canonical count in MCP_TOOL_SURFACE.md (INIT-2026-549) | 32 | +| 2026-05-17 | Documented canonical count in MCP_TOOL_SURFACE.md | 32 | diff --git a/docs/accuracy-review-INIT-2026-525.md b/docs/accuracy-review-INIT-2026-525.md deleted file mode 100644 index 5c79756..0000000 --- a/docs/accuracy-review-INIT-2026-525.md +++ /dev/null @@ -1,168 +0,0 @@ -# Technical Accuracy Review — INIT-2026-525 - -## Reviewer: CodeAtlas -## Date: 2026-04-08 - ---- - -## Scope - -Files reviewed: -- `/Users/atlas/Documents/specterqa-ios/README.md` — primary user-facing doc (EXISTS) -- `/Users/atlas/Documents/specterqa-ios/docs/landing-page.md` — DOES NOT EXIST (not yet created) -- `/Users/atlas/Documents/specterqa-ios/llms.txt` — DOES NOT EXIST (not yet created) -- `/Users/atlas/Documents/specterqa-ios/.well-known/agent.json` — DOES NOT EXIST (not yet created) - -Code reviewed: -- `src/specterqa/ios/mcp/server.py` — 19 `@mcp.tool` decorators confirmed -- `src/specterqa/ios/cli/commands.py` — all CLI commands enumerated -- `pyproject.toml` — version, package name, entry points -- `src/specterqa/ios/backends/` — xctest_client.py, indigo_hid.py, browserstack.py -- `src/specterqa/ios/som_runner.py` — SoM accuracy claim source -- `src/specterqa/ios/replay.py` — replay engine (no API key dependency confirmed) -- `src/specterqa/ios/license/validator.py` + `license_cmd.py` + `stripe_webhook.py` — tier names - -**Note:** The three GTM files (landing-page.md, llms.txt, agent.json) do not yet exist. 
This review audits claims in the README (the only existing user-facing document) and pre-validates any claims intended for those forthcoming files. - ---- - -## Verified Claims - -| Claim | Status | Evidence | -|-------|--------|----------| -| **19 MCP tools** | CONFIRMED | Exactly 19 `@mcp.tool(...)` decorators in `server.py` (lines 1367–1694): `ios_start_session`, `ios_stop_session`, `ios_screenshot`, `ios_tap`, `ios_wait`, `ios_wait_for_element`, `ios_start_recording`, `ios_stop_recording`, `ios_accessibility_audit`, `ios_swipe`, `ios_swipe_back`, `ios_type`, `ios_elements`, `ios_set_appearance`, `ios_press_key`, `ios_long_press`, `ios_save_replay`, `ios_simctl`, `ios_webview_elements` | -| **Version v11.3.0** | CONFIRMED — in pyproject.toml | `pyproject.toml` line 7: `version = "11.3.0"`. Latest git tag: `v11.3.0`. These match. | -| **Record once, replay free forever** | CONFIRMED | `replay.py` contains zero references to `ANTHROPIC_API_KEY` or `anthropic` SDK. `ReplayPlayer` runs deterministically from YAML. README states "ANTHROPIC_API_KEY (recording only — not needed for replay)" which is accurate. | -| **Maestro YAML compatible** | CONFIRMED (partial) | `replay.py` lines 381–431: `_normalize_maestro_step()` handles `tapOn`, `assertVisible`, `assertNotVisible`, `inputText`, `waitFor`. These are the shortcuts documented in README. | -| **Parallel CI execution** | CONFIRMED | `commands.py` lines 1886–2010: `--parallel N` flag on `ci` command, uses `ThreadPoolExecutor`. README shows `specterqa-ios ci --parallel 4`. | -| **Crash detection** | CONFIRMED | `src/specterqa/ios/drivers/simulator/crash.py`: `CrashDetector` class, integrated into `SimulatorDriver`. | -| **Visual regression** | CONFIRMED | `replay.py` lines 39–80: `screenshot_diff()` function using Pillow `ImageChops.difference()`. Returns percent pixel diff between two screenshots. | -| **Network inspection** | CONFIRMED | `src/specterqa/ios/drivers/simulator/network.py`: `NetworkInspector` class. 
Integrated via `ai_context.py`. | -| **XCTest backend** | CONFIRMED | `backends/xctest_client.py`: `XCTestBackend` — HTTP client talking to Swift runner on port 8222. | -| **IndigoHID backend** | CONFIRMED | `backends/indigo_hid.py`: pure-Python ctypes-based headless touch injection via Apple private API. | -| **Entry points match docs** | CONFIRMED | `pyproject.toml` defines `specterqa-ios` → `specterqa.ios.cli.commands:main` and `specterqa-ios-mcp` → `specterqa.ios.mcp.server:serve`. Both match README install instructions. | -| **CLI commands exist** | CONFIRMED | All commands in README table (`setup`, `devices`, `boot`, `install`, `init`, `validate`, `validate-replay`, `run`, `smoke`, `replay`, `ci`, `serve`) are registered in `commands.py`. | -| **`doctor` command exists** | CONFIRMED (not in README table) | `commands.py` line 338: `@ios_command_group.command("doctor")`. README omits this command from its CLI reference table — this is an omission, not an error. | -| **License tiers** | CONFIRMED | `validator.py` and `license_cmd.py` define tiers: `trial`, `indie`, `pro`, `enterprise`, `founder` (internal grant). `stripe_webhook.py` maps Stripe prices to the same tier strings. | -| **Python 3.10+ requirement** | CONFIRMED | `pyproject.toml`: `requires-python = ">=3.10"`. README states "Python 3.10+". | -| **macOS + Xcode 15+** | UNVERIFIABLE FROM CODE ALONE | Xcode minimum version is not enforced programmatically. `setup` command checks `xcrun`, but no version gate. Stated as a requirement in README — flagged below. | - ---- - -## Accuracy Issues (must fix before deploy) - -### ISSUE 1 — PyPI version lag: `pip install specterqa-ios` installs v11.2.0, not v11.3.0 - -**Severity: HIGH** - -`pyproject.toml` and git tag say `v11.3.0`. PyPI shows latest published version as `11.2.0`. Version `11.3.0` has NOT been published to PyPI. 
- -The README install instruction uses: -```bash -pip install "git+https://github.com/SyncTek-LLC/specterqa-ios.git" -``` -This correctly installs from git HEAD and will get v11.3.0. That is the PRIMARY install method in the README and it is accurate. - -However, the MCP quick-start section shows: -```bash -pip install 'specterqa-ios[mcp]' -``` -This installs from PyPI and will land users on v11.2.0 — one minor version behind. This is a **broken install path** for the MCP use case if v11.3.0 has not been published to PyPI before launch. - -**Fix:** Publish v11.3.0 to PyPI before shipping GTM copy, OR change the MCP install example to also use `git+` form. - ---- - -### ISSUE 2 — `doctor` command missing from README CLI reference table - -**Severity: LOW** - -`doctor` command exists at `commands.py:338` and is described there as checking "Xcode, simulator, Python env, license key, BrowserStack credentials, and installed package version." It is more comprehensive than `setup`. It is not listed in the README CLI reference table. - -**Fix:** Add `doctor` row to the CLI reference table. Users migrating or troubleshooting will miss it. - ---- - -### ISSUE 3 — `runner` and `wda` subcommand groups not documented - -**Severity: LOW / INFORMATIONAL** - -`commands.py` registers three subcommand groups: -- `specterqa-ios runner build/status/clean` (runner build utilities) -- `specterqa-ios wda start/stop/status` (WebDriverAgent integration) -- `specterqa-ios license` (license management — mounted conditionally) - -These are absent from the README. For a GTM doc this is arguably appropriate (they are power-user / internal commands), but `runner build` is a prerequisite for XCTest mode and should at minimum be mentioned in setup instructions. 
- ---- - -## Unverifiable Claims (flag for disclaimer) - -### CLAIM — "90% SoM tap accuracy" - -**Status: UNVERIFIABLE FROM CODE — sourced from research citation, not internal benchmarks** - -`som_runner.py` line 16: -```python -# Research: SoM prompting improves UI agent accuracy from ~50% to ~90%+ -# by eliminating coordinate prediction entirely. -``` - -This is a comment citing external SoM research, not an internally measured benchmark. SpecterQA has not published benchmark results for its own tap accuracy on iOS Simulator. - -**If landing-page.md or llms.txt makes a "90% tap accuracy" claim, it must be attributed as a research-derived estimate, not a measured product metric, OR the internal benchmark must be run and documented first.** - -**Recommended copy:** "SoM-powered tapping eliminates coordinate prediction — the technique improves UI agent accuracy from ~50% to 90%+ in published research" (with citation). Do NOT state "SpecterQA achieves 90% tap accuracy" without running controlled benchmarks. - ---- - -### CLAIM — "Xcode 15+ required" - -**Status: UNVERIFIABLE — no code-level enforcement** - -The codebase does not gate on Xcode version. The claim in README is a stated requirement but is not validated at runtime. If tested against Xcode 14, behavior is undefined. Flag this as a "tested against Xcode 15/16" disclaimer rather than a hard requirement, unless a version check is added to `setup`/`doctor`. - ---- - -## Version / Naming Mismatches - -| Item | Claimed | Actual | Match? 
| -|------|---------|--------|--------| -| Package version (pyproject.toml) | v11.3.0 | v11.3.0 | YES | -| Latest git tag | v11.3.0 | v11.3.0 | YES | -| PyPI published version | (implied current) | v11.2.0 | **NO — v11.3.0 not on PyPI** | -| Package name | `specterqa-ios` | `specterqa-ios` | YES | -| MCP entry point | `specterqa-ios-mcp` | `specterqa-ios-mcp` | YES | -| MCP tool count | 19 | 19 | YES | -| License tiers in copy | (not yet in GTM docs) | trial / indie / pro / enterprise / founder | N/A — GTM docs not yet written | - ---- - -## GTM Documents Not Yet Created - -The following files referenced in the review brief do not exist: - -| File | Status | -|------|--------| -| `docs/landing-page.md` | NOT CREATED | -| `llms.txt` | NOT CREATED | -| `.well-known/agent.json` | NOT CREATED | - -These must be authored before launch. The claims verified above apply to content that WILL go into these files. The README is accurate except for the PyPI version lag issue. - ---- - -## Verdict - -**NEEDS CORRECTIONS** — one blocking issue, two minor issues. - -**Blocking before launch:** -1. **PyPI v11.3.0 not published** — the `pip install specterqa-ios[mcp]` path in the MCP quick-start installs v11.2.0. Either publish v11.3.0 to PyPI or update the install command to use `git+`. - -**Required for GTM copy accuracy:** -2. **"90% SoM tap accuracy" claim must be attributed to research, not stated as a measured product metric**, unless internal benchmarks are run. - -**Low priority:** -3. Add `doctor` command to CLI reference table. - -The core architecture claims (replay is free, 19 MCP tools, Maestro YAML compatibility, parallel CI, crash detection, visual regression, network inspection, XCTest + IndigoHID backends) are all **code-confirmed and accurate**. 
diff --git a/docs/copy-review-INIT-2026-525.md b/docs/copy-review-INIT-2026-525.md deleted file mode 100644 index 1432e24..0000000 --- a/docs/copy-review-INIT-2026-525.md +++ /dev/null @@ -1,255 +0,0 @@ -# Copy Review — INIT-2026-525 SpecterQA iOS GTM - -## Reviewer: MarketingAtlas -## Date: 2026-04-10 - ---- - -## Overall Assessment - -The copy is in strong shape overall — the core value proposition ("record once, replay free") is clear, consistently expressed across all surfaces, and developer-friendly without being patronizing. The main issues are: one missing asset (license_cmd.py doesn't exist yet), one awkward self-referential disclosure in the feature grid, one version discrepancy between README and other surfaces, and a few missed conversion moments where friction could be reduced further. - ---- - -## Landing Page Review (`docs/landing-page.md`) - -### Strengths - -- **H1 is excellent.** "The only iOS tester your agent can call." — specific, bold, differentiating. Works for both the developer who is building agents and the developer who *is* the agent user. No wasted words. -- **H2 lands the key insight in 12 words.** "Record tests with AI. Replay free forever. Ship iOS apps with confidence." Clean tricolon, each clause does work. -- **The 3-step "How It Works" flow is the clearest explanation of the product anywhere.** Record / Commit / Replay is immediately scannable. The detail in each step is exactly right — not too brief, not over-explained. -- **The FAQ is strong.** Every question is something a developer actually asks. The BYOK answer in particular is exemplary: plain language, explains what it means, what we do and don't do, in 3 sentences. -- **"Maestro Compatible" section is a legitimate conversion asset** — zero migration cost framing is compelling and honest. -- **Pricing table is clean and complete.** The footnote "All tiers require your own Anthropic API key (BYOK) for the record phase. Replay is always free." belongs exactly where it is. 
- -### Issues (must fix) - -**1. The BYOK disclosure in the feature grid is self-undermining.** - -> "97% gross margin for us; complete control for you." - -This is an accidental overshare of internal financial framing — it reads as if we're bragging about our margin at the customer's expense. The developer reading this doesn't need to know our gross margin and may find it off-putting (is this the reason I have to bring my own key?). - -Suggested rewrite: -> "You bring your own Anthropic API key. SyncTek never sees it, stores it, or proxies it. Your test recordings, simulator state, and app binary never leave your machine." - -**2. The social proof placeholders are launch-blocking.** - -The testimonials are listed with `[Design Partner]` attribution rather than real names/companies. If these are real quotes from real design partners, get their permission and use their name or company (even anonymous-but-specific: "iOS Lead at a Series B fintech"). If they're fabricated placeholders, remove them entirely. Fake testimonials hurt trust more than no testimonials. - -**3. The comparison table has a misleading entry.** - -In the "No AI cost in CI" row, all four tools (including SpecterQA) are marked Yes. This is accurate but structurally confusing — it looks like SpecterQA has no advantage there, when the point is that SpecterQA is the *only one that also has AI-assisted recording*. The table needs a column sort or a note explaining why this row exists. - -Suggested fix: rename the row to "Zero AI cost on replay" and add a note below: "SpecterQA is the only tool in this table that uses AI at all — and only during the record phase. CI runs are always free." - -**4. Subtext in the hero mentions "19 MCP tools" — which is a secondary detail for the H1 audience.** - -The hero subtext should lead with the transformation (record → replay free), then mention agent-native as a secondary hook. 
"19 MCP tools" in a hero subtext is noise for developers who don't yet know what SpecterQA is. - -Suggested rewrite: -> "SpecterQA iOS records test sessions once using Claude's vision — then replays them deterministically in CI with zero AI cost. Natively callable from Claude Code and any MCP-compatible agent. Maestro compatible. BYOK." - -This preserves the agent-native hook without leading with a tool count. - -### Suggestions (nice to have) - -- Add an estimated Anthropic API cost for a typical recording session somewhere near the pricing table. The cost table exists in `specterqa-ios.ts` (`$0.05-$0.15` for smoke, `$0.20-$0.60` for full journey) — this is genuinely compelling and should be surfaced on the landing page. Developers worry about "BYOK" meaning "this will cost me a lot"; showing the actual numbers kills that objection. -- The footer CTAs are good but ordered oddly — "View Docs on GitHub" appears before "Contact Sales (Enterprise)." Consider: Start Free Trial → View Docs → Contact Sales → Support. -- Consider a "Known limitations" or "Tradeoffs" section on the landing page (a condensed version of what's in the website data). Honest tradeoffs build trust and pre-qualify buyers, reducing churn from "I didn't know it was macOS only." - ---- - -## llms.txt Review - -### Strengths - -- The opening paragraph is the best machine-readable summary of the product anywhere. Clear subject, clear mechanism, clear differentiation, no fluff. -- Install snippets cover all three install variants (CLI only, MCP, full orchestration) — this is the right level of detail for LLMs making tool-selection decisions. -- The MCP server config example (`mcp.json` snippet + example plain-English command) is exactly right. An LLM reading this can reproduce the integration with no ambiguity. -- Pricing table is complete and machine-parseable. - -### Issues (must fix) - -**1. 
"97% gross margin for us; full data control for you" in Key Differentiators.** - -Same issue as the landing page. An LLM summarizing or recommending this product to a user will reproduce this phrase. It sounds like a sales pitch to another department, not a customer-facing differentiator. - -Suggested rewrite: -> **BYOK** — Your Anthropic API key stays with you. SyncTek never sees it, proxies it, or stores it. Your test data, recordings, and app binaries never leave your machine. - -**2. The CLI examples in llms.txt reference `specterqa-ios run --product myapp --journey smoke` but the README shows this as the same command.** No conflict — just confirm this is the canonical form. If `run` is the recording command, the docs should always call it "Record a test" not "Run a test" to avoid confusion with `replay`. - -### Suggestions (nice to have) - -- The `## Links` section is clean. Consider adding the A2A agent card URL as a direct link (it's already referenced in the A2A card itself, but LLMs benefit from explicit linking in llms.txt). -- The install section could note the `[orchestration]` extra explicitly: "Required for the record phase (Claude-driven sessions)." Without context, developers might install `[mcp]` only and be confused when recording doesn't work. - ---- - -## CLI Messages Review (`license_cmd.py`) - -### Status: File Not Found - -`/Users/atlas/Documents/specterqa-ios/src/specterqa/ios/cli/license_cmd.py` does not exist at this path. This is either not yet written or lives at a different path. - -**This is a launch blocker.** CLI error messages and success messages are often the first sustained interaction a developer has with a product — they form lasting impressions of quality. An unreviewed CLI UX ships bad experiences. - -**Required action before launch:** Locate or create `license_cmd.py`, ensure all user-facing strings are reviewed against the criteria below, and re-run this review against the actual file. 
- -**Criteria for when the file is available:** -- Error messages must tell users what to do next, not just what went wrong. "License key invalid" is bad. "License key invalid — verify your key at synctek.io/account or contact support@synctek.io." is good. -- Success messages should confirm the specific outcome: "Trial activated — 1 simulator, 3 runs/session. Run `specterqa-ios setup` to verify your environment." beats "License activated." -- License expiry warnings should include the days remaining and the upgrade path in the same message. -- The CLI should use Rich formatting (panels, colored status indicators) consistently. A bare `print()` success message in an otherwise Rich-formatted tool reads as an oversight. -- BYOK requirement errors (missing API key) should explain *why* it's needed and exactly *where* to set it: `export ANTHROPIC_API_KEY=sk-ant-...` in the error message itself. - ---- - -## A2A Agent Card Review (`.well-known/agent.json`) - -### Strengths - -- The description is tight and accurate: "AI-native iOS simulator testing. Record tests with Claude, replay deterministically in CI. 19 MCP tools for agent-driven iOS QA." Nineteen words that cover mechanism + interface + use case. This is good. -- Capabilities list is substantive and specific (`visual-regression`, `accessibility-audit`, `crash-detection`, `network-inspection`). An agent reading this can make an informed tool-selection decision. -- Version is current (11.3.0, consistent with landing page and website data). - -### Issues (must fix) - -**1. The `pricing.tiers` array is inconsistent in schema.** - -The Trial tier uses `"runs_per_session": 3` but Indie, Pro, Team, and Enterprise tiers don't include this field. An agent reading the pricing object cannot infer that paid tiers have unlimited runs. Add `"runs_per_session": "unlimited"` to all paid tiers. 
No `authentication` field.** - -A2A agent cards should describe how authentication works so agents can determine whether they can invoke the tool. At minimum, add: -```json -"authentication": { - "type": "license_key", - "byok": true, - "byok_provider": "anthropic", - "byok_env_var": "ANTHROPIC_API_KEY" -} -``` - -**3. The `mcp_registry` link points to the general MCP servers registry** (`https://github.com/modelcontextprotocol/servers`), not to a specific SpecterQA entry. If SpecterQA isn't listed there, this link is misleading. Change to the GitHub repo URL or remove the field until SpecterQA is listed. - -### Suggestions (nice to have) - -- Add a `"constraints"` or `"requirements"` field: `["macOS", "Xcode 15+", "Python 3.10+"]`. Agents making environment-aware tool selection need this. -- Add `"license": "Elastic-2.0"` at the top level alongside `version` and `provider`. - ---- - -## README Review - -### Strengths - -- Opens with a single-sentence value proposition that scans in under 5 seconds. No preamble. -- The Dual-Mode Architecture table is the single best explanation of the record/replay split in any surface — clear, memorable, and honest about when costs occur. -- Maestro YAML example is executable by a developer who has never used SpecterQA before. The inline comments (`# same as: action: tap, element_label: Sign In`) are exactly the right level of explanation for a migration audience. -- The CLI reference table is comprehensive. Every command has a one-line description. - -### Issues (must fix) - -**1. The install command is inconsistent with all other surfaces.** - -README shows: -```bash -pip install "git+https://github.com/SyncTek-LLC/specterqa-ios.git" -``` - -Every other surface (landing page, llms.txt, website data) shows: -```bash -pip install specterqa-ios -# or -pip install 'specterqa-ios[mcp]' -``` - -If the package is published to PyPI, the README should use the PyPI install. 
The `git+https://` form implies either (a) the package is not on PyPI yet, or (b) this is a pre-launch holdover. This is a friction-creator for first-time installers and a trust signal issue — developers expect published products to have PyPI packages. - -**Required action:** Align README install to PyPI form before launch, or add a note explaining the git install is for pre-release / head. - -**2. The README comparison table omits the `MCP / agent-native` row.** - -The landing page comparison table includes it; the README table does not. Since the README is the primary discovery surface for developers who find the GitHub repo first, this is a conversion miss — agent-native is SpecterQA's clearest differentiator and it's absent from the comparison at the top of the README. - -Suggested fix: add the row. The landing page table version is the right model. - -**3. No version badge or version number in the README.** - -The landing page and website data both reference v11.3.0. The README has no version reference. Add a badge row at the top: -``` -![Version](https://img.shields.io/badge/version-11.3.0-blue) -![License](https://img.shields.io/badge/license-Elastic--2.0-purple) -![Python](https://img.shields.io/badge/python-3.10%2B-blue) -``` - -### Suggestions (nice to have) - -- Add a cost table (from the website data — `$0.05-$0.15` for smoke, `$0.20-$0.60` for full journey). This is the most common hesitation for BYOK products and addressing it in the README reduces support load. -- The "Requirements" section at the bottom is the right place to live, but `ANTHROPIC_API_KEY (recording only — not needed for replay)` buries an important reassurance. Consider surfacing this earlier in the Quick Start section, where developers are about to set the key. 
- ---- - -## Pricing Consistency - -Across the four surfaces that carry pricing (landing page, llms.txt, agent.json, website data — the README has no pricing): - -| Tier | Landing Page | llms.txt | agent.json | website data | -|------|-------------|---------|------------|--------------| -| Trial | Free, 1 sim, 3 runs/session | Free, 1 sim, 3 runs/session | Free (0), 1 sim, 3 runs/session | Free, 1 sim, 3 runs/session | -| Indie | $29/mo, 2 sims, unlimited | $29/mo, 2 sims | $29/mo, 2 sims | $29/mo, 2 sims, unlimited | -| Pro | $99/mo, 4 sims, parallel | $99/mo, 4 sims, parallel | $99/mo, 4 sims | $99/mo, 4 sims, parallel | -| Team | $299/mo, 10 sims, parallel | $299/mo, 10 sims, parallel | $299/mo, 10 sims | $299/mo, 10 sims, parallel | -| Enterprise | Custom, unlimited | Custom, unlimited | Custom, unlimited | Custom, unlimited | - -**Findings:** -- Prices are consistent across all surfaces. No mismatches. -- **Minor gap:** agent.json tiers for Indie/Pro/Team/Enterprise are missing `runs_per_session` and `parallel_ci` fields (noted in the A2A review above). This is a schema completeness issue, not a pricing inconsistency. -- **Minor gap:** llms.txt and agent.json don't explicitly list "Priority Support" (Team) and "SLA" (Enterprise) — these are listed in the landing page table. Not a mismatch, but agents doing tier comparison won't see these differentiators. - -Pricing is consistent. No contradictions found. - ---- - -## Priority Fixes (ordered by launch impact) - -1. **[BLOCKER] Locate and review `license_cmd.py`** — CLI user-facing strings are unreviewed. This is the first interaction most developers will have after install. No launch without this review. - -2. **[BLOCKER] Align README install command to PyPI form** — `pip install "git+https://..."` in the README contradicts `pip install specterqa-ios` everywhere else. Developers who find the repo first will hit friction immediately. - -3. 
**[HIGH] Remove "97% gross margin for us" from landing page and llms.txt** — This phrase is internal financial framing that leaked into customer copy. It reads as tone-deaf in both places. Replace with the plain-language BYOK explanation (rewrites provided above). - -4. **[HIGH] Replace placeholder testimonials with real quotes or remove entirely** — Labelled design partner quotes with fabricated or unconfirmed attribution damage credibility. Real names/companies (even anonymized-but-specific) convert; obvious placeholders repel. - -5. **[HIGH] Add `authentication` field to agent.json** — Agents doing tool selection need to know how to authenticate. This is table stakes for an A2A-discoverable product. - -6. **[MEDIUM] Fix agent.json `runs_per_session` omission on paid tiers** — Schema inconsistency. Paid tiers should explicitly state `"unlimited"`. - -7. **[MEDIUM] Fix agent.json `mcp_registry` link** — Currently points to the general MCP servers registry. Change to the actual SpecterQA GitHub repo or remove until the product is listed. - -8. **[MEDIUM] Add `MCP / agent-native` row to README comparison table** — This is SpecterQA's headline differentiator and it's missing from the primary GitHub discovery surface. - -9. **[LOW] Clarify "No AI cost in CI" comparison table row** — All four tools show Yes. Add a note explaining the context: SpecterQA is the only AI-powered tool in the table, and CI is always free precisely because it doesn't call AI. - -10. **[LOW] Surface Anthropic API cost estimates on landing page** — The `$0.05-$0.60` range data exists in website data; it should appear near the pricing table on the landing page. This is one of the highest-leverage objection killers for BYOK hesitation. - -11. **[LOW] Add `[orchestration]` install note to llms.txt** — Clarify that recording requires the full `[mcp,orchestration]` install to avoid confused developers wondering why recording fails. 
- ---- - -## Verdict - -**APPROVED WITH CHANGES** - -**Blockers before launch:** -- license_cmd.py must be located, written, and reviewed -- README install command must align to PyPI - -**Must-fix before promoting (non-blocking to soft launch, blocking to paid marketing spend):** -- Remove "97% gross margin for us" phrasing — both files -- Resolve testimonial placeholders — confirm real or remove -- Patch agent.json authentication + schema gaps - -The rest of the fixes improve conversion and polish but don't block launch. The core copy — the H1, the 3-step flow, the FAQ, the pricing table, the llms.txt description — is clear, honest, and developer-appropriate. This is a strong foundation. diff --git a/docs/landing-page.md b/docs/landing-page.md index 8918199..ea487f2 100644 --- a/docs/landing-page.md +++ b/docs/landing-page.md @@ -4,7 +4,7 @@ _Canonical URL: https://synctek.io/products/simdrive_ _Last updated: 2026-05-17 | Version: 1.0.0a13_ > **STATUS: DEPRECATED in favor of the upcoming `simdrive-site` repo.** This file is preserved -> as a content reference for the new marketing site fork being spun up under INIT-2026-549. +> as a content reference for the new marketing site fork being spun up under [internal-tracker]. > Authoritative product copy lives in `README.md` and `llms.txt`. Brand strings here have been > updated from the legacy `SpecterQA iOS` name; numbers (tool count, pricing) may lag — treat > `docs/MCP_TOOL_SURFACE.md` and `pyproject.toml` as canonical. 
diff --git a/docs/security-scan-INIT-2026-525.md b/docs/security-scan-INIT-2026-525.md deleted file mode 100644 index 061ac06..0000000 --- a/docs/security-scan-INIT-2026-525.md +++ /dev/null @@ -1,342 +0,0 @@ -# Security & IP Scan Report — INIT-2026-525 - -## Scan Date: 2026-04-10 -## Scanner: SecurityAtlas -## Branches Scanned: -- `specterqa-ios` repo (`/Users/atlas/Documents/specterqa-ios`) — `feat/license-activation-flow` -- `specterqa-ios` repo — `feat/agent-discovery-surfaces` -- `synctek-website` repo (`/Users/atlas/Documents/synctek-website`) — `feat/agent-discovery-surfaces` - -## Files Reviewed - -| File | Branch | Repo | -|------|--------|------| -| `src/specterqa/ios/cli/license_cmd.py` | `feat/license-activation-flow` | specterqa-ios | -| `src/specterqa/ios/license/validator.py` | `feat/license-activation-flow` | specterqa-ios | -| `src/specterqa/ios/license/stripe_webhook.py` | `feat/license-activation-flow` | specterqa-ios | -| `tests/test_license_activation.py` | `feat/license-activation-flow` | specterqa-ios | -| `pyproject.toml` | `feat/license-activation-flow` | specterqa-ios | -| `.well-known/agent.json` | `feat/agent-discovery-surfaces` | specterqa-ios | -| `llms.txt` | `feat/agent-discovery-surfaces` | specterqa-ios | -| `docs/landing-page.md` | `feat/agent-discovery-surfaces` | specterqa-ios | -| `src/data/products/specterqa-ios.ts` | `feat/agent-discovery-surfaces` | synctek-website | -| `src/pages/products/[slug].astro` | `feat/agent-discovery-surfaces` | synctek-website | -| `src/pages/llms.txt.ts` | `feat/agent-discovery-surfaces` | synctek-website | - ---- - -## Security Findings - -### CRITICAL (blocks deployment) - -**SEC-CRIT-001 — Path Traversal via License Key in URL Construction** - -- **Files:** `src/specterqa/ios/cli/license_cmd.py` line 100; `src/specterqa/ios/license/validator.py` lines 328, 337 -- **What's wrong:** The license key is inserted directly into a URL path segment without sanitization. 
`httpx` normalizes `../` sequences at the HTTP level before sending the request. A malicious key value of `../../../admin/tokens` constructs the URL `https://api.keygen.sh/v1/admin/tokens/validate` — a completely different endpoint. This allows an attacker to craft a key string that probes arbitrary Keygen.sh API paths under the account. -- **Verified:** Confirmed via live test: `httpx.Request('GET', f'https://api.keygen.sh/v1/accounts/acc/licenses/../../../admin/validate')` resolves to `https://api.keygen.sh/v1/admin/validate`. -- **How to fix:** Add format validation before any URL construction. Reject any key that does not match the expected pattern `^[A-Z0-9][A-Z0-9\-]{6,64}$` (adjust to match Keygen.sh's actual key format). In `license_cmd.py` add at line 187 (after `key.strip()`): - ```python - import re - _LICENSE_KEY_RE = re.compile(r'^[A-Z0-9][A-Z0-9\-]{6,64}$') - if not _LICENSE_KEY_RE.match(key): - raise click.ClickException( - f"Invalid license key format: {key!r}\n" - "Expected format: LIC-XXXX-XXXX-XXXX-XXXX" - ) - ``` - Apply the same guard in `validator.py` `_fetch_via_httpx()` and `_fetch_from_api_requests()` before constructing `url`. - ---- - -### HIGH (must fix before deploy) - -**SEC-HIGH-001 — auth.yaml Written Without Restrictive File Permissions** - -- **File:** `src/specterqa/ios/cli/license_cmd.py` lines 65–71 (`_write_yaml`) -- **What's wrong:** `_write_yaml` opens the file with the process's default umask, which is typically `0644` (world-readable). The file `~/.specterqa/auth.yaml` contains the plaintext license key and tier metadata. Any other user or process on the same machine can read it. On shared developer machines, CI agents, or containers with multiple users this is a real exposure. -- **How to fix:** After writing the file, set permissions to `0o600`: - ```python - import os as _os - path.open("w", encoding="utf-8").write(...) 
# existing write - _os.chmod(path, 0o600) - ``` - Or use `path.touch(mode=0o600)` before opening for write. The parent directory `~/.specterqa/` should also be `0o700`. - -**SEC-HIGH-002 — Keygen.sh Response Body Leaked in RuntimeError (Webhook)** - -- **File:** `src/specterqa/ios/license/stripe_webhook.py` line 147–149 -- **What's wrong:** On `HTTPStatusError`, the full `exc.response.text` from Keygen.sh is embedded in the `RuntimeError` message. This error message propagates through `_handle_checkout_completed` at line 332 as `str(exc)` into the HTTP response body returned to Stripe. Keygen.sh error responses can contain internal account metadata, license policy details, or rate-limit diagnostics that should not be echoed to the public Stripe webhook endpoint. -- **How to fix:** Redact the Keygen response body from the external error message. Log it internally, return a generic message: - ```python - except httpx.HTTPStatusError as exc: - logger.error( - "Keygen.sh license creation failed (HTTP %s): %s", - exc.response.status_code, exc.response.text - ) - raise RuntimeError( - f"Keygen.sh license creation failed (HTTP {exc.response.status_code}). " - "Check server logs for details." - ) from exc - ``` - -**SEC-HIGH-003 — customer.subscription.updated Handler is a No-Op (Functional Security Gap)** - -- **File:** `src/specterqa/ios/license/stripe_webhook.py` lines 352–360 -- **What's wrong:** The `customer.subscription.updated` event is silently acknowledged as `"handled"` but does nothing — the Keygen.sh license tier metadata is never updated when a customer upgrades or downgrades. A customer who downgrades from Team ($299/mo) to Indie ($29/mo) would retain `max_concurrent_sims=10` in their cached Keygen license until the process is manually corrected. This is both a revenue leak and a license enforcement failure. -- **How to fix:** Implement tier sync for subscription updates. 
At minimum, treat an unimplemented handler as `"ignored"` (not `"handled"`) and emit an ops alert, so the gap is visible. Return `{"status": "ignored", "detail": "Subscription update: tier sync not yet implemented — manual action required"}` and log at WARNING level. Implement the full tier-sync path before launch. - -**SEC-HIGH-004 — Default Stripe Price ID Fallbacks Accept Generic Strings** - -- **File:** `src/specterqa/ios/license/stripe_webhook.py` lines 52–57 -- **What's wrong:** `_PRICE_TIER_MAP` is constructed at module import time using `os.environ.get("STRIPE_PRICE_INDIE", "price_indie")`. If the production environment does not set these env vars, the map silently uses generic placeholder strings (`"price_indie"`, `"price_pro"`, etc.) as keys. No real Stripe price ID starts with `"price_indie"` — so every real Stripe checkout event would fail to match any tier, falling back to `"indie"` regardless of what plan was actually purchased. A Team or Enterprise purchaser would be issued an Indie license. -- **How to fix:** Remove the fallback defaults. Raise a `RuntimeError` at startup if the env vars are absent: - ```python - def _build_price_tier_map() -> Dict[str, str]: - required = { - "STRIPE_PRICE_INDIE": "indie", - "STRIPE_PRICE_PRO": "pro", - "STRIPE_PRICE_TEAM": "team", - "STRIPE_PRICE_ENTERPRISE": "enterprise", - } - result = {} - for env_var, tier in required.items(): - val = os.environ.get(env_var, "").strip() - if not val: - raise RuntimeError(f"{env_var} environment variable is required but not set.") - result[val] = tier - return result - - _PRICE_TIER_MAP = _build_price_tier_map() - ``` - This makes misconfiguration fail loud at deploy time, not silently at transaction time. - -**SEC-HIGH-005 — JWT Offline Grace Stub Always Denies (Silent Production Breakage)** - -- **File:** `src/specterqa/ios/license/validator.py` lines 280–292 -- **What's wrong:** `_decode_jwt()` is a stub that returns `{}`. 
When the Keygen.sh API is unreachable (network outage, rate limit), `_check_offline_grace()` always returns `False` because `payload.get("offline_exp")` and `payload.get("iat")` are both `None`. This means the documented 72-hour offline grace period is completely non-functional. Paid customers who experience a network hiccup will be blocked from running tests, despite having a valid active license. The docstring claims this works — it does not. -- **How to fix:** Either implement the JWT base64-decode stub properly: - ```python - def _decode_jwt(self) -> Dict[str, Any]: - import base64, json - parts = self._license_key.split(".") - if len(parts) < 2: - return {} - payload_b64 = parts[1] + "==" # re-pad - try: - return json.loads(base64.urlsafe_b64decode(payload_b64)) - except Exception: - return {} - ``` - Or remove the offline grace feature entirely and update the docstring to accurately describe behavior. Do not ship a feature that is documented as working but is silently broken. - ---- - -### MEDIUM (fix within 7 days) - -**SEC-MED-001 — License Key Not Validated for Format Before URL Construction (validator.py)** - -- **File:** `src/specterqa/ios/license/validator.py` lines 328, 337 -- **What's wrong:** Same root as SEC-CRIT-001 but also present in the `LicenseValidator` class's `_fetch_via_httpx` and `_fetch_from_api_requests` methods. The `LicenseValidator` can be instantiated by library callers with arbitrary key strings. Input validation must be in the class, not only the CLI layer. -- **How to fix:** Add key format validation in `LicenseValidator.__init__` or at the top of `_fetch_from_api()`. See SEC-CRIT-001 for the regex pattern. - -**SEC-MED-002 — `stripe` Library Missing from Core Dependencies (pyproject.toml)** - -- **File:** `pyproject.toml` lines 53–57 -- **What's wrong:** `stripe>=8.0` is listed only in `[project.optional-dependencies.license]`. The `stripe_webhook.py` module uses `stripe.Webhook.construct_event` for signature verification. 
If a deployment installs `specterqa-ios` without the `[license]` extra, the module can be imported but `verify_stripe_signature` will raise `ImportError` at runtime — silently accepting webhooks without signature verification if the exception is not caught (it is caught in the FastAPI/Flask handlers, but callers who use `verify_stripe_signature` directly may not expect this). The webhook deployment guide must make `[license]` mandatory. -- **How to fix:** Document clearly in the deployment guide that webhook servers MUST install `specterqa-ios[license]`. Add a startup assertion in the webhook module that checks for stripe availability and raises a clear startup error if absent. - -**SEC-MED-003 — Trial Counter Reset Function Exported in Public API** - -- **File:** `src/specterqa/ios/license/validator.py` lines 135–139 -- **What's wrong:** `reset_trial_counter()` is a public module-level function. It is intended for test use only (docstring says "Primarily for use in tests") but it is fully importable by any calling code. A malicious or careless library consumer can call `reset_trial_counter()` to bypass the 3-run trial limit indefinitely without a license key. -- **Note:** This is in-process only, so it only affects the current process. However, it should not be part of the public API. -- **How to fix:** Rename to `_reset_trial_counter()` (private by convention) and add a deprecation guard. Or export it only when `__debug__` is True (i.e., non-optimized builds used in testing). - -**SEC-MED-004 — Stripe Webhook Error Detail Leaks Internal Exception String (FastAPI + Flask)** - -- **File:** `src/specterqa/ios/license/stripe_webhook.py` lines 392, 394, 432, 434 -- **What's wrong:** Signature verification failures (`ValueError`) and import errors (`ImportError`) are returned verbatim as HTTP response bodies. A 400 response to Stripe echoes `str(exc)` which may include Stripe's own error descriptions or internal library paths. 
While Stripe's own infrastructure is the caller for real events, these error strings are also logged or surfaced in Stripe's dashboard, potentially leaking implementation details. -- **How to fix:** Return generic user-facing error messages; log the full detail internally: - ```python - except ValueError as exc: - logger.warning("Stripe signature verification failed: %s", exc) - raise HTTPException(status_code=400, detail="Webhook signature verification failed") - ``` - -**SEC-MED-005 — `suspend_keygen_license_by_customer` Silences Multi-License Ambiguity** - -- **File:** `src/specterqa/ios/license/stripe_webhook.py` lines 188–194 -- **What's wrong:** When multiple Keygen licenses are found for a `stripe_customer_id` (e.g., a customer who upgraded and has two license records), only `licenses[0]` is suspended. The rest remain active. An adversarial customer or data migration issue could leave a customer with a suspended license and still-active duplicates. -- **How to fix:** Log a warning when `len(licenses) > 1` and suspend all matching active licenses. Alternatively, raise an alert for manual review. - ---- - -### LOW (informational) - -**SEC-LOW-001 — INIT-2026-525 Internal Initiative ID Exposed in validator.py** - -- **File:** `src/specterqa/ios/license/validator.py` line 304 -- **What's wrong:** The docstring contains `pre-INIT-2026-525 callers` — an internal initiative ID from the BusinessAtlas project management system. This is a public-facing Python package hosted on GitHub. While it is a comment/docstring (not executed), it leaks internal BA initiative tracking terminology to the open-source community. -- **How to fix:** Replace with a version reference: `pre-v11.3.0 callers` or simply omit the backwards-compatibility note from the docstring. 
- -**SEC-LOW-002 — `_write_yaml` Lacks Atomic Write (Race Condition on auth.yaml)** - -- **File:** `src/specterqa/ios/cli/license_cmd.py` lines 65–71 -- **What's wrong:** The file is opened with `path.open("w")` which truncates the file before writing. If the process is interrupted (SIGKILL, out-of-disk) mid-write, `auth.yaml` is left in a corrupted/empty state. The next read will return `None` (caught by `_load_yaml_safe`) and the user will be in trial mode despite having a valid license. -- **How to fix:** Use atomic write-then-rename pattern: - ```python - import tempfile - with tempfile.NamedTemporaryFile("w", dir=path.parent, delete=False, suffix=".tmp") as tf: - yaml.safe_dump(data, tf, ...) - tmp_path = Path(tf.name) - tmp_path.rename(path) - ``` - -**SEC-LOW-003 — `_fetch_from_api_requests` Legacy Path Has No Timeout** - -- **File:** `src/specterqa/ios/license/validator.py` lines 333–340 -- **What's wrong:** The `requests.get(url)` call has no `timeout` parameter. On a slow or unresponsive network, this can hang indefinitely, blocking the CLI. -- **How to fix:** Add `timeout=15.0` to the `requests.get()` call: `requests.get(url, timeout=15.0)`. - -**SEC-LOW-004 — CORS on `.well-known/` Allows `X-ForgeOS-Key` Header** - -- **File:** `synctek-website/public/_headers` line 13 -- **What's wrong:** The `/.well-known/*` CORS rule includes `Access-Control-Allow-Headers: Content-Type, Authorization, X-ForgeOS-Key`. The `X-ForgeOS-Key` header is a ForgeOS authentication credential header. Allowing it in the CORS preflight for `.well-known/agent.json` is unnecessary — this endpoint is read-only public JSON that requires no auth headers. While it does not expose credentials, it unnecessarily signals the existence of a `X-ForgeOS-Key` authentication scheme to any third-party agent reader. -- **How to fix:** Remove `X-ForgeOS-Key` from the `.well-known/*` CORS allow-headers. Keep it only on the ForgeOS API paths that actually require it. 
- ---- - -## IP Findings - -### CRITICAL (blocks deployment) - -None. - ---- - -### HIGH (must fix before deploy) - -**IP-HIGH-001 — Internal Initiative ID Exposed in Public Package Source** - -- **File:** `src/specterqa/ios/license/validator.py` line 304 -- **What's wrong:** Docstring contains `pre-INIT-2026-525 callers`. This is in a Python file that will be published to PyPI and committed to the public GitHub repository. The `INIT-2026-525` nomenclature reveals the existence of an internal project management system (BusinessAtlas) and its naming convention. This is the type of internal operational detail that should never appear in public code. -- **Severity elevated to HIGH** because this is going to PyPI/public GitHub, not just an internal server. -- **How to fix:** Change to: `prior to v11.3.0`. Search all new files for any other `INIT-20XX-XXX` patterns before merge. - ---- - -### MEDIUM - -**IP-MED-001 — "97% Gross Margin" Internal Financial Metric in Public Marketing Copy** - -- **Files:** `llms.txt` line 78 (specterqa-ios repo); `docs/landing-page.md` line 57; `synctek-website/src/data/products/specterqa-ios.ts` line 63 -- **What's wrong:** The phrase "97% gross margin for us" appears in all three public-facing surfaces. This exact internal financial metric should be a deliberate marketing decision, not an accidental leak. If intentional (founder-to-founder transparency, a differentiation story), it is fine. Flag for Chairman confirmation before go-live. -- **How to fix:** Confirm with Chairman this figure is intentionally public. If not, replace with "SyncTek's costs stay minimal; your data never leaves your machine." If intentional, leave as-is. 
- ---- - -## Pricing Consistency Check - -| Tier | agent.json | llms.txt | landing-page.md | specterqa-ios.ts | [slug].astro (via .ts) | -|------|-----------|----------|-----------------|-----------------|----------------------| -| Trial | Free (0) | Free | Free | Free | Free | -| Trial sims | 1 | 1 | 1 | 1 | 1 | -| Trial runs/session | 3 | 3 | 3 | 3 | 3 | -| Indie | $29/mo | $29/mo | $29/mo | $29/mo | $29/mo | -| Indie sims | 2 | 2 | 2 | 2 | 2 | -| Pro | $99/mo | $99/mo | $99/mo | $99/mo | $99/mo | -| Pro sims | 4 | 4 | 4 | 4 | 4 | -| Team | $299/mo | $299/mo | $299/mo | $299/mo | $299/mo | -| Team sims | 10 | 10 | 10 | 10 | 10 | -| Enterprise | Custom | Custom | Custom | Custom | Custom | -| Enterprise sims | unlimited | unlimited | unlimited | unlimited | unlimited | - -**Result: CONSISTENT.** All pricing tiers, prices, and simulator limits are in exact agreement across all five public surfaces. No drift detected. - -**Code enforcement verified:** `license_cmd.py::TIER_SIM_LIMITS` and `validator.py::_TIER_DEFAULTS` and `stripe_webhook.py::_TIER_SIM_LIMITS` all agree on: trial=1, indie=2, pro=4, team=10, enterprise=0 (unlimited). Consistent with marketing surfaces. - ---- - -## Checklist Results - -### Security Checklist - -| Check | Status | Finding | -|-------|--------|---------| -| Secrets/credentials hardcoded | PASS | No API keys, tokens, or account IDs hardcoded. All via env vars. | -| Stripe webhook signature verification | PASS | `stripe.Webhook.construct_event` used correctly. Raw body preserved. | -| Keygen.sh API calls HTTPS only | PASS | `_KEYGEN_BASE = "https://api.keygen.sh/v1"`. No HTTP fallback. | -| Sensitive data in URL params | CONDITIONAL | Key is in URL path (not params), but see SEC-CRIT-001 for path traversal risk. | -| Proper error handling (no stack traces) | PASS with caveat | No tracebacks. But see SEC-HIGH-002 / SEC-MED-004 for response body leaks. 
| -| auth.yaml file permissions (0600) | FAIL | SEC-HIGH-001: no chmod called after write. | -| Input validation (license key format) | FAIL | SEC-CRIT-001: no format validation before URL construction. | -| BYOK enforcement | PASS | `check_byok()` called first in `assert_ready_for_run()`. Dogfood bypass still enforces BYOK. | -| Trial limit enforcement | PASS with caveat | In-process counter is correct. See SEC-MED-003 for public reset function. | -| Trial limit bypass via file deletion | PASS | Counter is in-process only, not persisted to disk. Deleting auth.yaml does not reset trial count. | -| Dependency safety (httpx>=0.27, pyyaml>=6.0, stripe>=8.0) | PASS | No known CVEs in these minimum version bounds as of scan date. | -| Error messages leak internals | PARTIAL | SEC-HIGH-002: Keygen response body in webhook errors. SEC-MED-004: exception strings in HTTP responses. | -| CORS/headers on agent.json | PASS with note | CORS wildcard on `/.well-known/*` is appropriate for A2A discovery. See SEC-LOW-004 re: unnecessary `X-ForgeOS-Key` header. | - -### IP Checklist - -| Check | Status | Finding | -|-------|--------|---------| -| No BusinessAtlas internals exposed | FAIL | IP-HIGH-001: `INIT-2026-525` in validator.py docstring. | -| No proprietary algorithms exposed | PASS | SoM described at marketing level only ("Set-of-Mark prompting, numbered markers"). No internal implementation details. | -| License terms: Elastic License 2.0 maintained | PASS | `pyproject.toml` license = "Elastic-2.0". `specterqa-ios.ts` license = "Elastic-2.0". landing-page.md states "Elastic License 2.0". No MIT/Apache headers present. | -| Pricing accuracy | PASS | All surfaces consistent. See Pricing Consistency Check table above. | -| No competitor disparagement | PASS | Comparison table is factual (feature matrix). No subjective claims against Maestro/Appium/XCUITest. | -| No personal data in public files | PASS | No email addresses beyond public support/sales addresses. 
No personal names. No internal URLs. | -| Copyright notices | NOTE | No per-file copyright headers in new Python files. The top-level LICENSE file covers the package. This is acceptable for open-source packages with a root LICENSE file, but if SyncTek's legal standard requires per-file headers, add `# Copyright (c) 2026 SyncTek LLC. Licensed under the Elastic License 2.0.` to each new file. Not blocking. | - ---- - -## Summary of Findings by Severity - -| ID | Severity | File | Issue | -|----|----------|------|-------| -| SEC-CRIT-001 | CRITICAL | license_cmd.py:100, validator.py:328,337 | Path traversal via unsanitized license key in URL path | -| SEC-HIGH-001 | HIGH | license_cmd.py:65–71 | auth.yaml written without 0600 permissions | -| SEC-HIGH-002 | HIGH | stripe_webhook.py:147–149 | Keygen response body leaked in RuntimeError | -| SEC-HIGH-003 | HIGH | stripe_webhook.py:352–360 | subscription.updated is a silent no-op (license tier not synced on downgrade) | -| SEC-HIGH-004 | HIGH | stripe_webhook.py:52–57 | Default Stripe price IDs accept generic placeholders — silently issues wrong tier | -| SEC-HIGH-005 | HIGH | validator.py:280–292 | JWT offline grace period is permanently broken (stub decoder returns {}) | -| IP-HIGH-001 | HIGH | validator.py:304 | Internal initiative ID `INIT-2026-525` in public PyPI package docstring | -| SEC-MED-001 | MEDIUM | validator.py:328,337 | Same path traversal risk in LicenseValidator class (library API path) | -| SEC-MED-002 | MEDIUM | pyproject.toml:53–57 | stripe missing from core deps — webhook deployments without [license] extra silently skip sig verification | -| SEC-MED-003 | MEDIUM | validator.py:135–139 | `reset_trial_counter()` is public API — trial limit bypassable by library callers | -| SEC-MED-004 | MEDIUM | stripe_webhook.py:392,394,432,434 | Exception strings returned verbatim in HTTP response bodies | -| SEC-MED-005 | MEDIUM | stripe_webhook.py:188–194 | Multi-license customer suspension only cancels first 
match | -| IP-MED-001 | MEDIUM | llms.txt, landing-page.md, specterqa-ios.ts | "97% gross margin" internal metric needs Chairman confirmation before public | -| SEC-LOW-001 | LOW | validator.py:304 | INIT-2026-525 docstring (same as IP-HIGH-001, dual classification) | -| SEC-LOW-002 | LOW | license_cmd.py:65–71 | Non-atomic auth.yaml write (race condition on interruption) | -| SEC-LOW-003 | LOW | validator.py:333–340 | requests.get() has no timeout in legacy path | -| SEC-LOW-004 | LOW | synctek-website/public/_headers:13 | `X-ForgeOS-Key` unnecessary in .well-known CORS allow-headers | - ---- - -## Verdict - -**CONDITIONAL PASS** - -**Blockers before deployment:** - -1. **SEC-CRIT-001** — Path traversal in license key URL construction. Must add format validation regex before any URL is constructed. Affects `license_cmd.py` and `validator.py`. This is the only CRITICAL-severity finding. -2. **IP-HIGH-001** — `INIT-2026-525` internal ID in public PyPI source. Must be replaced with version reference before the package ships to PyPI or merges to a public branch. - -**Must fix within 7 days of deployment:** - -3. **SEC-HIGH-001** — auth.yaml file permissions (0600 not set) -4. **SEC-HIGH-002** — Keygen response body leaking in webhook errors -5. **SEC-HIGH-003** — subscription.updated no-op is a live revenue leak on plan downgrades -6. **SEC-HIGH-004** — Stripe price ID defaults silently issue wrong tier on misconfigured deployment -7. 
**SEC-HIGH-005** — JWT offline grace documented as working but permanently broken - -**No blockers found in:** -- agent.json, llms.txt, landing-page.md, specterqa-ios.ts, llms.txt.ts, [slug].astro -- Pricing consistency (all surfaces agree) -- Stripe signature verification (correct implementation) -- BYOK enforcement (correct, dogfood bypass does not skip BYOK) -- Trial limit enforcement (in-process counter is correct) -- Secrets/credentials (all via env vars, none hardcoded) -- License terms (Elastic License 2.0 consistent throughout) diff --git a/scripts/git-hooks/README.md b/scripts/git-hooks/README.md deleted file mode 100644 index 9504da7..0000000 --- a/scripts/git-hooks/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Governance Git Hooks - -Part of **INIT-2026-535: Governance Hook Hardening**. - -Three hooks enforce initiative linkage and gate compliance on every commit and push. - -## Hooks - -| Hook | Gate | Blocking? | -|------|------|-----------| -| `commit-msg` | Rejects commit unless message has `INIT-YYYY-NNN` or `work_context.json` has active initiative | Yes | -| `pre-push` | Runs `ba branch-check`; blocks push if non-zero exit or gate keywords found | Yes | -| `post-commit` | Logs commit SHA/msg/ts to `CompanyState/business-graph/initiatives/INIT-YYYY-NNN/commit-log.jsonl` | No (warning only) | - -## Installation - -```bash -bash scripts/git-hooks/install-git-hooks.sh -``` - -This copies all three hooks into `.git/hooks/` and sets `+x`. Any pre-existing hooks (not already from this suite) are backed up with a timestamp suffix. - -## How Each Hook Works - -### commit-msg -Reads the commit message file passed by git. Checks for: -1. `INIT-YYYY-NNN` pattern in the message body (any position), OR -2. Most recent `active_work` entry with `initiative_id` in `work_context.json` - -Note: uses `reversed()` scan to find the latest entry — `[0]` is stale by design. 
- -To fix a rejection: -- Add `INIT-2026-NNN` anywhere in your commit message, OR -- Run `ba classify --title "..." --type STANDARD --initiative-id INIT-XXXX-NNN` first - -### pre-push -Skips `main`/`master` (never pushed directly). For all other branches, runs: -``` -ba branch-check -``` -Blocks if exit code is non-zero OR output contains `blocked`, `unresolved`, or `gate`. - -### post-commit -Appends a JSON line to the active initiative's `commit-log.jsonl`: -```json -{"sha": "abc1234", "msg": "feat: thing", "ts": "2026-04-19T00:00:00Z", "repo": "BusinessAtlas", "initiative_id": "INIT-2026-535"} -``` -If no active initiative is set, logs to `CompanyState/business-graph/commit-log-unclassified.jsonl` with a stderr warning. Never fails the commit. - -## Notes for specterqa-ios - -The specterqa-ios repo contains **independent copies** of these hooks (not symlinks). Reason: symlinks would break if the repo is cloned to a different machine or path, whereas copies are self-contained. The hooks in both repos point to `work_context.json` and `ba` in the BusinessAtlas tree — this is intentional, as those are single-machine shared state. 
diff --git a/scripts/git-hooks/commit-msg b/scripts/git-hooks/commit-msg deleted file mode 100755 index bf20885..0000000 --- a/scripts/git-hooks/commit-msg +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -# commit-msg hook — enforce initiative linkage -# Rejects commits unless: -# (a) commit message contains INIT-YYYY-NNN pattern, OR -# (b) work_context.json has an active initiative (most recent entry with initiative_id) -# -# Part of INIT-2026-535 Governance Hook Hardening - -set -euo pipefail - -COMMIT_MSG_FILE="$1" -COMMIT_MSG=$(cat "$COMMIT_MSG_FILE") -WORK_CONTEXT="/Users/atlas/BusinessAtlas/CompanyState/work_context.json" - -# Check (a): message contains INIT reference -if echo "$COMMIT_MSG" | grep -qE 'INIT-[0-9]{4}-[0-9]+'; then - exit 0 -fi - -# Check (b): work_context.json has an active initiative -# Use the LAST (most recent) entry with initiative_id — NOT [0] which is stale -if [ -f "$WORK_CONTEXT" ]; then - ACTIVE_INIT=$(python3 - "$WORK_CONTEXT" <<'PYEOF' -import sys, json -try: - with open(sys.argv[1]) as f: - d = json.load(f) - active = d.get('active_work', []) - for entry in reversed(active): - iid = entry.get('initiative_id', '').strip() - if iid: - print(iid) - break -except Exception: - pass -PYEOF -) - if [ -n "$ACTIVE_INIT" ]; then - exit 0 - fi -fi - -# Both checks failed — reject -cat >&2 </dev/null || git rev-parse --show-toplevel)" -HOOKS_DIR="$REPO_ROOT/.git/hooks" - -if [ ! -d "$HOOKS_DIR" ]; then - echo "ERROR: .git/hooks directory not found at $HOOKS_DIR" >&2 - echo "Make sure you're running this from inside a git repository." >&2 - exit 1 -fi - -HOOKS=(commit-msg pre-push post-commit) -INSTALLED=0 -SKIPPED=0 - -echo "Installing governance hooks into $HOOKS_DIR ..." - -for hook in "${HOOKS[@]}"; do - SRC="$SCRIPT_DIR/$hook" - DEST="$HOOKS_DIR/$hook" - - if [ ! 
-f "$SRC" ]; then - echo " WARNING: source hook not found: $SRC — skipping" >&2 - SKIPPED=$((SKIPPED + 1)) - continue - fi - - # Back up existing hook if it's not already ours - if [ -f "$DEST" ] && ! grep -q "INIT-2026-535" "$DEST" 2>/dev/null; then - BACKUP="$DEST.bak.$(date +%Y%m%d%H%M%S)" - echo " Backing up existing $hook -> $BACKUP" - cp "$DEST" "$BACKUP" - fi - - cp "$SRC" "$DEST" - chmod +x "$DEST" - echo " Installed: $hook" - INSTALLED=$((INSTALLED + 1)) -done - -echo "" -echo "Done. $INSTALLED hook(s) installed, $SKIPPED skipped." -echo "" -echo "Installed hooks:" -for hook in "${HOOKS[@]}"; do - DEST="$HOOKS_DIR/$hook" - if [ -f "$DEST" ]; then - echo " $DEST" - fi -done diff --git a/scripts/git-hooks/post-commit b/scripts/git-hooks/post-commit deleted file mode 100755 index 0fb4766..0000000 --- a/scripts/git-hooks/post-commit +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash -# post-commit hook — log commit to active initiative commit-log -# Non-blocking: failures print a warning but never fail the commit. 
-# -# Part of INIT-2026-535 Governance Hook Hardening - -BA="/Users/atlas/BusinessAtlas/v2/ba" -WORK_CONTEXT="/Users/atlas/BusinessAtlas/CompanyState/work_context.json" -INITIATIVES_DIR="/Users/atlas/BusinessAtlas/CompanyState/business-graph/initiatives" - -# Determine active initiative (most recent entry with initiative_id) -ACTIVE_INIT="" -if [ -f "$WORK_CONTEXT" ]; then - ACTIVE_INIT=$(python3 - "$WORK_CONTEXT" <<'PYEOF' -import sys, json -try: - with open(sys.argv[1]) as f: - d = json.load(f) - active = d.get('active_work', []) - for entry in reversed(active): - iid = entry.get('initiative_id', '').strip() - if iid: - print(iid) - break -except Exception: - pass -PYEOF -) -fi - -# Get commit info -COMMIT_SHA=$(git rev-parse HEAD 2>/dev/null || echo "unknown") -COMMIT_MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "unknown") -COMMIT_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -REPO_NAME=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown") - -# If no active initiative, write to a fallback log -if [ -z "$ACTIVE_INIT" ]; then - FALLBACK_LOG="/Users/atlas/BusinessAtlas/CompanyState/business-graph/commit-log-unclassified.jsonl" - python3 - "$FALLBACK_LOG" "$COMMIT_SHA" "$COMMIT_MSG" "$COMMIT_TS" "$REPO_NAME" <<'PYEOF' 2>/dev/null || true -import sys, json, os -log_file, sha, msg, ts, repo = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5] -os.makedirs(os.path.dirname(log_file), exist_ok=True) -entry = {"sha": sha, "msg": msg, "ts": ts, "repo": repo, "initiative_id": None} -with open(log_file, 'a') as f: - f.write(json.dumps(entry) + "\n") -PYEOF - echo "[post-commit] WARNING: No active initiative — commit logged to unclassified log" >&2 - exit 0 -fi - -# Write to initiative commit-log.jsonl -LOG_FILE="$INITIATIVES_DIR/$ACTIVE_INIT/commit-log.jsonl" - -python3 - "$LOG_FILE" "$COMMIT_SHA" "$COMMIT_MSG" "$COMMIT_TS" "$REPO_NAME" "$ACTIVE_INIT" <<'PYEOF' 2>/dev/null -import sys, json, os -log_file, sha, msg, ts, repo, 
iid = sys.argv[1:] -os.makedirs(os.path.dirname(log_file), exist_ok=True) -entry = {"sha": sha, "msg": msg, "ts": ts, "repo": repo, "initiative_id": iid} -with open(log_file, 'a') as f: - f.write(json.dumps(entry) + "\n") -print(f"[post-commit] Logged commit {sha[:8]} to {iid}") -PYEOF - -EXIT_CODE=$? -if [ "$EXIT_CODE" -ne 0 ]; then - echo "[post-commit] WARNING: Failed to log commit to initiative $ACTIVE_INIT — continuing anyway" >&2 -fi - -exit 0 diff --git a/scripts/git-hooks/pre-push b/scripts/git-hooks/pre-push deleted file mode 100755 index 0bc27b4..0000000 --- a/scripts/git-hooks/pre-push +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env bash -# pre-push hook — governance guard before pushing. -# -# Behavior is repo-aware: -# * In BusinessAtlas: run `ba branch-check` and block if it flags issues. -# This binds the current branch to the classified initiative in BA. -# * In sister repos (specterqa-ios, etc.): skip ba branch-check (it is -# BA-centric and expects the BA branch slug), but still guard against -# direct pushes to main/master. -# -# Part of INIT-2026-535 Governance Hook Hardening. - -BA="/Users/atlas/BusinessAtlas/v2/ba" -BA_REPO_ROOT="/Users/atlas/BusinessAtlas" -CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") -REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || echo "") - -# Never allow direct push to protection branches from any repo. -# Check the remote refs being pushed (git supplies them on stdin). -while read -r local_ref local_sha remote_ref remote_sha; do - case "$remote_ref" in - refs/heads/main|refs/heads/master) - cat >&2 <&2 - exit 0 -fi - -COMBINED_OUTPUT=$("$BA" branch-check 2>&1) -EXIT_CODE=$? 
- -if [ "$EXIT_CODE" -ne 0 ]; then - cat >&2 <&2 <', 'RGB', '\\s+', 'a', 'about', 'accept', 'account', 'active', 'add', 'address', 'aeiouAEIOU', 'again', 'agree', 'air', 'alert', 'all', 'allow', 'am', 'an', 'and', 'any', 'april', 'are', 'as', 'at', 'audio', 'audiobook', 'august', 'author', 'back', 'bbox', 'be', 'because', 'been', 'being', 'book', 'books', 'borrow', 'bottom', 'boy', 'but', 'by', 'cancel', 'cart', 'case', 'catalog', 'center', 'change', 'chapter', 'child', 'close', 'comment', 'company', 'complete', 'completed', 'confidence', 'confidence_band', 'confirm', 'content', 'copy', 'cut', 'dance', 'date', 'day', 'december', 'decline', 'delete', 'deny', 'description', 'details', 'did', 'do', 'does', 'doing', 'done', 'down', 'downloaded', 'ebook', 'edit', 'eight', 'email', 'error', 'every', 'fact', 'failed', 'false', 'father', 'favorites', 'february', 'few', 'film', 'filter', 'first', 'five', 'follow', 'for', 'force', 'forward', 'four', 'free', 'friday', 'from', 'gear', 'girl', 'goodbye', 'government', 'grid', 'guy', 'had', 'hand', 'has', 'have', 'having', 'he', 'head', 'hello', 'help', 'her', 'hide', 'high', 'him', 'his', 'history', 'hold', 'home', 'house', 'how', 'i', 'id', 'if', 'in', 'inactive', 'info', 'into', 'is', 'issue', 'it', 'its', 'january', 'job', 'july', 'june', 'kind', 'last', 'left', 'less', 'library', 'life', 'like', 'list', 'loading', 'login', 'logout', 'lot', 'low', 'magazine', 'man', 'many', 'march', 'may', 'me', 'media', 'medium', 'menu', 'mine', 'moment', 'monday', 'money', 'month', 'more', 'morning', 'mother', 'movie', 'music', 'my', 'name', 'new', 'next', 'night', 'nine', 'no', 'none', 'not', 'november', 'now', 'number', 'o', 'october', 'of', 'off', 'ok', 'on', 'one', 'onto', 'open', 'or', 'our', 'ours', 'out', 'over', 'page', 'pages', 'part', 'partner', 'party', 'password', 'paste', 'pause', 'pending', 'people', 'person', 'phone', 'place', 'play', 'please', 'point', 'post', 'premium', 'previous', 'profile', 'program', 'q', 'q.', 'q/', 
'q\\', 'question', 'raw_confidence', 'read', 'reading', 'ready', 'reason', 'receive', 'redo', 'refresh', 'register', 'reload', 'remove', 'research', 'result', 'resume', 'retry', 'return', 'right', 'saturday', 'save', 'search', 'select', 'send', 'september', 'settings', 'seven', 'share', 'she', 'shelf', 'shelves', 'show', 'side', 'sign', 'signin', 'signup', 'since', 'six', 'skip', 'so', 'some', 'sort', 'stable_id', 'stable_id_loose', 'start', 'stop', 'story', 'study', 'submit', 'subscribe', 'subscription', 'subtitle', 'success', 'summary', 'sunday', 'support', 'system', 'tab', 'tale', 'ten', 'text', 'than', 'that', 'the', 'their', 'them', 'then', 'these', 'they', 'thing', 'this', 'those', 'three', 'thursday', 'time', 'title', 'to', 'today', 'tomorrow', 'top', 'trial', 'true', 'try', 'tuesday', 'two', 'under', 'undo', 'unfollow', 'up', 'upgrade', 'us', 'username', 'utf-8', 'video', 'view', 'war', 'warning', 'was', 'way', 'we', 'wednesday', 'week', 'welcome', 'were', 'what', 'when', 'where', 'which', 'while', 'who', 'why', 'with', 'without', 'woman', 'word', 'work', 'world', 'x', 'year', 'yes', 'yesterday', 'you', 'your', 'yours', '‹', '›', '≡', '☰', '⚙', '✕', '✖'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/22b97680d3066b11 b/simdrive/.hypothesis/constants/22b97680d3066b11 deleted file mode 100644 index 19076d7..0000000 --- a/simdrive/.hypothesis/constants/22b97680d3066b11 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/signer.py -# hypothesis_version: 6.141.1 - -[b'=', ',', ':', 'ascii', 'customer_email', 'enterprise', 'expires_at', 'issued_at', 'pro', 'seats', 'solo', 'team', 'tier', 'trial', 'utf-8'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/24f84c1a540fc92d b/simdrive/.hypothesis/constants/24f84c1a540fc92d deleted file mode 100644 index 841fa4b..0000000 --- a/simdrive/.hypothesis/constants/24f84c1a540fc92d +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/27a521e124dddf17 b/simdrive/.hypothesis/constants/27a521e124dddf17 deleted file mode 100644 index 6a80e32..0000000 --- a/simdrive/.hypothesis/constants/27a521e124dddf17 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.152.4 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/2a94bf2bdc14a19d b/simdrive/.hypothesis/constants/2a94bf2bdc14a19d deleted file mode 100644 index ba34a23..0000000 --- a/simdrive/.hypothesis/constants/2a94bf2bdc14a19d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', 
'--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 
'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/2ac5a50af3eca496 b/simdrive/.hypothesis/constants/2ac5a50af3eca496 deleted file mode 100644 index 16e6830..0000000 --- a/simdrive/.hypothesis/constants/2ac5a50af3eca496 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# hypothesis_version: 6.152.4 - -[0.85, 128, 300, '!=', '.', '.simdrive', '; ', '<', '<=', '==', '>', '>=', 'AppRequires', 'InitialStateRequires', 'L', 'RequiresBlock', 'SIMDRIVE_HOME', 'SimRequires', '_capture', '_simdrive_warning', '` to capture one.', 'action', 
'actual', 'allow', 'any', 'app', 'app.bundle_id', 'app.version', 'app_bundle_id', 'app_version', 'args', 'bundle_id', 'cancel', 'captured_at', 'confidence_band', 'created_at', 'created_by_session', 'device', "don't allow", 'dont allow', 'drift', 'drifted', 'duration_ms', 'error', 'exact', 'execute_error', 'executed', 'expected', 'fail', 'force', 'foreground', 'h', 'halt', 'halt_reason', 'halted_at', 'high', 'id', 'inf', 'initial_state', 'ios_version', 'key', 'major', 'medium', 'minor', 'name', 'ok', 'os_version', 'path', 'post_screenshot', 'pre_screenshot', 'press_key', 'primary_button_label', 'reason', 'reasons', 'recording finalized', 'recording started', 'recording stopping', 'recording.yaml', 'recording_name', 'recordings', 'remedy', 'replay', 'requires', 'screenshot_h', 'screenshot_w', 'session_id', 'sim', 'sim.device', 'sim.ios_version', 'sim_device', 'simdrive.recorder', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'status', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'text_mark_count', 'text_subset_required', 'threshold', 'type_text', 'version', 'version_match', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/2dd2dab9d5a51fb6 b/simdrive/.hypothesis/constants/2dd2dab9d5a51fb6 deleted file mode 100644 index 5edadc0..0000000 --- a/simdrive/.hypothesis/constants/2dd2dab9d5a51fb6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['1.0.0a3'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/30eeb792d2063a6e b/simdrive/.hypothesis/constants/30eeb792d2063a6e deleted file mode 100644 index 9b77307..0000000 --- a/simdrive/.hypothesis/constants/30eeb792d2063a6e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/keypair.py -# hypothesis_version: 
6.152.4 - -['__main__', 'generate'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/340256ea54e4efd1 b/simdrive/.hypothesis/constants/340256ea54e4efd1 deleted file mode 100644 index 151ea05..0000000 --- a/simdrive/.hypothesis/constants/340256ea54e4efd1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/claude_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 0.5, 15.0, 75.0, 200, 1024, 1000000, '.', 'ANTHROPIC_API_KEY', 'args', 'base64', 'claude-opus-4-7', 'confidence', 'content', 'data', 'fail', 'image', 'image/jpeg', 'image/png', 'jpeg', 'jpg', 'media_type', 'rationale', 'rb', 'role', 'source', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/34244ea6e2276714 b/simdrive/.hypothesis/constants/34244ea6e2276714 deleted file mode 100644 index a3f69f0..0000000 --- a/simdrive/.hypothesis/constants/34244ea6e2276714 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.141.1 - -['action', 'already_recording', 'available', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/36f3595d6afa6b8e b/simdrive/.hypothesis/constants/36f3595d6afa6b8e deleted file mode 100644 index c609bde..0000000 --- a/simdrive/.hypothesis/constants/36f3595d6afa6b8e +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/mcp_sampling_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.2, 0.3, 0.5, 0.9, 2048, '.', 'args', 'ascii', 'confidence', 'fail', 'image', 'image/jpeg', 'jpeg', 'jpg', 'rationale', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/37f4bacbefc32971 b/simdrive/.hypothesis/constants/37f4bacbefc32971 deleted file mode 100644 index fa51623..0000000 --- a/simdrive/.hypothesis/constants/37f4bacbefc32971 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/window.py -# hypothesis_version: 6.152.4 - -[5.0, ',', '-e', 'no_process', 'no_window', 'osascript'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/3837c0081e7e3d6d b/simdrive/.hypothesis/constants/3837c0081e7e3d6d deleted file mode 100644 index 3472e40..0000000 --- a/simdrive/.hypothesis/constants/3837c0081e7e3d6d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/robustness.py -# hypothesis_version: 6.152.4 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'app_bundle_id', 'app_version', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'created_by_session', 'dark', 'deny', 'device', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'os_version', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'requires', 'settings', 'simctl', 'simdrive_version', 'siri', 'speech', 'ssim_masks', 'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git 
a/simdrive/.hypothesis/constants/38a9e1032e5b467c b/simdrive/.hypothesis/constants/38a9e1032e5b467c deleted file mode 100644 index 9b9ec96..0000000 --- a/simdrive/.hypothesis/constants/38a9e1032e5b467c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/trial.py -# hypothesis_version: 6.141.1 - -['.simdrive', 'email', 'expires_at', 'installed_at', 'last_server_check', 'license.json', 'license_key'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/3ab583c5fe8654bd b/simdrive/.hypothesis/constants/3ab583c5fe8654bd deleted file mode 100644 index 4b58940..0000000 --- a/simdrive/.hypothesis/constants/3ab583c5fe8654bd +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/errors.py -# hypothesis_version: 6.141.1 - -['Library', 'MobileDevice', 'body', 'http_status', 'identities', 'last_seen_at', 'log_path', 'missing', 'profiles_dir', 'stderr', 'team_id', 'tool', 'udid', 'wda_build_failed', 'wda_device_locked', 'wda_device_not_ready', 'wda_install_failed', 'wda_not_bootstrapped', 'wda_session_lost', 'wda_smoke_failed', 'xct_code'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/3bf1b2b555f1cd26 b/simdrive/.hypothesis/constants/3bf1b2b555f1cd26 deleted file mode 100644 index 8130b0d..0000000 --- a/simdrive/.hypothesis/constants/3bf1b2b555f1cd26 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git 
a/simdrive/.hypothesis/constants/3eaa4ebb789e065e b/simdrive/.hypothesis/constants/3eaa4ebb789e065e deleted file mode 100644 index 3d1f69f..0000000 --- a/simdrive/.hypothesis/constants/3eaa4ebb789e065e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/config.py -# hypothesis_version: 6.152.4 - -[100, 1024, 8080, '/tmp/simdrive-cloud', '0.0.0.0', 'CloudConfig', 'enterprise', 'pro', 'solo', 'team', 'trial'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/47e278142c9b12b4 b/simdrive/.hypothesis/constants/47e278142c9b12b4 deleted file mode 100644 index d4de539..0000000 --- a/simdrive/.hypothesis/constants/47e278142c9b12b4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/__init__.py -# hypothesis_version: 6.141.1 - -['MetricsRegistry', 'Span', 'TraceContext', 'configure_logging', 'dump_metrics', 'get_logger', 'get_registry', 'increment_counter', 'record_histogram', 'start_span'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/4852d1ecf2140227 b/simdrive/.hypothesis/constants/4852d1ecf2140227 deleted file mode 100644 index b678eb4..0000000 --- a/simdrive/.hypothesis/constants/4852d1ecf2140227 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.152.4 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/49fe5af9676d019a b/simdrive/.hypothesis/constants/49fe5af9676d019a deleted 
file mode 100644 index 2d2f103..0000000 --- a/simdrive/.hypothesis/constants/49fe5af9676d019a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/4a8f9aa36e5b7ee8 b/simdrive/.hypothesis/constants/4a8f9aa36e5b7ee8 deleted file mode 100644 index fa149f4..0000000 --- a/simdrive/.hypothesis/constants/4a8f9aa36e5b7ee8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/storage/__init__.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/4d708f828bb70984 b/simdrive/.hypothesis/constants/4d708f828bb70984 deleted file mode 100644 index 1ed592c..0000000 --- a/simdrive/.hypothesis/constants/4d708f828bb70984 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/app.py -# hypothesis_version: 6.152.4 - -['/health', '/v1', 'SELECT 1', 'SimDrive Cloud API', 'db_reachable', 'ok', 'simdrive', 'simdrive_r2_', 'sqlite://', 'status', 'storage_backend', 'unknown', 'version'] \ No newline at end of file diff 
--git a/simdrive/.hypothesis/constants/50e134c034c2f268 b/simdrive/.hypothesis/constants/50e134c034c2f268 deleted file mode 100644 index a8bfa6c..0000000 --- a/simdrive/.hypothesis/constants/50e134c034c2f268 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.141.1 - -[2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--match', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/53305e968b1d0aef b/simdrive/.hypothesis/constants/53305e968b1d0aef deleted file mode 100644 index 3bf2387..0000000 --- a/simdrive/.hypothesis/constants/53305e968b1d0aef +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/hid_inject.py -# hypothesis_version: 6.141.1 - -[5.0, 15.0, 1000.0, '_bin', 'button', 'chord', 'down', 'home', 'key', 'lock', 'side', 'simdrive-input', 'siri', 'size', 'tap', 'text', 'up'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/54c34bb108d033f4 b/simdrive/.hypothesis/constants/54c34bb108d033f4 deleted file mode 100644 index 22770d9..0000000 --- a/simdrive/.hypothesis/constants/54c34bb108d033f4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/diagnostics.py -# hypothesis_version: 6.141.1 - -[0.0, 5.0, 10.0, 15.0, 200, '-', '--json', '-convert', '-o', '-p', '.ips', 'CFBundleDisplayName', 'CFBundleName', 'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'backtrace', 'booted', 'bug_type', 'bundleID', 'bundle_id', 'checks', 'crashing_thread', 'detail', 'devices', 'exception', 'foreground', 'frames', 'hid_helper', 'id', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'r', 'replace', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/54e3be75cdd4f73a b/simdrive/.hypothesis/constants/54e3be75cdd4f73a deleted file mode 100644 index 13f36a8..0000000 --- a/simdrive/.hypothesis/constants/54e3be75cdd4f73a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/schema.py -# hypothesis_version: 6.152.4 - -[180, 'DeviceSelector', 'SuccessCriterion', 'after', 'background', 'device', 'foreground', 'name', 'not_running', 'persona', 'schema_version', 'simulator'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/565b5ecf9a7d17ca b/simdrive/.hypothesis/constants/565b5ecf9a7d17ca deleted file mode 100644 index 2a303eb..0000000 --- a/simdrive/.hypothesis/constants/565b5ecf9a7d17ca +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', 
'--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 
'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5b37d8fedd61eda4 b/simdrive/.hypothesis/constants/5b37d8fedd61eda4 deleted file mode 100644 index f53bf68..0000000 --- a/simdrive/.hypothesis/constants/5b37d8fedd61eda4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/device.py -# hypothesis_version: 6.152.4 - -[1.0, 2.0, 5.0, 10.0, 15.0, 30.0, 120.0, '-', '--bundle-id', '--device', '--json-output', '--quiet', '--signal', '-u', '.json', '/dev/null', '/dev/stdout', '; ', '', 'Developer disk image', 
'Invalid service', 'SIGTERM', 'app', 'available', 'connectionProperties', 'developerModeStatus', 'device', 'device offline', 'deviceProperties', 'devicectl', 'devices', 'disabled', 'disconnected', 'hardwareProperties', 'idevice_id', 'ideviceimagemounter', 'idevicescreenshot', 'idevicesyslog', 'install', 'lastConnectionDate', 'launch', 'list', 'marketingName', 'name', 'no transport', 'not paired', 'pairingState', 'process', 'processIdentifier', 'productType', 'result', 'signal', 'transportType', 'tunnel disconnected', 'tunnelState', 'udid', 'unavailable', 'unpaired', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5b3aa53029f37559 b/simdrive/.hypothesis/constants/5b3aa53029f37559 deleted file mode 100644 index 3265c8f..0000000 --- a/simdrive/.hypothesis/constants/5b3aa53029f37559 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 
'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive 
wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5c37177911cd9b90 b/simdrive/.hypothesis/constants/5c37177911cd9b90 deleted file mode 100644 index b5d2cb0..0000000 --- a/simdrive/.hypothesis/constants/5c37177911cd9b90 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', '/window/size', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'height', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'width', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5c948aa90bf66ca5 b/simdrive/.hypothesis/constants/5c948aa90bf66ca5 deleted file mode 100644 index a27a807..0000000 --- a/simdrive/.hypothesis/constants/5c948aa90bf66ca5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: 
/Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 
'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5d20f7f5263cbc0f b/simdrive/.hypothesis/constants/5d20f7f5263cbc0f deleted file mode 100644 index 
17da9e4..0000000 --- a/simdrive/.hypothesis/constants/5d20f7f5263cbc0f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/__init__.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/5f75b35de93f8f47 b/simdrive/.hypothesis/constants/5f75b35de93f8f47 deleted file mode 100644 index 10048e5..0000000 --- a/simdrive/.hypothesis/constants/5f75b35de93f8f47 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/__init__.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/6246e208e1423c81 b/simdrive/.hypothesis/constants/6246e208e1423c81 deleted file mode 100644 index ad806da..0000000 --- a/simdrive/.hypothesis/constants/6246e208e1423c81 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/.venv/bin/pytest -# hypothesis_version: 6.152.4 - -['.exe', '__main__'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/643df37c90f8ceec b/simdrive/.hypothesis/constants/643df37c90f8ceec deleted file mode 100644 index 2251f52..0000000 --- a/simdrive/.hypothesis/constants/643df37c90f8ceec +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/db/__init__.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/68192632ea959a39 b/simdrive/.hypothesis/constants/68192632ea959a39 deleted file mode 100644 index d54c0a6..0000000 --- a/simdrive/.hypothesis/constants/68192632ea959a39 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/db/models.py -# hypothesis_version: 6.141.1 - -[255, 512, 'check_same_thread', 'connect_args', 'license_activations', 'poolclass', 'recordings', 'sqlite://', 'sqlite:///', 'trial_activations'] \ No newline at end of file diff --git 
a/simdrive/.hypothesis/constants/6bd49704c253148b b/simdrive/.hypothesis/constants/6bd49704c253148b deleted file mode 100644 index 325adad..0000000 --- a/simdrive/.hypothesis/constants/6bd49704c253148b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.152.4 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', '/window/size', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'height', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'width', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/732558cb17a5ade8 b/simdrive/.hypothesis/constants/732558cb17a5ade8 deleted file mode 100644 index 14e7e6e..0000000 --- a/simdrive/.hypothesis/constants/732558cb17a5ade8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/bootstrap.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 10.0, 15.0, 120, 8100, '"', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%b %d %H:%M:%S %Y %Z', '(\n)', '()', ', ', '-', '--bundle-id', '--device', '--json-output', '--project-directory', '-C', '-c', '-derivedDataPath', '-destination', '-noout', '-p', '-project', '-scheme', '-startdate', '-v', '-xctestrun', '.simdrive', '=', 'Apple Development', 'CSSMERR', 'Code 41', 'Code=41', 'PINNED_SHA.txt', 'Podfile', 'REPO=', 'REVOKED', 'SHA=', 'WDA READY', 'WDA_REGISTRY_DIR', 'WebDriverAgentRunner', 'XCTDaemonErrorDomain', '\\(([A-Z0-9]{10})\\)', 'alwaysMatch', 'app', 'build-for-testing', 'build.log', 'bundle', 'bundleId', 'capabilities', 'checkout', 'clone', 'clone.log', 
'codesigning', 'com.apple.dt.Xcode', 'connectionProperties', 'coredevice_uuid', 'ddiServicesAvailable', 'defaults', 'derived', 'derived_data', 'details', 'developerModeStatus', 'device', 'deviceProperties', 'device_name', 'devicectl', 'enabled', 'exec', 'find-certificate', 'find-identity', 'git', 'hardwareProperties', 'hardware_udid', 'host', 'idevicepair', 'info', 'install', 'install_path', 'ios', 'ip', 'last_built_at', 'name', 'notBefore=', 'openssl', 'os', 'os_version', 'paired', 'pairingState', 'pod', 'port', 'project.pbxproj', 'r', 'read', 'ready', 'replace', 'result', 'security', 'sessionId', 'sha1', 'signing_identity', 'source', 'team_id', 'udid', 'uninstall', 'utf-8', 'value', 'version', 'w', 'wda', 'wda_bundle_id', 'x509', 'xcodebuild', 'xcrun', 'xctestrun_path', '{\n}', '{}'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/769037291a3d9e87 b/simdrive/.hypothesis/constants/769037291a3d9e87 deleted file mode 100644 index ca820f0..0000000 --- a/simdrive/.hypothesis/constants/769037291a3d9e87 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/storage/r2.py -# hypothesis_version: 6.152.4 - -['/tmp/simdrive-cloud', '404', 'Body', 'Bucket', 'Code', 'Contents', 'Error', 'Key', 'NoSuchKey', 'R2_ACCESS_KEY_ID', 'R2_ACCOUNT_ID', 'R2_BUCKET', 'R2_SECRET_ACCESS_KEY', 'STORAGE_BACKEND', 'auto', 'get_object', 'list_objects_v2', 's3', 'stub'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/78e393eb2b0faa48 b/simdrive/.hypothesis/constants/78e393eb2b0faa48 deleted file mode 100644 index 34c6b9a..0000000 --- a/simdrive/.hypothesis/constants/78e393eb2b0faa48 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/prompt.py -# hypothesis_version: 6.152.4 - -['(none)', ',', ':', 'marks', 'recent_logs', 'screenshot_path', 'stable_id', 'text'] \ No newline at end of file diff --git 
a/simdrive/.hypothesis/constants/79faeb571a5310f8 b/simdrive/.hypothesis/constants/79faeb571a5310f8 deleted file mode 100644 index a768562..0000000 --- a/simdrive/.hypothesis/constants/79faeb571a5310f8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/errors.py -# hypothesis_version: 6.152.4 - -['action', 'already_recording', 'available', 'bundle_id', 'cloud_auth_invalid', 'cloud_auth_missing', 'cloud_rate_limited', 'code', 'details', 'device_launch_failed', 'error', 'field', 'form', 'hid_unavailable', 'invalid_argument', 'limit_gb', 'message', 'missing_target', 'name', 'no_device', 'no_session', 'not_recording', 'ok', 'path', 'query', 'reason', 'recording_id', 'recording_not_found', 'replay_drift_halt', 'retry_after_seconds', 'session_id', 'sim_unhealthy', 'similarity', 'step_id', 'target_not_found', 'threshold', 'udid', 'used_gb', 'value', 'why'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/7c542dca691cb9dd b/simdrive/.hypothesis/constants/7c542dca691cb9dd deleted file mode 100644 index cfb9628..0000000 --- a/simdrive/.hypothesis/constants/7c542dca691cb9dd +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.141.1 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/7ee024ee7c0768e1 b/simdrive/.hypothesis/constants/7ee024ee7c0768e1 deleted file mode 100644 index b73d6e1..0000000 --- a/simdrive/.hypothesis/constants/7ee024ee7c0768e1 +++ /dev/null @@ -1,4 
+0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/som_device.py -# hypothesis_version: 6.152.4 - -[1.0, '_annotated.png', 'height', 'label', 'name', 'true', 'value', 'visible', 'width', 'x', 'y'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/809323a591ca6992 b/simdrive/.hypothesis/constants/809323a591ca6992 deleted file mode 100644 index 657c5ce..0000000 --- a/simdrive/.hypothesis/constants/809323a591ca6992 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['0.0.0+local', 'simdrive'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/80c4ff665d927b85 b/simdrive/.hypothesis/constants/80c4ff665d927b85 deleted file mode 100644 index 4fd8ae1..0000000 --- a/simdrive/.hypothesis/constants/80c4ff665d927b85 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['1.0.0a2'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8157963a9e0574ec b/simdrive/.hypothesis/constants/8157963a9e0574ec deleted file mode 100644 index aa21462..0000000 --- a/simdrive/.hypothesis/constants/8157963a9e0574ec +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/errors.py -# hypothesis_version: 6.152.4 - -['act_tool_failed', 'attempt', 'cap_usd', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cost_usd', 'criterion_type', 'inner_code', 'inner_message', 'journey_name', 'journeys_dir', 'llm_calls', 'path', 'persona_slug', 'personas_dir', 'reason', 'seconds', 'steps', 'supported', 'tag_filter', 'tool_name', 'version'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/81fefad85d591123 b/simdrive/.hypothesis/constants/81fefad85d591123 deleted file mode 100644 index 841fa4b..0000000 --- 
a/simdrive/.hypothesis/constants/81fefad85d591123 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.141.1 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8580b83b53138083 b/simdrive/.hypothesis/constants/8580b83b53138083 deleted file mode 100644 index c3d4371..0000000 --- a/simdrive/.hypothesis/constants/8580b83b53138083 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/client.py -# hypothesis_version: 6.141.1 - -[30.0, 1000.0, 300, '/element/active', '/screenshot', '/session', '/source', '/status', '/wda/keys', '/wda/pressButton', '/wda/tap', 'DELETE', 'ELEMENT', 'GET', 'POST', 'alwaysMatch', 'body', 'bundleId', 'capabilities', 'duration', 'exc', 'fromX', 'fromY', 'home', 'host', 'lock', 'method', 'name', 'port', 'power', 'response', 'sessionId', 'status', 'toX', 'toY', 'url', 'value', 'volumeDown', 'volumeUp', 'volumedown', 'volumeup', 'wda_http_error', 'wda_session_not_open', 'wda_unknown_button', 'wda_unreachable', 'x', 'y', '\ue003'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/86bd9091f6ecc8a6 b/simdrive/.hypothesis/constants/86bd9091f6ecc8a6 deleted file mode 100644 index 8e7019c..0000000 --- a/simdrive/.hypothesis/constants/86bd9091f6ecc8a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.141.1 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 
'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8a89bcbaa35128c1 b/simdrive/.hypothesis/constants/8a89bcbaa35128c1 deleted file mode 100644 index ba4891c..0000000 --- a/simdrive/.hypothesis/constants/8a89bcbaa35128c1 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/db/models.py -# hypothesis_version: 6.152.4 - -[255, 512, 'check_same_thread', 'connect_args', 'license_activations', 'poolclass', 'recordings', 'sqlite://', 'sqlite:///', 'trial_activations'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8aa7cdd6a53cc03a b/simdrive/.hypothesis/constants/8aa7cdd6a53cc03a deleted file mode 100644 index 34edfe1..0000000 --- a/simdrive/.hypothesis/constants/8aa7cdd6a53cc03a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/tracing.py -# hypothesis_version: 6.141.1 - -[1000.0, 'Span', 'duration_ms', 'ended_at', 'metadata', 'name', 'parent_span_id', 'span_id', 'started_at'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8abf18f77c2a66a6 b/simdrive/.hypothesis/constants/8abf18f77c2a66a6 deleted file mode 100644 index 826021d..0000000 --- a/simdrive/.hypothesis/constants/8abf18f77c2a66a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observe.py -# hypothesis_version: 6.141.1 - -[1000.0, 1000, '.json', 'annotated_path', 'captured_at', 'device', 'height', 'latency_ms', 'marks', 'marks_count', 'observe complete', 'observe_latency_ms', 'recent_logs', 'screenshot_path', 'simdrive.observe', 'simulator', 'target', 'udid', 'width', 'window_bounds_macos', 'x', 'y'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8c0f345c08dd1f86 
b/simdrive/.hypothesis/constants/8c0f345c08dd1f86 deleted file mode 100644 index c3dfd3c..0000000 --- a/simdrive/.hypothesis/constants/8c0f345c08dd1f86 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/bootstrap.py -# hypothesis_version: 6.141.1 - -[0.1, 2.0, 5.0, 10.0, 15.0, 120, 8100, '"', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%b %d %H:%M:%S %Y %Z', '(\n)', '()', ', ', '-', '--bundle-id', '--device', '--json-output', '--project-directory', '-C', '-c', '-derivedDataPath', '-destination', '-noout', '-p', '-project', '-scheme', '-startdate', '-v', '-xctestrun', '.simdrive', '=', 'Apple Development', 'CSSMERR', 'Code=41', 'PINNED_SHA.txt', 'Podfile', 'REPO=', 'REVOKED', 'SHA=', 'WDA READY', 'WDA_REGISTRY_DIR', 'WebDriverAgentRunner', 'XCTDaemonErrorDomain', '\\(([A-Z0-9]{10})\\)', 'alwaysMatch', 'app', 'build-for-testing', 'build.log', 'bundle', 'bundleId', 'capabilities', 'checkout', 'clone', 'clone.log', 'codesigning', 'com.apple.dt.Xcode', 'connectionProperties', 'coredevice_uuid', 'ddiServicesAvailable', 'defaults', 'derived', 'derived_data', 'details', 'developerModeStatus', 'device', 'deviceProperties', 'device_name', 'devicectl', 'enabled', 'exec', 'find-certificate', 'find-identity', 'git', 'hardwareProperties', 'hardware_udid', 'host', 'idevicepair', 'info', 'install', 'install_path', 'ios', 'ip', 'last_built_at', 'name', 'notBefore=', 'openssl', 'os', 'os_version', 'paired', 'pairingState', 'pod', 'port', 'project.pbxproj', 'r', 'read', 'ready', 'replace', 'result', 'security', 'sessionId', 'sha1', 'signing_identity', 'source', 'team_id', 'udid', 'uninstall', 'utf-8', 'value', 'version', 'w', 'wda', 'wda_bundle_id', 'x509', 'xcodebuild', 'xcrun', 'xctestrun_path', '{\n}', '{}'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8d7ed2723dc643a6 b/simdrive/.hypothesis/constants/8d7ed2723dc643a6 deleted file mode 100644 index 55f3cbe..0000000 --- 
a/simdrive/.hypothesis/constants/8d7ed2723dc643a6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/config.py -# hypothesis_version: 6.141.1 - -[100, 1024, 8080, '/tmp/simdrive-cloud', '0.0.0.0', 'CloudConfig', 'enterprise', 'pro', 'solo', 'team', 'trial'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8d8d313e8fc0a9da b/simdrive/.hypothesis/constants/8d8d313e8fc0a9da deleted file mode 100644 index 36fc7c3..0000000 --- a/simdrive/.hypothesis/constants/8d8d313e8fc0a9da +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8f1ab964be981a24 b/simdrive/.hypothesis/constants/8f1ab964be981a24 deleted file mode 100644 index c3afd1a..0000000 --- a/simdrive/.hypothesis/constants/8f1ab964be981a24 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/perf.py -# hypothesis_version: 6.152.4 - -[0.0, 10.0, 15.0, 1024.0, '-M', '-o', '-p', '/usr/bin/footprint', 'B', 'GB', 'KB', 'MB', 'available', 'captured_at', 'clean_mb', 'cpu_pct', 'dirty_mb', 'footprint', 'footprint_mb', 'high', 'launchctl', 'list', 'low', 'medium', 'memory_rss_mb', 'pcpu=', 'phys_footprint', 'phys_footprint_peak', 'pid', 'ps', 'reason', 'reclaimable_mb', 'rss=', 'simctl', 'spawn', 'swapped_mb', 'threads', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/8f92960071c25b12 b/simdrive/.hypothesis/constants/8f92960071c25b12 deleted file mode 100644 index 841ce9c..0000000 --- a/simdrive/.hypothesis/constants/8f92960071c25b12 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/act.py -# hypothesis_version: 6.141.1 - -[0.05, 0.15, 5.0, 10.0, 1000.0, 128, 300, 5000, '-e', '-w', '30', 'Action Button', 'App Switcher', 'Device', 'Home', 'Lock', 
'Rotate Left', 'Rotate Right', 'Shake', 'Siri', 'Trigger Screenshot', 'action-button', 'app-switcher', 'arrow-down', 'arrow-left', 'arrow-right', 'arrow-up', 'backspace', 'cliclick', 'cmd', 'delete', 'enter', 'esc', 'escape', 'hid', 'home', 'kp:arrow-down', 'kp:arrow-left', 'kp:arrow-right', 'kp:arrow-up', 'kp:delete', 'kp:esc', 'kp:return', 'kp:space', 'kp:tab', 'latency_ms', 'lock', 'osascript', 'return', 'rotate-left', 'rotate-right', 'screenshot', 'shake', 'simdrive.act', 'siri', 'space', 't:', 'tab', 'tap dispatched (hid)', 'tap_latency_ms', 'v', 'x', 'y'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/9016ed643f19998d b/simdrive/.hypothesis/constants/9016ed643f19998d deleted file mode 100644 index 8130b0d..0000000 --- a/simdrive/.hypothesis/constants/9016ed643f19998d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/runner.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 5.0, '## Success Criteria', '.simdrive', 'SIMDRIVE_HOME', '_', 'agent_trace.jsonl', 'budget_exceeded', 'clear_field', 'cost_usd', 'crashed', 'crashes', 'done', 'error', 'fail', 'failed', 'llm_calls', 'passed', 'press_key', 'runs', 'screenshot_path', 'seconds', 'session_id', 'since', 'started_at', 'steps', 'summary.json', 'summary.md', 'swipe', 'tap', 'type_text', 'unknown', 'w', '✓', '✗'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/9296c5494f7ded4a b/simdrive/.hypothesis/constants/9296c5494f7ded4a deleted file mode 100644 index 9ebac7b..0000000 --- a/simdrive/.hypothesis/constants/9296c5494f7ded4a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/diagnostics.py -# hypothesis_version: 6.152.4 - -[0.0, 5.0, 10.0, 15.0, 200, '-', '--json', '-convert', '-o', '-p', '.ips', 'CFBundleDisplayName', 'CFBundleName', 'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'backtrace', 'booted', 'bug_type', 'bundleID', 
'bundle_id', 'checks', 'crashing_thread', 'detail', 'devices', 'exception', 'foreground', 'frames', 'hid_helper', 'id', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'r', 'replace', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/95e3d80e7c026ac5 b/simdrive/.hypothesis/constants/95e3d80e7c026ac5 deleted file mode 100644 index cf29f7a..0000000 --- a/simdrive/.hypothesis/constants/95e3d80e7c026ac5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/public_key.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/9cb02b4f598a2dbb b/simdrive/.hypothesis/constants/9cb02b4f598a2dbb deleted file mode 100644 index f2a491e..0000000 --- a/simdrive/.hypothesis/constants/9cb02b4f598a2dbb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/errors.py -# hypothesis_version: 6.141.1 - -['cloud_unreachable', 'code', 'current', 'detail', 'details', 'email', 'error', 'expires_at', 'grace_days', 'ip', 'license_expired', 'license_invalid', 'license_not_found', 'message', 'ok', 'path', 'reason', 'required', 'trial_already_used', 'trial_rate_limited'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/a2e38c1b2ed3efc9 b/simdrive/.hypothesis/constants/a2e38c1b2ed3efc9 deleted file mode 100644 index 3197fd2..0000000 --- a/simdrive/.hypothesis/constants/a2e38c1b2ed3efc9 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/runs.py -# hypothesis_version: 6.141.1 - -[0.0, 100.0, '/runs/increment', 'Authorization', 'Bearer ', 'Retry-After', 'customer_email', 'percent_used', 'period_end', 
'period_start', 'runs_limit', 'runs_used', 'solo', 'tier', 'unknown'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/a49a5880da40cca8 b/simdrive/.hypothesis/constants/a49a5880da40cca8 deleted file mode 100644 index b81ae2b..0000000 --- a/simdrive/.hypothesis/constants/a49a5880da40cca8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--session-id', '--tag', '--version', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'PATH', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'background', 'baseline', 'boolean', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 
'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/a9b619a97ad08c2a b/simdrive/.hypothesis/constants/a9b619a97ad08c2a deleted file mode 100644 index 08850f5..0000000 --- a/simdrive/.hypothesis/constants/a9b619a97ad08c2a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/bootstrap.py -# hypothesis_version: 6.152.4 - -[0.1, 2.0, 10.0, 120, 8100, '"', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%b %d %H:%M:%S %Y %Z', '(\n)', '()', ', ', '-', '--bundle-id', '--device', '--json-output', '--project-directory', '-C', '-c', '-derivedDataPath', '-destination', '-noout', '-p', '-project', '-scheme', '-startdate', '-v', '-xctestrun', '.simdrive', '=', 'Apple Development', 'CSSMERR', 'PINNED_SHA.txt', 'Podfile', 'REPO=', 'REVOKED', 'SHA=', 'WDA READY', 'WDA_REGISTRY_DIR', 
'WebDriverAgentRunner', '\\(([A-Z0-9]{10})\\)', 'app', 'build-for-testing', 'build.log', 'bundle', 'checkout', 'clone', 'clone.log', 'codesigning', 'com.apple.dt.Xcode', 'connectionProperties', 'coredevice_uuid', 'ddiServicesAvailable', 'defaults', 'derived', 'derived_data', 'details', 'developerModeStatus', 'device', 'deviceProperties', 'device_name', 'devicectl', 'enabled', 'exec', 'find-certificate', 'find-identity', 'git', 'hardwareProperties', 'hardware_udid', 'host', 'idevicepair', 'info', 'install', 'install_path', 'ios', 'ip', 'last_built_at', 'name', 'notBefore=', 'openssl', 'os', 'os_version', 'paired', 'pairingState', 'pod', 'port', 'project.pbxproj', 'r', 'read', 'ready', 'replace', 'result', 'security', 'sha1', 'signing_identity', 'source', 'team_id', 'udid', 'uninstall', 'utf-8', 'value', 'version', 'w', 'wda', 'wda_bundle_id', 'x509', 'xcodebuild', 'xcrun', 'xctestrun_path', '{\n}', '{}'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ac2f32e306c52548 b/simdrive/.hypothesis/constants/ac2f32e306c52548 deleted file mode 100644 index b678eb4..0000000 --- a/simdrive/.hypothesis/constants/ac2f32e306c52548 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/ci.py -# hypothesis_version: 6.152.4 - -[0.0, 4096, '.simdrive/journeys', '.simdrive/personas', '_end', 'agent_trace.jsonl', 'budget_exceeded', 'ci_summary_path', 'classname', 'crashed', 'error', 'errors', 'failed', 'failed_journey_names', 'failure', 'failures', 'junit_xml_path', 'message', 'name', 'passed', 'simdrive-journeys', 'simdrive.journey', 'simdrive.journey.ci', 'system-out', 'testcase', 'tests', 'testsuite', 'time', 'total', 'total_llm_cost_usd', 'type', 'utf-8'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/af9fb368be44a8b4 b/simdrive/.hypothesis/constants/af9fb368be44a8b4 deleted file mode 100644 index baf87eb..0000000 --- a/simdrive/.hypothesis/constants/af9fb368be44a8b4 +++ /dev/null @@ 
-1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/license/keypair.py -# hypothesis_version: 6.141.1 - -['__main__', 'generate'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/b058c20639e8b443 b/simdrive/.hypothesis/constants/b058c20639e8b443 deleted file mode 100644 index e262cf8..0000000 --- a/simdrive/.hypothesis/constants/b058c20639e8b443 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/recordings.py -# hypothesis_version: 6.141.1 - -[204, 3600, '/recordings', 'customer_email', 'download_url', 'enterprise', 'journey_slug', 'pro', 'recording_id', 'screenshot_count', 'size_bytes', 'solo', 'team', 'tier', 'trial', 'unknown', 'utf-8'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/b712f3eb2b6f8d36 b/simdrive/.hypothesis/constants/b712f3eb2b6f8d36 deleted file mode 100644 index 1e54fab..0000000 --- a/simdrive/.hypothesis/constants/b712f3eb2b6f8d36 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/keypair.py -# hypothesis_version: 6.141.1 - -['__main__', 'generate'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ba4fa64ccf632dc5 b/simdrive/.hypothesis/constants/ba4fa64ccf632dc5 deleted file mode 100644 index 2461737..0000000 --- a/simdrive/.hypothesis/constants/ba4fa64ccf632dc5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, ',', '--budget-override', '--help', '--journey', '--journeys-dir', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 
'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'simdrive', 'simulator', 'since_session_start', 'som.Mark | 
None', 'space', 'specterqa-ios', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'state', 'step_id', 'steps', 'store_true', 'string', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/bc26b53257719cd4 b/simdrive/.hypothesis/constants/bc26b53257719cd4 deleted file mode 100644 index bbed543..0000000 --- a/simdrive/.hypothesis/constants/bc26b53257719cd4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/cli.py -# hypothesis_version: 6.141.1 - -[b'=', 429, 86400, ',', '.simdrive', '1', ':', 'SIMDRIVE_OFFLINE_DEV', 'ascii', 'code', 'customer_email', 'dev-trial', 'email', 'expires_at', 'installed_at', 'issued_at', 'key', 'last_server_check', 'license.json', 'license_key', 'message', 'mode', 'no_license', 'offline', 'online', 'read_error', 'seats', 'server_time', 'subject', 'tier', 'trial', 'utf-8', 'valid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/bd612b6fb98fb4d4 b/simdrive/.hypothesis/constants/bd612b6fb98fb4d4 deleted file mode 100644 index c14caff..0000000 --- a/simdrive/.hypothesis/constants/bd612b6fb98fb4d4 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/act.py -# hypothesis_version: 6.152.4 - -[0.05, 0.15, 5.0, 10.0, 128, 300, 5000, '-e', '-w', '30', 'Action Button', 'App Switcher', 'Device', 'Home', 'Lock', 'Rotate Left', 'Rotate Right', 'Shake', 'Siri', 'Trigger Screenshot', 'action-button', 'app-switcher', 'arrow-down', 'arrow-left', 'arrow-right', 'arrow-up', 'backspace', 'cliclick', 'cmd', 'delete', 'enter', 'esc', 'escape', 'hid', 'home', 'kp:arrow-down', 'kp:arrow-left', 'kp:arrow-right', 
'kp:arrow-up', 'kp:delete', 'kp:esc', 'kp:return', 'kp:space', 'kp:tab', 'lock', 'osascript', 'return', 'rotate-left', 'rotate-right', 'screenshot', 'shake', 'siri', 'space', 't:', 'tab', 'v'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/bd7bcd7676104675 b/simdrive/.hypothesis/constants/bd7bcd7676104675 deleted file mode 100644 index 07193e6..0000000 --- a/simdrive/.hypothesis/constants/bd7bcd7676104675 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observe.py -# hypothesis_version: 6.152.4 - -[1000, '.json', 'annotated_path', 'captured_at', 'device', 'height', 'marks', 'recent_logs', 'screenshot_path', 'simulator', 'width', 'window_bounds_macos', 'x', 'y'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c37892475535ba9b b/simdrive/.hypothesis/constants/c37892475535ba9b deleted file mode 100644 index 6b6fc52..0000000 --- a/simdrive/.hypothesis/constants/c37892475535ba9b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/errors.py -# hypothesis_version: 6.141.1 - -['act_tool_failed', 'attempt', 'cap_usd', 'ci_invalid_journey', 'claude_call_failed', 'claude_cost_cap_hit', 'cost_usd', 'criterion_type', 'inner_code', 'inner_message', 'journey_name', 'journeys_dir', 'llm_calls', 'path', 'persona_slug', 'personas_dir', 'reason', 'seconds', 'steps', 'supported', 'tag_filter', 'tool_name', 'version'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c58b7cb2228dcac6 b/simdrive/.hypothesis/constants/c58b7cb2228dcac6 deleted file mode 100644 index 1998a99..0000000 --- a/simdrive/.hypothesis/constants/c58b7cb2228dcac6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/persona.py -# hypothesis_version: 6.152.4 - -['^[a-z0-9_-]+$', 'advanced', 'en-US', 'expert', 'high', 'intermediate', 'low', 'medium', 'name', 'novice', 'role', 'schema_version', 
'slug'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c66c67754bb14ccc b/simdrive/.hypothesis/constants/c66c67754bb14ccc deleted file mode 100644 index d4b1002..0000000 --- a/simdrive/.hypothesis/constants/c66c67754bb14ccc +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/specterqa_ios/__init__.py -# hypothesis_version: 6.141.1 - -['17.0.0a1'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c6db4e2150c98d20 b/simdrive/.hypothesis/constants/c6db4e2150c98d20 deleted file mode 100644 index e3b2fda..0000000 --- a/simdrive/.hypothesis/constants/c6db4e2150c98d20 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/errors.py -# hypothesis_version: 6.152.4 - -['code', 'current', 'details', 'email', 'error', 'expires_at', 'grace_days', 'ip', 'license_expired', 'license_invalid', 'license_not_found', 'message', 'ok', 'path', 'reason', 'required', 'trial_already_used', 'trial_rate_limited'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c7aca978dcad40cf b/simdrive/.hypothesis/constants/c7aca978dcad40cf deleted file mode 100644 index 41d9ec0..0000000 --- a/simdrive/.hypothesis/constants/c7aca978dcad40cf +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/__init__.py -# hypothesis_version: 6.152.4 - -['WdaClient', 'bootstrap_device', 'registry'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c8a65665e3d649ae b/simdrive/.hypothesis/constants/c8a65665e3d649ae deleted file mode 100644 index ff21e5a..0000000 --- a/simdrive/.hypothesis/constants/c8a65665e3d649ae +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/wda/errors.py -# hypothesis_version: 6.152.4 - -['Library', 'MobileDevice', 'body', 'http_status', 'identities', 'last_seen_at', 'log_path', 'missing', 'profiles_dir', 'stderr', 'team_id', 
'tool', 'udid', 'wda_build_failed', 'wda_device_locked', 'wda_device_not_ready', 'wda_install_failed', 'wda_not_bootstrapped', 'wda_session_lost', 'wda_smoke_failed'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/c9a40e1ffca7205e b/simdrive/.hypothesis/constants/c9a40e1ffca7205e deleted file mode 100644 index 74dd8cf..0000000 --- a/simdrive/.hypothesis/constants/c9a40e1ffca7205e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/loader.py -# hypothesis_version: 6.152.4 - -['*.yaml', '.simdrive/journeys', '.simdrive/personas'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ca3ca874a46317e0 b/simdrive/.hypothesis/constants/ca3ca874a46317e0 deleted file mode 100644 index 255e7f0..0000000 --- a/simdrive/.hypothesis/constants/ca3ca874a46317e0 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/validator.py -# hypothesis_version: 6.152.4 - -[86400, '.', '=', 'ascii', 'expires_at'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ca70c469a3c808e3 b/simdrive/.hypothesis/constants/ca70c469a3c808e3 deleted file mode 100644 index ef395e0..0000000 --- a/simdrive/.hypothesis/constants/ca70c469a3c808e3 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/logger.py -# hypothesis_version: 6.141.1 - -['%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '1', 'SIMDRIVE_DEBUG', 'args', 'created', 'exc_info', 'exc_text', 'filename', 'funcName', 'level', 'levelname', 'levelno', 'lineno', 'message', 'module', 'msecs', 'msg', 'name', 'pathname', 'process', 'processName', 'relativeCreated', 'simdrive', 'stack_info', 'thread', 'threadName', 'timestamp'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ca7b881c7c2d550f b/simdrive/.hypothesis/constants/ca7b881c7c2d550f deleted file mode 100644 index 6e549ee..0000000 --- 
a/simdrive/.hypothesis/constants/ca7b881c7c2d550f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/robustness.py -# hypothesis_version: 6.152.4 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'dark', 'deny', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'settings', 'simctl', 'simdrive_version', 'siri', 'speech', 'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ca9a53fea3dff8b6 b/simdrive/.hypothesis/constants/ca9a53fea3dff8b6 deleted file mode 100644 index 534b30c..0000000 --- a/simdrive/.hypothesis/constants/ca9a53fea3dff8b6 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/result.py -# hypothesis_version: 6.152.4 - -['artifact_dir', 'budget_exceeded', 'crashed', 'error', 'failed', 'observed_value', 'passed', 'steps', 'success_criteria'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/cb7a38b7ca8b4435 b/simdrive/.hypothesis/constants/cb7a38b7ca8b4435 deleted file mode 100644 index 8629da7..0000000 --- a/simdrive/.hypothesis/constants/cb7a38b7ca8b4435 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/diagnostics.py -# hypothesis_version: 6.152.4 - -[0.0, 5.0, 10.0, 15.0, 30.0, 200, '-', '--device', '--json', '--json-output', '--quiet', '-convert', '-o', '-p', '.ips', '.json', 'CFBundleDisplayName', 
'CFBundleName', 'CFBundleVersion', 'DiagnosticReports', 'Library', 'Logs', 'Path', 'app_name', 'apps', 'backtrace', 'booted', 'bug_type', 'bundleID', 'bundleIdentifier', 'bundleVersion', 'bundle_id', 'checks', 'crashing_thread', 'detail', 'device', 'devicectl', 'devices', 'exception', 'executable', 'file', 'foreground', 'frames', 'hid_helper', 'id', 'info', 'json', 'launchctl', 'list', 'listapps', 'mtime', 'name', 'no path', 'not-running', 'ok', 'path', 'pid', 'plutil', 'processIdentifier', 'processes', 'r', 'replace', 'result', 'running', 'runningProcesses', 'runtimes', 'simctl', 'simctl_runtimes', 'spawn', 'state', 'threads', 'timestamp', 'triggered', 'udid', 'url', 'utf-8', 'version', 'xcode-select', 'xcode_select', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/d21c9fe1097f41c2 b/simdrive/.hypothesis/constants/d21c9fe1097f41c2 deleted file mode 100644 index 0d58ca5..0000000 --- a/simdrive/.hypothesis/constants/d21c9fe1097f41c2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/hid_inject.py -# hypothesis_version: 6.152.4 - -[5.0, 15.0, 1000.0, '_bin', 'button', 'chord', 'down', 'home', 'key', 'lock', 'side', 'simdrive-input', 'siri', 'size', 'tap', 'text', 'up'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/d56bddd53c3d4231 b/simdrive/.hypothesis/constants/d56bddd53c3d4231 deleted file mode 100644 index bc44de6..0000000 --- a/simdrive/.hypothesis/constants/d56bddd53c3d4231 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -['.simdrive', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'launch_failed', 'os_version', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/d9dfc67186bc2895 b/simdrive/.hypothesis/constants/d9dfc67186bc2895 deleted 
file mode 100644 index 05170a4..0000000 --- a/simdrive/.hypothesis/constants/d9dfc67186bc2895 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/__init__.py -# hypothesis_version: 6.152.4 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/da39a3ee5e6b4b0d b/simdrive/.hypothesis/constants/da39a3ee5e6b4b0d deleted file mode 100644 index befbce7..0000000 --- a/simdrive/.hypothesis/constants/da39a3ee5e6b4b0d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/middleware/__init__.py -# hypothesis_version: 6.141.1 - -[] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/dac3cc621f17a65c b/simdrive/.hypothesis/constants/dac3cc621f17a65c deleted file mode 100644 index 157e1a5..0000000 --- a/simdrive/.hypothesis/constants/dac3cc621f17a65c +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# hypothesis_version: 6.141.1 - -[0.85, 128, 300, '.simdrive', 'L', 'SIMDRIVE_HOME', 'action', 'app_bundle_id', 'app_version', 'args', 'captured_at', 'created_at', 'created_by_session', 'device', 'drift', 'drifted', 'duration_ms', 'error', 'execute_error', 'executed', 'force', 'h', 'halt', 'halt_reason', 'halted_at', 'id', 'key', 'name', 'ok', 'os_version', 'post_screenshot', 'pre_screenshot', 'press_key', 'recording finalized', 'recording started', 'recording stopping', 'recording.yaml', 'recording_name', 'recordings', 'replay', 'screenshot_h', 'screenshot_w', 'session_id', 'simdrive.recorder', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'threshold', 'type_text', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/deb3ba2bdb80811f 
b/simdrive/.hypothesis/constants/deb3ba2bdb80811f deleted file mode 100644 index a616975..0000000 --- a/simdrive/.hypothesis/constants/deb3ba2bdb80811f +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/recorder.py -# hypothesis_version: 6.152.4 - -[0.85, 128, 300, '.simdrive', 'L', 'SIMDRIVE_HOME', 'action', 'app_bundle_id', 'app_version', 'args', 'captured_at', 'created_at', 'created_by_session', 'device', 'drift', 'drifted', 'duration_ms', 'error', 'execute_error', 'executed', 'force', 'h', 'halt', 'halt_reason', 'halted_at', 'id', 'key', 'name', 'ok', 'os_version', 'post_screenshot', 'pre_screenshot', 'press_key', 'recording.yaml', 'recordings', 'replay', 'screenshot_h', 'screenshot_w', 'simdrive_version', 'similarity', 'simulator', 'snapshots', 'ssim_masks', 'stable_id', 'stable_id_loose', 'steps', 'steps_planned', 'swipe', 'tags', 'tap', 'text', 'threshold', 'type_text', 'w', 'warn', 'x', 'x1', 'x2', 'y', 'y1', 'y2'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/e07f9d7d07b9d7dd b/simdrive/.hypothesis/constants/e07f9d7d07b9d7dd deleted file mode 100644 index 377c8e7..0000000 --- a/simdrive/.hypothesis/constants/e07f9d7d07b9d7dd +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/robustness.py -# hypothesis_version: 6.141.1 - -[10.0, 200, '*/recording.yaml', 'action', 'all', 'allow', 'allow once', 'appearance', 'args', 'calendar', 'camera', 'cancel', 'contacts', 'contacts-limited', 'created_at', 'dark', 'deny', "don't allow", 'dont allow', 'error', 'errors', 'failed', 'grant', 'granted', 'health', 'homekit', 'id', 'light', 'location', 'location-always', 'media-library', 'medialibrary', 'microphone', 'modified_at', 'motion', 'name', 'ok', 'path', 'permission', 'photos', 'photos-add', 'post_screenshot', 'pre_screenshot', 'press_key', 'privacy', 'r', 'recording.yaml', 'reminders', 'settings', 'simctl', 'simdrive_version', 'siri', 'speech', 
'stderr', 'step_count', 'steps', 'swipe', 'tags', 'tap', 'text', 'type_text', 'ui', 'utf-8', 'warnings', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/e174c10544526fc2 b/simdrive/.hypothesis/constants/e174c10544526fc2 deleted file mode 100644 index 89e9f18..0000000 --- a/simdrive/.hypothesis/constants/e174c10544526fc2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/sim.py -# hypothesis_version: 6.141.1 - -[5.0, 10.0, 15.0, 30.0, 60.0, 2000, '-', '--json', '--last', '--predicate', '--style', '-b', '-convert', '-o', '.', '30s', ':', 'Booted', 'CFBundleVersion', 'bootstatus', 'cliclick', 'compact', 'devices', 'iOS-', 'io', 'isAvailable', 'json', 'launch', 'list', 'listapps', 'log', 'name', 'pbcopy', 'plutil', 'screenshot', 'show', 'shutdown', 'simctl', 'spawn', 'state', 'terminate', 'udid', 'utf-8', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/e1ba4e9f971debd2 b/simdrive/.hypothesis/constants/e1ba4e9f971debd2 deleted file mode 100644 index ba34a23..0000000 --- a/simdrive/.hypothesis/constants/e1ba4e9f971debd2 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.05, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 1.0, 5.0, 200, 300, 1000, 8100, 86400, ' (dry-run)', ',', '--budget-override', '--dry-run', '--email', '--force', '--help', '--journey', '--journeys-dir', '--json', '--license-path', '--offline-dev', '--path', '--persona-override', '--quiet', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 
'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'backup_path', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'dry_run', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'fail', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'json_out', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'lint-recordings', 'lint_recordings', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'migrate-recording', 'migrate_recording', 'migrated', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'primary_button_label', 'properties', 'reason', 'recent_logs', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'results', 'retries', 'return', 
'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simdrive wda-down', 'simdrive wda-up', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'text_mark_count', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda-down', 'wda-down failed: %s', 'wda-up', 'wda-up failed: %s', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/e9485227c31fb29d b/simdrive/.hypothesis/constants/e9485227c31fb29d deleted file mode 100644 index 85bbdec..0000000 --- a/simdrive/.hypothesis/constants/e9485227c31fb29d +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 
'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive 
license', 'simdrive trial', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ea559b291ce0e16b b/simdrive/.hypothesis/constants/ea559b291ce0e16b deleted file mode 100644 index e7de040..0000000 --- a/simdrive/.hypothesis/constants/ea559b291ce0e16b +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ed1da87eccf7cacb b/simdrive/.hypothesis/constants/ed1da87eccf7cacb deleted file mode 100644 index ff8105c..0000000 --- a/simdrive/.hypothesis/constants/ed1da87eccf7cacb +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/sim.py -# hypothesis_version: 6.152.4 - -[5.0, 10.0, 15.0, 30.0, 60.0, 2000, '-', '--json', '--last', '--predicate', '--style', '-b', '-convert', '-o', '.', '30s', ':', 'Booted', 'CFBundleVersion', 'bootstatus', 'cliclick', 'compact', 'devices', 'iOS-', 'io', 'isAvailable', 'json', 'launch', 'list', 'listapps', 'log', 'name', 'pbcopy', 'plutil', 'screenshot', 'show', 'shutdown', 'simctl', 'spawn', 'state', 'terminate', 'udid', 
'utf-8', 'xcrun'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/ed2bf403e05871e5 b/simdrive/.hypothesis/constants/ed2bf403e05871e5 deleted file mode 100644 index e7de040..0000000 --- a/simdrive/.hypothesis/constants/ed2bf403e05871e5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/session.py -# hypothesis_version: 6.152.4 - -[8100, '.simdrive', 'Real Device', 'Recorder', 'SIMDRIVE_HOME', 'Session', 'a', 'actions.jsonl', 'active', 'any_booted', 'device', 'device_name', 'hardware_udid', 'host', 'ip', 'localhost', 'os_version', 'port', 'sessions', 'simulator', 'target', 'udid'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/f2d51d157746e804 b/simdrive/.hypothesis/constants/f2d51d157746e804 deleted file mode 100644 index 1c5d4c3..0000000 --- a/simdrive/.hypothesis/constants/f2d51d157746e804 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/routes/licenses.py -# hypothesis_version: 6.141.1 - -[0.0, 100.0, 365, 86400, '/licenses/activate', '/licenses/status', '/licenses/usage', 'customer_email', 'enterprise', 'expires_at', 'pro', 'solo', 'team', 'tier', 'trial', 'unknown'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/f5c8bdf97a704588 b/simdrive/.hypothesis/constants/f5c8bdf97a704588 deleted file mode 100644 index 151ea05..0000000 --- a/simdrive/.hypothesis/constants/f5c8bdf97a704588 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/claude_client.py -# hypothesis_version: 6.152.4 - -[0.0, 0.004, 0.5, 15.0, 75.0, 200, 1024, 1000000, '.', 'ANTHROPIC_API_KEY', 'args', 'base64', 'claude-opus-4-7', 'confidence', 'content', 'data', 'fail', 'image', 'image/jpeg', 'image/png', 'jpeg', 'jpg', 'media_type', 'rationale', 'rb', 'role', 'source', 'text', 'tool', 'type', 'user'] \ No newline at end of file diff --git 
a/simdrive/.hypothesis/constants/f90d75b39cf3d392 b/simdrive/.hypothesis/constants/f90d75b39cf3d392 deleted file mode 100644 index e17baf1..0000000 --- a/simdrive/.hypothesis/constants/f90d75b39cf3d392 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/license/validator.py -# hypothesis_version: 6.141.1 - -[86400, '.', '=', 'ascii', 'customer_email', 'expires_at', 'license expired', 'license valid', 'tier'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/f9eade2011f72f6e b/simdrive/.hypothesis/constants/f9eade2011f72f6e deleted file mode 100644 index 39909d8..0000000 --- a/simdrive/.hypothesis/constants/f9eade2011f72f6e +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 
'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fa354dbc7e886bb8 
b/simdrive/.hypothesis/constants/fa354dbc7e886bb8 deleted file mode 100644 index 0222828..0000000 --- a/simdrive/.hypothesis/constants/fa354dbc7e886bb8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/cloud/auth.py -# hypothesis_version: 6.141.1 - -['Authorization', 'Bearer', 'Bearer ', 'WWW-Authenticate', 'bearer auth accepted', 'bearer auth rejected', 'customer_email', 'path', 'reason', 'simdrive.cloud.auth', 'tier'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fad9c20eea9be852 b/simdrive/.hypothesis/constants/fad9c20eea9be852 deleted file mode 100644 index 58fb06b..0000000 --- a/simdrive/.hypothesis/constants/fad9c20eea9be852 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/observability/metrics.py -# hypothesis_version: 6.141.1 - -[0.0, 0.5, 0.95, 1.0, 100.0] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fbd928c824abfea8 b/simdrive/.hypothesis/constants/fbd928c824abfea8 deleted file mode 100644 index 21f365c..0000000 --- a/simdrive/.hypothesis/constants/fbd928c824abfea8 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.141.1 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', '--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 
'budget_override', 'bundle_id', 'capture_logs', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'go', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'loaded_at', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'record_start', 'record_stop', 'recording', 'replay', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'run', 'run_journey', 'screen_x', 'screen_y', 'screenshot_h', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simulator', 'since_session_start', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 
'string', 'subcmd', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fcbd8e4b8288bcdc b/simdrive/.hypothesis/constants/fcbd8e4b8288bcdc deleted file mode 100644 index 58e17af..0000000 --- a/simdrive/.hypothesis/constants/fcbd8e4b8288bcdc +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/journey/criteria.py -# hypothesis_version: 6.152.4 - -[200, '; ', 'cpu_pct', 'error', 'marks', 'memory_mb', 'no crashes detected', 'no_crash', 'perf_under', 'rss_mb', 'screen_matches', 'stable_id', 'text', 'text_visible', 'unknown'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fd525d0f72333fb5 b/simdrive/.hypothesis/constants/fd525d0f72333fb5 deleted file mode 100644 index 4fd8ae1..0000000 --- a/simdrive/.hypothesis/constants/fd525d0f72333fb5 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['1.0.0a2'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/feba84bd30d62498 b/simdrive/.hypothesis/constants/feba84bd30d62498 deleted file mode 100644 index 9ec5fcb..0000000 --- a/simdrive/.hypothesis/constants/feba84bd30d62498 +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/server.py -# hypothesis_version: 6.152.4 - -[0.0, 0.04, 0.2, 0.5, 0.55, 0.6, 0.7, 0.85, 5.0, 200, 300, 1000, 8100, 86400, ',', '--budget-override', '--email', '--help', '--journey', '--journeys-dir', '--license-path', '--offline-dev', '--persona-override', '--rebuild', '--session-id', '--signing-identity', '--tag', '--team-id', '--version', '--wda-port', 
'--wireless', '-V', '-h', '..', '.simdrive/journeys', '=', 'KEY=VAL[,...]', 'LICENSE ERROR: %s', 'PATH', 'Your email address.', '__main__', '_simdrive_warning', 'a', 'action', 'allow', 'annotate', 'annotated_path', 'anthropic', 'app_bundle_id', 'app_not_running', 'app_state', 'appearance', 'append', 'apps', 'args', 'array', 'ascii', 'at', 'attempts', 'available', 'backend', 'background', 'baseline', 'boolean', 'bootstrap-device', 'budget', 'budget_override', 'bundle_id', 'capture_logs', 'captured_at', 'chars', 'checked_at', 'choice', 'ci', 'clear_field', 'clear_first', 'cleared', 'cmd', 'code', 'coords', 'cpu_pct', 'crashes', 'current', 'dark', 'default', 'delete', 'delta', 'deny', 'description', 'details', 'device', 'device_name', 'device_udid', 'devices', 'discovery_failed', 'disk_version', 'dismiss_sheet', 'dismissed', 'dispatch_succeeded', 'doctor', 'done', 'drift', 'drift_threshold', 'duration_ms', 'ended', 'enum', 'error', 'exception_type', 'focused_field', 'force', 'foreground', 'from', 'from/to', 'frustrations', 'go', 'goals', 'h', 'halt', 'handler', 'hid', 'hid_note', 'hid_supported', 'host', 'id', 'injection_method', 'inputSchema', 'integer', 'internal', 'items', 'journey', 'journey_path', 'key', 'keyboard_visible', 'label', 'last_action_at', 'last_marks', 'last_seen', 'license', 'light', 'lines', 'list_devices', 'list_replays', 'load_journey', 'loaded_at', 'locale', 'localhost', 'log_lines', 'log_predicate', 'logs', 'mark', 'marks', 'mask_regions', 'max', 'maxItems', 'max_llm_calls', 'max_seconds', 'max_steps', 'memory', 'memory_rss_mb', 'message', 'minItems', 'missing_tools', 'mode', 'mode_note', 'model', 'name', 'next', 'no_baseline', 'not recording', 'number', 'object', 'observations', 'observe', 'ok', 'on_drift', 'oneOf', 'os_version', 'path', 'patience', 'perf', 'perf_baseline', 'perf_compare', 'permissions', 'persona', 'persona_path', 'personas', 'pid', 'pixel_x', 'pixel_y', 'port', 'predicate', 'press_key', 'properties', 'recent_logs', 
'record_start', 'record_stop', 'recording', 'replay', 'replay_id', 'replays', 'required', 'resolved', 'resolved_via', 'retries', 'return', 'role', 'run', 'screen_x', 'screen_y', 'screenshot_b64', 'screenshot_h', 'screenshot_path', 'screenshot_w', 'search', 'session_end', 'session_id', 'session_id|udid', 'session_start', 'session_status', 'sessions', 'set_appearance', 'severity', 'shift', 'show', 'simdrive', 'simdrive license', 'simdrive trial', 'simulator', 'since_session_start', 'slug', 'som.Mark | None', 'space', 'specterqa-ios ci', 'specterqa-ios run', 'stable_id', 'stable_id_loose', 'start', 'state', 'step_id', 'steps', 'store_true', 'string', 'subcmd', 'success_criteria', 'swipe', 'tags', 'tap', 'tap_first', 'target', 'technical_comfort', 'terminate_app', 'text', 'threads', 'to', 'transport', 'trial', 'type', 'type_text', 'udid', 'unavailable_reason', 'validate_replay', 'version', 'via', 'w', 'warn', 'warnings', 'wda', 'wda_not_bootstrapped', 'window_bounds_macos', 'x', 'x1', 'x2', 'y', 'y1', 'y2', 'yaml_path'] \ No newline at end of file diff --git a/simdrive/.hypothesis/constants/fff2072f8765ef2a b/simdrive/.hypothesis/constants/fff2072f8765ef2a deleted file mode 100644 index f6e1c23..0000000 --- a/simdrive/.hypothesis/constants/fff2072f8765ef2a +++ /dev/null @@ -1,4 +0,0 @@ -# file: /Users/atlas/Documents/specterqa-ios/simdrive/src/simdrive/__init__.py -# hypothesis_version: 6.141.1 - -['1.0.0a4'] \ No newline at end of file diff --git a/simdrive/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/simdrive/.hypothesis/unicode_data/13.0.0/charmap.json.gz deleted file mode 100644 index 0354283..0000000 Binary files a/simdrive/.hypothesis/unicode_data/13.0.0/charmap.json.gz and /dev/null differ diff --git a/simdrive/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz b/simdrive/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz deleted file mode 100644 index 7ae9ef1..0000000 Binary files a/simdrive/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz and 
/dev/null differ diff --git a/simdrive/.hypothesis/unicode_data/14.0.0/charmap.json.gz b/simdrive/.hypothesis/unicode_data/14.0.0/charmap.json.gz deleted file mode 100644 index 8b3e456..0000000 Binary files a/simdrive/.hypothesis/unicode_data/14.0.0/charmap.json.gz and /dev/null differ diff --git a/simdrive/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz b/simdrive/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz deleted file mode 100644 index 6e90d30..0000000 Binary files a/simdrive/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz and /dev/null differ diff --git a/simdrive/.hypothesis/unicode_data/15.1.0/charmap.json.gz b/simdrive/.hypothesis/unicode_data/15.1.0/charmap.json.gz deleted file mode 100644 index eb9e9cc..0000000 Binary files a/simdrive/.hypothesis/unicode_data/15.1.0/charmap.json.gz and /dev/null differ diff --git a/simdrive/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz b/simdrive/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz deleted file mode 100644 index e39876b..0000000 Binary files a/simdrive/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz and /dev/null differ diff --git a/simdrive/CHANGELOG.md b/simdrive/CHANGELOG.md index a94cad1..1107fcc 100644 --- a/simdrive/CHANGELOG.md +++ b/simdrive/CHANGELOG.md @@ -40,7 +40,7 @@ - **`tap_and_wait_keyboard` was serialized as bare `tap`.** The composite delegates to `tool_tap` internally, which recorded itself as `action: tap` in `recording.yaml`. Replays lost the keyboard-wait semantic and tapped-then-immediately-acted. **Fix:** new `Recorder.upgrade_step_action(step_id, new_action)` method; the composite tool calls it after `tool_tap` returns so the persisted step carries the right action name. 
-### Fixed — launch verification (F#2, INIT-2026-549) +### Fixed — launch verification (F#2) - **`session_start` now verifies the launched app actually reached foreground before returning `state: "active"`.** Pre-fix, an app that crashed within ~500 ms of launch (missing entitlement, signing mismatch) yielded `state: "active"` and the agent burned multiple blind tap/type roundtrips before discovering the crash via separate `app_state` and `crashes` calls. **Fix:** when `app_bundle_id` is provided and `verify_launch=True` (default), `session_start` polls `app_state` (sim) / `app_state_device` (device) after launch. If two consecutive polls show `not-running`, returns `state: "launched_then_exited"` with `crash_report_path` (most recent `.ips` for the bundle, sim only) and a `recovery:` hint on the FIRST response. - **Settle budget bumped to 3000 ms (10 × 300 ms), env-tunable via `SIMDRIVE_VERIFY_LAUNCH_BUDGET_MS`** (clamped to [500, 15000] ms). Default chosen to cover real SwiftUI cold-start + first-launch onboarding; the previous 1500 ms produced false `launched_then_exited` verdicts on legitimately slow cold-starts. @@ -169,7 +169,7 @@ On a typical 50-mark dense screen, `compact=True, confidence_floor="high"` reduc - **+359 new tests** across the sprint, total **1308 passing** (was 949 at sprint start). Distribution: Wave 1 wait_until/HID errors 26, Wave 1 WDA resilience 16, Wave 1 recorder integrity 8, Wave 1 license+cloud 46, Wave 2 integration 6, Wave 3 chaos 7, Wave 3 coverage push 250. - **Coverage 76% → 82% overall** with hot-path modules at 85–100%: `sim.py` 100%, `act.py` 100%, `session.py` 100%, `observe.py` 97%, `device.py` 94%, `wda/client.py` 85%, `recorder.py` 85%. `server.py` 67% → 70% (full 80% would require running the MCP server in tests — deferred). - **CI ratchet floor raised 65% → 80%** in `.github/workflows/simdrive-ci.yml`. Climb-to-85 plan documented in `simdrive/docs/COVERAGE_RATCHET.md`. 
-- **Sprint structure** — six hardening branches merged via no-ff into `hardening/INIT-2026-549-prod-readiness`: `wait-until-helper`, `wda-resilience`, `recorder-integrity`, `license-cloud-paranoia`, `chaos-test`, `coverage-gate`, plus integrated Wave 2 work directly on the trunk. +- **Sprint structure** — six hardening branches merged via no-ff into `hardening/[internal-tracker]-prod-readiness`: `wait-until-helper`, `wda-resilience`, `recorder-integrity`, `license-cloud-paranoia`, `chaos-test`, `coverage-gate`, plus integrated Wave 2 work directly on the trunk. ### Backwards compatibility @@ -281,7 +281,7 @@ Seven tools flip from `(sim only)` → `(sim + device)`: ### Source -INIT-2026-542. Recording shape is a strict superset of a12 — pre-a13 sim +[internal-tracker]. Recording shape is a strict superset of a12 — pre-a13 sim recordings still load via `RequiresBlock.from_dict` (forward-compatible). 20 new regression tests; 851 pass on the merged suite. @@ -391,7 +391,7 @@ also failed). ### Source -INIT-2026-542. Files: `simdrive/src/simdrive/server.py`, `som.py`, +[internal-tracker]. Files: `simdrive/src/simdrive/server.py`, `som.py`, `observe.py`, `device.py`, `diagnostics.py`, `session.py`, `simdrive/src/simdrive/wda/client.py`, `wda/som_device.py`, `wda/bootstrap.py`, `wda/errors.py`, plus 12 new test files under `simdrive/tests/test_a12_*.py`. @@ -466,7 +466,7 @@ step is removed from every future release. ### Source -INIT-2026-542 + INIT-2026-540 + INIT-2026-548. Files changed: new +[internal-tracker] + [internal-tracker] + [internal-tracker]. Files changed: new `simdrive/wda/som_device.py`; `simdrive/wda/client.py` (new `source()`, `window_size_points()`), `simdrive/wda/bootstrap.py` (Code 41 smoke probe), `simdrive/wda/errors.py` (`wda_ui_automation_disabled`), `simdrive/server.py` (every device-branch input tool @@ -518,11 +518,11 @@ Admin account and held the matching cert, yet bootstrap rejected with "Xcode is not signed in for team X". 
When the strict check misses, we now confirm any Apple-ID account is signed in (`identifier = "..."` probe), log the deferral, and let `xcodebuild -allowProvisioningUpdates` own the final team verification. -The strict path remains primary for older Xcodes. (INIT-2026-548) +The strict path remains primary for older Xcodes. ### Source -INIT-2026-540 + INIT-2026-548. Files changed: `wda/bootstrap.py` (new functions +[internal-tracker] + [internal-tracker]. Files changed: `wda/bootstrap.py` (new functions `auto_detect_team_id`, `_wda_bundle_id_for_team`, `patch_wda_bundle_id`, `_xcode_account_output_has_any_account`; updated `build_wda`, `install_wda`, `bootstrap_device`, `verify_xcode_account_for_team`), `server.py` (`tool_list_devices`, @@ -541,7 +541,7 @@ two schema description strings), `pyproject.toml` (version bump). - **WDA port discovery timeout extended from 15s to 60s.** First-launch xcodebuild test-without-building can take 30s+ on real devices; 15s was too aggressive. - **Locked-device detection.** When xcodebuild reports "Unlock \ to Continue", bootstrap now raises `wda_device_locked` with explicit recovery steps (unlock + optionally extend Auto-Lock) instead of the generic `wda_port_discovery_timeout`. -### Fixed — WDA real-device bootstrap (6 bugs, INIT-2026-547) +### Fixed — WDA real-device bootstrap (6 bugs) All 6 bugs identified in the live-validation report are resolved in `simdrive/wda/bootstrap.py`: @@ -625,7 +625,7 @@ New and updated tests covering all 6 WDA bugs plus the architectural change: ### Source -INIT-2026-547. First release with WDA real-device bootstrap correctly implemented. +[internal-tracker]. First release with WDA real-device bootstrap correctly implemented. 536 unit tests pass; 0 failures. --- @@ -648,20 +648,20 @@ INIT-2026-547. 
First release with WDA real-device bootstrap correctly implemente - `tests/test_readme_quickstart.py` — regression test pinning quickstart commands' presence in README first 100 lines, and absence of stale/misleading strings. ### Source -INIT-2026-546. Closes the polish loop after 1.0.0a3 (dogfood fixes), 1.0.0a4 (MCP sampling), 1.0.0a5 (httpx defensive pin). +[internal-tracker]. Closes the polish loop after 1.0.0a3 (dogfood fixes), 1.0.0a4 (MCP sampling), 1.0.0a5 (httpx defensive pin). --- ## [1.0.0a5] — 2026-05-04 ### Fixed (defensive) -- **Pin `httpx<1.0` to defend against `mcp` ecosystem pre-release leak.** The published `mcp==1.27.0` declares `httpx>=0.27.1` with no upper bound. `pip install --pre simdrive` would resolve to `httpx 1.0.dev3` (a real pre-release on PyPI) which breaks `httpx-sse` and the MCP transport layer. Caught by DeployAtlas pre-publish smoke for 1.0.0a4 (INIT-2026-544). Pin removable once upstream `mcp` adds its own upper bound. +- **Pin `httpx<1.0` to defend against `mcp` ecosystem pre-release leak.** The published `mcp==1.27.0` declares `httpx>=0.27.1` with no upper bound. `pip install --pre simdrive` would resolve to `httpx 1.0.dev3` (a real pre-release on PyPI) which breaks `httpx-sse` and the MCP transport layer. Caught by release pipeline pre-publish smoke for 1.0.0a4. Pin removable once upstream `mcp` adds its own upper bound. ### Added - Regression test `tests/test_packaging_deps.py::test_httpx_pinned_below_1_0` so this defensive pin can't be silently removed. ### Source -INIT-2026-545. Defensive follow-up to 1.0.0a4 from DeployAtlas's smoke. +[internal-tracker]. Defensive follow-up to 1.0.0a4 from release pipeline's smoke. --- @@ -676,13 +676,13 @@ INIT-2026-545. Defensive follow-up to 1.0.0a4 from DeployAtlas's smoke. - **`SimdriveError(code="mcp_sampling_unavailable")`** raised when `tool_run_journey` is invoked outside an MCP context (e.g. an MCP client that doesn't support sampling). 
Recovery hint points to `simdrive run` standalone CLI. ### Fixed -- **MCP flow no longer requires an Anthropic API key.** All 31 MCP tools, including `run_journey`, work with `pip install simdrive` (no extras) when the driving agent supports MCP sampling. Per Chairman directive 2026-05-04. +- **MCP flow no longer requires an Anthropic API key.** All 31 MCP tools, including `run_journey`, work with `pip install simdrive` (no extras) when the driving agent supports MCP sampling. Per maintainer directive 2026-05-04. ### Packaging - `anthropic>=0.30` confirmed in `[project.optional-dependencies]` only. `pip install simdrive` (no extras) works for MCP. `pip install simdrive[claude]` adds the Anthropic SDK for the standalone `simdrive run` / `simdrive ci` CLI paths. ### Source -INIT-2026-544. Architectural follow-up to 1.0.0a3 (INIT-2026-543) — agent-first per Chairman directive. +[internal-tracker]. Architectural follow-up to 1.0.0a3 — agent-first per maintainer directive. --- @@ -701,7 +701,7 @@ INIT-2026-544. Architectural follow-up to 1.0.0a3 (INIT-2026-543) — agent-firs - **Dev Ed25519 keypair** embedded in package (`license/public_key.py:DEV_VERIFY_KEY_HEX` + `DEV_SIGNING_KEY_HEX`). Validator only accepts dev-key-signed licenses with `subject == "dev-trial"` — dev key cannot self-issue prod licenses. ### Source -Reported by Maurice Carrier (Palace iOS), 2026-05-04 dogfood report. INIT-2026-543. +Reported by Maurice Carrier (Palace iOS), 2026-05-04 dogfood report. [internal-tracker]. 
--- @@ -762,7 +762,7 @@ former `specterqa-ios` 16.x line: PyPI distribution name reverted to - **Docs** — `OBSERVABILITY.md`, `PERFORMANCE.md`, `RECOVERY.md` ### Added — Production credentials -- **Production Ed25519 license-signing public key** injected (private key held in Chairman's secure storage; configured as `SIMDRIVE_LICENSE_PRIVATE_KEY` env var on the Railway license server) +- **Production Ed25519 license-signing public key** injected (private key held in maintainer's secure storage; configured as `SIMDRIVE_LICENSE_PRIVATE_KEY` env var on the Railway license server) ### Fixed - `recordings.py` DELETE 204 + response-model `AssertionError` at router init (introduced and fixed in Cycle 2+3) diff --git a/simdrive/cloud_deploy/.env.example b/simdrive/cloud_deploy/.env.example index 20af98b..405dea8 100644 --- a/simdrive/cloud_deploy/.env.example +++ b/simdrive/cloud_deploy/.env.example @@ -52,5 +52,5 @@ SIMDRIVE_DATABASE_URL= # Public hostname for the cloud API. # Set as a Railway custom domain: cloud.simdrive.dev -# (Chairman to confirm domain before first design-partner upload.) +# (maintainer to confirm domain before first design-partner upload.) # SIMDRIVE_CLOUD_DOMAIN=cloud.simdrive.dev diff --git a/simdrive/cloud_deploy/README.md b/simdrive/cloud_deploy/README.md index 9a9a7fb..896fc4c 100644 --- a/simdrive/cloud_deploy/README.md +++ b/simdrive/cloud_deploy/README.md @@ -1,7 +1,7 @@ # SimDrive Cloud API — Railway Deployment Private API for SimDrive replay archive + license management. -Hosted at `cloud.simdrive.dev` (confirm with chairman before first production deploy). +Hosted at `cloud.simdrive.dev` (confirm with maintainer before first production deploy). 
## Prerequisites diff --git a/simdrive/docs/COVERAGE_RATCHET.md b/simdrive/docs/COVERAGE_RATCHET.md index bedb230..312400b 100644 --- a/simdrive/docs/COVERAGE_RATCHET.md +++ b/simdrive/docs/COVERAGE_RATCHET.md @@ -1,7 +1,7 @@ # SimDrive Coverage Ratchet **Status:** Active -**Owner:** INIT-2026-549 (W1 floor, W2 wire-up, W3 climb to 80, W4 push to 85) +**Owner:** [internal-tracker] (W1 floor, W2 wire-up, W3 climb to 80, W4 push to 85) **Last updated:** 2026-05-21 ## Policy @@ -12,16 +12,16 @@ moves **up** — never down. As tests are added and the aggregate climbs, the floor is raised in stages so the gain cannot regress. The long-term target is **85%** aggregate. Hit **2026-05-21** in -INIT-2026-549 Wave 4 (`test/coverage-server-85`) — hot-path measured +[internal-tracker] Wave 4 (`test/coverage-server-85`) — hot-path measured **92%** (well above 85%); overall package coverage **86%**. ## Current floor | Floor | Set in | Date | |----------|---------------------------------|------------| -| 65% | INIT-2026-549 W1 | 2026-05-17 | -| 80% | INIT-2026-549 W3 | 2026-05-20 | -| **90%** | INIT-2026-549 W4 | 2026-05-21 | +| 65% | [internal-tracker] | 2026-05-17 | +| 80% | [internal-tracker] | 2026-05-20 | +| **90%** | [internal-tracker] | 2026-05-21 | Hot-path aggregate measured at floor-set time: **92%** (local run). Overall package coverage at floor-set time: **86%**. 
@@ -93,7 +93,7 @@ Realistic next gains, in order of marginal impact: ## References -- INIT-2026-549 — SimDrive coverage initiative (W1 → W4) +- [internal-tracker] — SimDrive coverage initiative (W1 → W4) - W4 PR: `test/coverage-server-85` (this branch) - Failing CI run that motivated the original ratchet: `25982013410` - Memory: `feedback_pr_whack_a_mole_test_debt` — test debt is its own initiative diff --git a/simdrive/docs/HERO_DEMO_SCRIPT.md b/simdrive/docs/HERO_DEMO_SCRIPT.md deleted file mode 100644 index 8bb2684..0000000 --- a/simdrive/docs/HERO_DEMO_SCRIPT.md +++ /dev/null @@ -1,214 +0,0 @@ -# SimDrive Hero Demo — 60-second Bug Repro & Validate - -This is the recording script for the SimDrive hero MP4 / GIF that anchors the -positioning "**Reproduce and validate iOS bugs in 60 seconds with Claude.**" - -Audience: prospective Pro / Team buyers landing on simdrive.dev and the PyPI -project page. The demo is muted (no voice-over) — captions carry the -narrative. Two output sizes: 1920×1080 MP4 for embeds and a 800×450 looping -GIF for the README hero. - -> **For the Chairman to record.** Atlas drafted the storyboard, exact prompts, -> and terminal commands; the human recorder runs them in real time and -> captures the screen. - ---- - -## Pre-flight checklist (10 minutes) - -1. **Demo target app**: FamilyBag iOS (Chairman owns the codebase + a - debug build on this Mac). - - Repo: `~/Documents/familybag-ios` - - Build & install on a fresh simulator: `bundle exec fastlane build_sim` - (alternatively use SplashMate `bundle exec fastlane beta` if you prefer - a customer-facing-looking app — both ship a sign-in screen). -2. **iOS Simulator**: `iPhone 17 / iOS 26.3` booted, freshly launched, - FamilyBag installed and reset (no cached credentials). -3. **SimDrive**: `pip install --editable simdrive/` from this repo, then - `simdrive trial start --email demo@synctek.io --offline-dev` to clear the - paywall. -4. 
**Cursor or Claude Code**: open with the SimDrive MCP server wired in: - ```json - { "mcpServers": { "simdrive": { "command": "simdrive" } } } - ``` - Restart Cursor / Claude Code so the 32 tools are visible. -5. **Recording tool**: QuickTime "New Screen Recording" alone is sufficient; - ScreenFlow is preferred if you want post-roll captions + zoom emphasis. -6. **Window layout**: Cursor on the left half of the screen (1080 px wide), - iOS Simulator on the right half (centred). Hide every other window. - ---- - -## Storyboard — second-by-second - -### 0:00 – 0:05 · Linear / Jira ticket on screen - -Show a Linear ticket titled exactly: - -> **ENG-1247 · Sign-in fails on iPhone 17 with iOS 26.3** -> -> Steps: open app → enter `test@example.com` → enter `pw123` → tap Sign In → -> see error toast "Network unavailable" even on full Wi-Fi. - -If you don't want to use a real Linear board, mock this up in a Notes window -sized to 1080×600 with the title bold and the steps in a smaller font. - -**Caption overlay:** `Bug: ENG-1247 · Sign-in fails on iPhone 17 / iOS 26.3` - -### 0:05 – 0:08 · Cursor + SimDrive ready - -Cut to Cursor on the left, iOS Simulator on the right (FamilyBag's launch -screen). The cursor focus is in the Cursor chat box, empty. - -**Caption overlay:** `Cursor + SimDrive (32 MCP tools)` - -### 0:08 – 0:13 · Operator types the prompt - -In the Cursor chat box, type **exactly** this prompt (it must look like a -real ticket-handoff, not a polished demo line): - -``` -Use simdrive to reproduce ENG-1247 - sign-in fails on iPhone 17 / iOS 26.3. -Open the app, try test@example.com / pw123, capture whatever error shows. -``` - -Hit Enter at 0:13. - -**Caption overlay:** `Prompt → Claude (no setup, no selectors)` - -### 0:13 – 0:35 · Split-screen: Claude drives the simulator - -This is the load-bearing 22 seconds. 
Claude calls the MCP tools in this -order (the captions list the exact MCP call names so viewers see the -toolchain at work): - -| Seconds | MCP call shown in caption | What happens in the simulator | -|---------|---------------------------|-------------------------------| -| 0:13–0:15 | `session_start({device: "iPhone 17", os_version: "26.3", bundle_id: "com.synctek.familybag"})` | FamilyBag's sign-in screen comes to the foreground | -| 0:15–0:17 | `observe()` | Annotated screenshot flashes; numbered marks visible on form fields | -| 0:17–0:20 | `tap({text: "Email"})` → `type_text({text: "test@example.com"})` | Email field focuses, characters appear | -| 0:20–0:23 | `tap({text: "Password"})` → `type_text({text: "pw123"})` | Password field focuses, dots appear | -| 0:23–0:25 | `tap({text: "Sign In"})` | Button flashes, spinner appears | -| 0:25–0:30 | `observe()` | Error toast "Network unavailable" pops; SimDrive captures + annotates it | -| 0:30–0:35 | `record_stop({name: "ENG-1247-repro"})` | Cursor side shows "Saved recording to ~/.simdrive/recordings/ENG-1247-repro/" | - -**Captions during this block:** keep the running MCP call name in the bottom -strip; switch each call exactly when Claude emits it. - -### 0:35 – 0:45 · Operator types the validation prompt - -After Claude prints its "captured, recording saved" summary, type: - -``` -Now I've fixed the bug. Validate the sign-in flow works - -test@example.com / pw123 should land on the Home screen. -``` - -Hit Enter at 0:45. - -**Caption overlay:** `Same agent. Same tools. Now: validate.` - -### 0:45 – 0:55 · Same flow re-runs, this time succeeds - -Pre-arrange this: between takes (off camera), patch FamilyBag so sign-in -succeeds for `test@example.com / pw123`. Now the same sequence -(`session_start` → `observe` → tap → type → tap → type → tap → `observe`) -re-runs and the final `observe` shows the Home screen with the test user's -profile pill. 
- -Caption sequence mirrors 0:13–0:35 but the final caption flips to: - -> `observe() → Home screen reached. PASS.` - -### 0:55 – 0:60 · End card - -Full-screen card: - -``` - Repro + validate in 47 seconds. - Manual: 12 minutes. - - simdrive.dev - Start your 14-day trial — pip install simdrive -``` - -Hold for 5 seconds. Fade out. - ---- - -## Exact prompts to copy-paste - -```text -Prompt #1 (0:08): -Use simdrive to reproduce ENG-1247 - sign-in fails on iPhone 17 / iOS 26.3. -Open the app, try test@example.com / pw123, capture whatever error shows. - -Prompt #2 (0:35): -Now I've fixed the bug. Validate the sign-in flow works - -test@example.com / pw123 should land on the Home screen. -``` - -## Exact terminal commands to run before recording - -```bash -# 1. Reset the simulator so the demo starts on a known screen. -xcrun simctl shutdown all -xcrun simctl erase "iPhone 17" -xcrun simctl boot "iPhone 17" -open -a Simulator - -# 2. Install (or reinstall) FamilyBag for the demo. -cd ~/Documents/familybag-ios -bundle exec fastlane build_sim - -# 3. Install SimDrive + trial license. -cd ~/Documents/specterqa-ios/simdrive -pip install --editable . -simdrive trial start --email demo@synctek.io --offline-dev - -# 4. Verify the MCP wiring once. -simdrive --version -# Cursor / Claude Code: restart so .mcp.json picks up `simdrive`. -``` - -## Recording tool recommendation - -- **Baseline (no editing required):** macOS QuickTime → File → New Screen - Recording → Selected Portion → frame the Cursor + Simulator pair at - 1920×1080. Captions added in post via QuickTime Player → "Show Movie - Properties" (limited but fine for a draft). -- **Preferred:** ScreenFlow ($169). Lets you add per-second caption strips, - zoom emphases on the MCP call names, and export the same source at both - 1920×1080 MP4 and 800×450 GIF. -- **Free alternative to ScreenFlow:** record raw in QuickTime, then run - `ffmpeg -i hero.mp4 -vf "scale=800:450,fps=12" hero.gif` for the GIF - export. 
- -## Output assets - -| File | Size | Where it ships | -|------|------|----------------| -| `hero-60s.mp4` | 1920×1080, ≤ 25 MB | simdrive.dev landing page hero | -| `hero-60s.gif` | 800×450, ≤ 6 MB | README.md "60-second bug repro" section | -| `hero-poster.jpg` | 1920×1080 | Social cards + email signatures | - -Drop final assets into `simdrive/docs/marketing/hero/` and surface their -paths in the simdrive-site repo via a PR. (Out of scope for this script — the -simdrive-site update is a separate workstream.) - ---- - -## Why this demo works - -The whole 60 seconds answers two questions a buyer asks in the first -minute: - -1. **Will it actually drive my app?** The split-screen with live taps + the - captioned MCP calls shows the agent *doing the thing*, not a polished - render. -2. **What's the payoff?** The end-card pits 47 seconds against the 12-minute - manual repro — the time-saved number is the wedge for the $29/mo - conversion. - -Do not embellish with stock music or 3D logo intros. The product is the -agent driving the simulator; everything else dilutes the proof. diff --git a/simdrive/docs/IP_STRATEGY.md b/simdrive/docs/IP_STRATEGY.md deleted file mode 100644 index fb66e8d..0000000 --- a/simdrive/docs/IP_STRATEGY.md +++ /dev/null @@ -1,271 +0,0 @@ -# SpecterQA — IP Protection Strategy - -**Status:** Draft v1 -**Date:** 2026-04-29 -**Author:** Atlas (CEO Interface), SyncTek LLC -**Audience:** Maurice Carrier (Chairman), legal counsel (when retained) - -> **This is a strategy document, not legal advice.** It frames SyncTek's options and ranks them by ROI. Any USPTO filing, copyright registration, or license-text rewrite should be reviewed by licensed IP counsel before submission. Cost estimates are public-information rule-of-thumb numbers, not quotes. Lines beginning *"NEEDS COUNSEL"* explicitly defer to an attorney. - ---- - -## §1. 
Executive summary - -SpecterQA is SyncTek's MCP-native iOS simulator driver, shipping as PyPI `specterqa-ios 1.0.0a1` (29 MCP tools, ~4,118 LOC Python + ~600 LOC ObjC native HID helper). The asset to protect is the bundle: a vision-first agent-driven tool surface, a native HID-injection technique that bypasses XCTest on iOS 26, and a fast-moving dogfood relationship with one paying-attention customer (Palace iOS). The window before Anthropic, Maestro, or a YC clone closes the gap is roughly 9 months optimistic, 15 pessimistic. - -**The three most defensible IP elements:** - -1. The *brand* — "SpecterQA" wordmark + pixel-pin logo. -2. The *dogfood velocity loop* — Palace receipts, three feedback rounds closed in five days, moving-target release cadence (5 versions in 1 week). -3. The *Cloud / Pro tier* once it ships — closed-source, hosted, with stored journey corpus that creates real switching cost. - -**Open-core split:** SpecterQA engine stays open under MIT; SpecterQA Cloud + WDA real-device + compliance ships under a separate proprietary license in a separate package. - -**Three highest-leverage IP measures (ranked by ROI):** - -1. **Resolve the LICENSE / pyproject mismatch** (~$0). Repo root `LICENSE` says Elastic 2.0; the package says MIT. Self-inflicted wound. Blocks anthropic-cookbook eligibility. -2. **File USPTO trademark for "SpecterQA" wordmark, Class 9** (~$250 self-filed, $750-2,000 with attorney). The brand is the most enforceable asset. -3. **Add `TRADEMARK.md` + `NOTICE` + correct `LICENSE`** (~$0, 1 hour). Establishes first-use evidence and signals professionalism. - -Patents are out of scope. Cost ($15-30K per patent through grant) doesn't pencil at <$5K MRR, and most candidate claims fail post-Alice. - ---- - -## §2. Asset inventory - -Classification: **Commodity** (anyone rebuilds in <1 week) / **Defensible** (multi-week reverse-engineering) / **Crown jewel** (insider knowledge or sustained-effort moat). 
- -| # | Component | Class | Where it lives | Replication cost | -|---|---|---|---|---| -| 1 | Native HID via `SimDeviceLegacyHIDClient` + `IndigoMessage` | **Crown jewel** | `simdrive/native/src/simdrive_input.m`, `Indigo.h` | 2-3 weeks for a competent ObjC eng. Note: idb's MIT-licensed `FBSimulatorIndigoHID` documents the technique publicly; our value-add is the minimal, FBControlCore-free port + iOS 26 tuning. | -| 2 | Indigo touch wire format (336-byte struct, second-payload duplicate) | **Defensible** | `Indigo.h:160-198`, `simdrive_input.m:113-140` | Documented in idb. Ours adds Xcode 26.2 disassembly notes. ~1-2 weeks to re-derive. | -| 3 | iOS 26 TextField focus via real UITouch (killer feature) | **Crown jewel** | Combination of #1 + dispatch-queue serialization (`simdrive_input.m:142-177`) + 60 ms down/up cadence (`:233-235`) | 2-3 weeks to discover synthetic CGEvents fail on iOS 26, plus a week to find SimDeviceLegacyHIDClient is the fix. The *insight* is what's defensible; partially eroded by our own public docs. | -| 4 | `type_text` 25 ms Shift-settle timing | **Defensible** | `simdrive_input.m:295-321` (`kModSettle`, `kKeyHold`, `kKeyGap`) | Non-obvious. A cloner ships broken upper-case before finding it. ~1 week trial-and-error. | -| 5 | Vision-first MCP tool surface (29 tools, schema choices) | **Defensible** | `src/specterqa_ios/server.py` `_TOOLS` (1,369 LOC) | Schema response shapes (`_simdrive_warning` drift envelope, `step_id`, structured `code` errors) are 2-3 weeks of LLM-loop polish. Copyright covers verbatim copies. | -| 6 | SoM annotation + `stable_id` (20 px tight / 60 px loose buckets) | **Defensible** | `src/specterqa_ios/som.py:241-258` | Specific bucket sizes are non-obvious. ~1 week to re-derive empirically. | -| 7 | SSIM region masking for replay drift | **Commodity** | `src/specterqa_ios/recorder.py:125-205` | SSIM is 2004 prior art. Status-bar masking is obvious within an hour. 
| -| 8 | Recording/replay format (`recording.yaml` + sidecar JSONs + `actions.jsonl`) | **Commodity** | `recorder.py` (321 LOC) | YAML schema clonable in an afternoon. Defensibility comes from combo with `stable_id` + customer lock-in. | -| 9 | The 29-tool surface as a whole | **Defensible** | `server.py` `_TOOLS` (lines 790-1300) | The lifecycle/observe/act/record/perf/diagnostics/robustness combo is what makes agent loops happy. 3 weeks of taste. | -| 10 | `bootstrap-device` flow (v1.1, WDA path) | **Defensible** (when shipped) | Not built; PRODUCTIZATION_PLAN §4 | Provisioning UX is the work. Detox/Maestro examples exist but each had 6-12 months of polish. | -| 11 | Palace dogfood receipts + "canonical iOS sim driver" testimonial | **Crown jewel** | `SIMDRIVE_v0.2.0a1_DOGFOOD.md` (Palace repo), CHANGELOG | A clone *cannot* have these. Permanent reputational asset. Most under-rated IP we own. | - -**Crown jewels by enforceability:** (1) Brand + dogfood receipts — trademark + reputation, both enforceable; (2) The bundle-as-design (#3 + #5 + #6) — copyright + speed; (3) Native HID technique — derived from MIT-licensed idb, so exclusivity is weak; what we own is the minimal port + iOS 26 tuning. - ---- - -## §3. License strategy — open-core split - -### Current state — the contradiction - -The repository has a **license-text mismatch** that needs immediate resolution: - -| Surface | Declares | -|---|---| -| Repo root `LICENSE` | **Elastic License 2.0** | -| Repo root `pyproject.toml` (legacy 16.x) | `Elastic-2.0` | -| `simdrive/pyproject.toml` (the new 17.x package shipped 2026-05-01) | **`MIT`** | -| `simdrive/native/src/Indigo.h` | MIT (Meta/idb upstream) | - -PRODUCTIZATION_PLAN §8 intent: "simdrive (MIT) stays free forever." But the repo root still carries Elastic 2.0 from the legacy 16.x XCTest line. Until fixed, anyone reading the repo (rather than just the wheel) sees a contradictory and more-restrictive license. 
**Fix before any push to the MCP registry, awesome-mcp, or anthropic-cookbook.** Elastic 2.0 is not OSI-approved and would disqualify us. - -### The split - -| Layer | License | Rationale | -|---|---|---| -| **SpecterQA engine** (29 tools, native HID, observe/act/record/replay) | **MIT** | OSI-approved, cookbook-eligible, training-corpus-friendly, dev-tool community trust. Already declared in `simdrive/pyproject.toml`. | -| **SpecterQA Cloud / Pro / Team** (hosted runners, journey corpus, dashboards) | **Proprietary, closed-source**, paid commercial license | The revenue moat (PRODUCTIZATION_PLAN §8). SaaS hosted; source not distributed. | -| **WDA real-device + compliance tier** (SOC 2, RBAC, SSO) | **Proprietary**, in `specterqa-cloud` package only | Held back from OSS as paid wedge per PRODUCTIZATION_PLAN §8. | - -### Why MIT for the engine - -Permissive licenses are the OSS dev-tool default. Anthropic-cookbook accepts MIT/Apache-2 only. Models trained on GitHub absorb MIT code by default — we *want* the next Claude to know SpecterQA's tool surface. Aligning with the MCP ecosystem (mostly MIT/Apache) is positional capital. - -### License compatibility — proprietary tier wrapping MIT engine - -**Yes, MIT permits this.** The proprietary `specterqa-cloud` package can import, wrap, extend, and re-export `specterqa_ios`'s public API and ship a closed-source binary that includes it, *provided* the MIT copyright notice + license text travel with the binary in a `LICENSES/` or `NOTICE` file. **Action:** when Cloud ships, include `LICENSES/MIT-specterqa-ios.txt` and a `NOTICE` naming SyncTek as engine copyright holder. NEEDS COUNSEL — exact NOTICE wording for first commercial release. - -### Risk: someone forks the MIT engine, reskins it, sells competing Cloud - -Mitigations: - -1. **Trademark.** A fork can copy code but cannot call itself "SpecterQA" without infringement. Single strongest defense. -2. 
**Cloud value-add.** Hosted runners, dashboards, multi-tenant journey corpus — things MIT doesn't grant. -3. **Speed.** A fork is always behind HEAD (5 versions in 1 week per CHANGELOG). -4. **Brand + dogfood receipts.** Palace's testimonial belongs to *us*, not to a fork. - -### Apache-2 vs MIT? - -Apache-2 adds an explicit patent grant + defensive-termination clause vs MIT's brevity. The patent-grant difference matters only if (a) we patent something or (b) someone patents derivative work and sues us. We're not patenting (§5), and the patent-troll attack surface is small at our scale. **Verdict: stay MIT.** Revisit at 1.0 with counsel. NEEDS COUNSEL — final call between MIT and Apache-2 at 1.0 launch. - -### BSL? - -Tempting (blocks competitive offerings for N years), but **not OSI-approved** — disqualifies us from anthropic-cookbook + awesome-mcp + "open source" cred. Carries a "founder is anxious" community signal. **No for the engine.** Plausible later for a Cloud client SDK if we ship one. - -### Recommendation block — declarations to update - -| File | Current | Update to | When | -|---|---|---|---| -| `/LICENSE` (repo root) | Elastic 2.0 | MIT (full text) | 2026-05-05 | -| `/pyproject.toml` (legacy 16.x) | `Elastic-2.0` | MIT, or delete with 16.x retirement | 2026-05-05 | -| `/simdrive/pyproject.toml` | `MIT` | No change | — | -| `/simdrive/native/src/simdrive_input.m` | No header | Add MIT header + "Derived from idb (MIT)" attribution | Before 1.0 | -| `/simdrive/native/src/Indigo.h` | MIT (Meta) | Keep Meta header; add SyncTek modifications notice below | Before 1.0 | -| New: `/NOTICE` | — | Create. SyncTek copyright + MIT recital + Meta/idb attribution | Before 1.0 | -| New: `/TRADEMARK.md` | — | Declare "SpecterQA" + pixel-pin as marks of SyncTek; usage guidelines | Before USPTO filing | - ---- - -## §4. 
Trademark strategy - -### Marks to register - -| # | Mark | Type | USPTO Class | Priority | -|---|---|---|---|---| -| 1 | **"SpecterQA"** | Wordmark | **Class 9** (software) | **HIGH — file first** | -| 2 | Pixel-pin logo (`docs/brand/logo-mark-only.svg`) | Figurative | Class 9 | MEDIUM — file second | -| 3 | "Hand your iOS simulator to your agent." | Slogan | Class 9 | LOW — defer | - -### Cost & timeline - -USPTO TEAS Plus: **~$250 per class per mark** self-filed; **~$500-1,500 attorney fees** on top. Time to registration: **6-12 months**. - -| Action | Self | Attorney | Time | -|---|---|---|---| -| USPTO clearance search ("SpecterQA" Class 9 + adjacent) | $0 (TESS, error-prone) | $300-500 basic / $1,500+ deep | 1-2 weeks | -| TEAS Plus wordmark | $250 | $750-2,000 incl. fees | 6-12 months | -| TEAS Plus figurative mark | $250 | $750-2,000 | 6-12 months | -| **Total, attorney-assisted, two marks** | $500 | **$1,500-4,500** | 6-12 months | - -### Filing strategy - -1. **Wordmark first.** "SpecterQA" is highest-leverage. File Section 1(a) (use in commerce) given the package is on PyPI. -2. **First-use date.** Anchor to the `specterqa-ios 1.0.0a1` release of **2026-05-01** (PRODUCTIZATION_PLAN §7 brand cutover). The legacy 16.x `specterqa-ios` line is also `specterqa-ios` and dates further back. NEEDS COUNSEL: confirm anchor date. -3. **Specimen of use.** PyPI listing, GitHub README, Palace dogfood report referencing "SpecterQA" all qualify. Capture dated PDFs/PNGs now. -4. **Logo second**, within ~3 months of wordmark. **Slogan deferred** (hardest to register, lowest impact). - -### Defensive actions before filing - -- **Document first-use-in-commerce.** Save dated PDFs of: PyPI page, README hero, repo description, Palace dogfood report. -- **Use the ™ symbol now.** Pre-registration use of "SpecterQA™" supports common-law trademark rights. Free. -- **Evidence trail.** Each commit + each PyPI release timestamps a use event. Just don't delete the evidence. 
- -### Risks: existing marks - -- **`specterqa` 0.4.0 on PyPI** — same SyncTek owner, no conflict. -- **"SimDrive" racing-rig hardware** — Class 28 / hardware, no conflict with our Class 9 software. -- **"Specter" generic word** — common; not blocking. Clearance search will surface close matches. - -NEEDS COUNSEL: a real clearance search (TESS + Google + WIPO + common-law) before filing. **The $300-500 spend here is the highest-ROI single line item in this document.** - -### What trademark does NOT protect - -Code (anyone copies MIT-licensed code), techniques (HID injection is in idb), tool-surface composition (anyone builds a 29-tool MCP server). Trademark just stops them from calling it SpecterQA. That's enough — brand is the most enforceable lever we have. - ---- - -## §5. Patent analysis - -**Honest verdict: do not file patents.** - -### Post-Alice reality - -The 2014 *Alice Corp. v. CLS Bank* decision held that abstract ideas implemented on a generic computer are not patent-eligible. Modern software patents must show "specific improvement to the functioning of the computer itself" — narrow and hard. ~60% of software-patent applications die at examination or in IPR/litigation. - -### Candidate analysis - -| Candidate | Verdict | -|---|---| -| Indigo wire-format reverse-engineering | **Unpatentable.** Prior art (idb, MIT, public since 2018+). Apple invented the wire format; we ported. | -| Vision-first MCP tool surface | **Unpatentable.** Abstract idea + computer = Alice fail. Specific schemas may have *copyright*, not patent. | -| 25 ms Shift-settle timing | **Probably unpatentable.** Too narrow (designed-around in an afternoon) and arguably obvious. | -| Dual-bucket `stable_id` (20/60 px) | **Probably unpatentable.** Hashing label + position bucket is known in CV/UI testing. | -| Recording/replay with stable_id + SSIM masking | **Probably unpatentable.** Combination of known techniques. 
| -| Open-core architecture | **Definitely not patentable** — business method. | - -### Defensive patent? - -NPEs target wealthy companies, not seed-stage startups. Defensive case doesn't pencil at our scale. - -### Cost-vs-value - -Each patent through grant: $15-30K. Two patents = a year of engineering runway at our current scale. 2-3 years to grant — by then the moat is obsolete or designed-around. - -**Re-evaluate at $50K MRR or Series A.** Counsel will spot any genuinely novel claims at that point, and capital will be available to file. - ---- - -## §6. Anti-clone defensive measures - -A determined competitor can clone the engine in 3-6 weeks. Question: what slows them down or makes the clone less valuable? - -### Effective measures - -| Measure | Effort | Effectiveness | -|---|---|---| -| **Trademark enforcement** | Low (file once, $1.5-4.5K total) | **High.** They can copy code; can't call it SpecterQA. | -| **Speed — keep shipping** | Medium (already happening) | **High.** Clone is always behind HEAD. | -| **Brand + dogfood receipts** | Low (already accumulating) | **High.** Palace migration is permanent reputational asset; clones can't have it. | -| **Cloud lock-in** | High (8-12 weeks build per PRODUCTIZATION_PLAN §8) | **High** when shipped. Journey corpus + replays in our Cloud = real switching cost. | -| **Network effects on journey corpus** | High (depends on Cloud + multi-tenant + sharing UX) | Medium-High. Value scales with users. | -| **Anthropic relationship / MCP registry blessing** | Medium (ongoing) | **High.** Cookbook PR + registry listing + training-corpus seeding compound over 6-12 months. | -| **Documentation as moat** | Low-Medium | Medium. Comprehensive CHANGELOG + dogfood reports + BEST_PRACTICES.md raise the cost of "copy the code, figure out tribal knowledge later." | - -### Hard NOs - -- **Code obfuscation.** Engineers won't trust an obfuscated dev tool. Doesn't slow determined cloners. Breaks our own debugging. 
-- **Telemetry without consent.** Breaks dev-tool norms. CCPA/GDPR risk. Anthropic-cookbook reviewers reject. Turns users against us. -- **DRM / key servers for the OSS engine.** Defeats the open-core point. Cloud features can have license-key checks; the engine cannot. -- **Patent trolling.** Community-toxic, expensive, unlikely to succeed. Burns the Anthropic relationship. -- **C&Ds against MIT-compliant forks.** MIT permits forking. Reputational catastrophe in OSS. Trademark enforcement only — never copyright — and only when the fork uses our marks. - -### The honest assessment - -The strongest defense is not "stop the clone" — it's "outrun the clone." A clone of `1.0.0a1` released in 6 weeks competes against `18.x` with new features, more dogfood, a Cloud product, and 3 design partners. That's the real moat. - ---- - -## §7. Practical filing checklist (30 days) - -Total spend, attorney-assisted: ~$2,500-5,500. Self-filed: ~$500-1,000. - -| # | Action | ROI | Cost | Owner | Deadline | -|---|---|---|---|---|---| -| 1 | **Resolve LICENSE / pyproject mismatch.** Replace repo-root `LICENSE` with MIT text. Update or retire legacy 16.x `pyproject.toml`. | **#1** | $0 | CodeAtlas | 2026-05-05 | -| 2 | **Add `TRADEMARK.md`** — declares "SpecterQA" + pixel-pin as marks of SyncTek; usage guidelines. | **#2** | $0 | Atlas drafts, Maurice reviews | 2026-05-05 | -| 3 | **Add `NOTICE`** — SyncTek copyright + MIT recital + Meta/idb attribution for `Indigo.h`. | #3 | $0 | CodeAtlas | 2026-05-05 | -| 4 | **USPTO clearance search for "SpecterQA" Class 9** (flat-fee TM attorney). | **#4** | $300-500 | Maurice + counsel | 2026-05-15 | -| 5 | **File USPTO TEAS Plus wordmark, Class 9.** Section 1(a) use-in-commerce, anchor 2026-05-01. | **#5** | $250 + $500-1,500 attorney | Maurice + counsel | 2026-05-29 | -| 6 | **Capture first-use evidence** — dated PDFs/PNGs of PyPI page, README hero, repo description, Palace dogfood report. 
| #6 | $0 | Atlas | 2026-05-05 | -| 7 | **Use the ™ symbol** in README, PyPI description, marketing copy. | #7 | $0 | CodeAtlas + MarketingAtlas | 2026-05-05 | -| 8 | **File USPTO TEAS Plus pixel-pin logo, Class 9.** | #8 | $250 + $500-1,500 | Maurice + counsel | 2026-06-30 | -| 9 | **Add usage guidelines to `docs/brand/README.md`** — acceptable third-party use, fair use, infringement. | #9 | $0 | MarketingAtlas | 2026-05-15 | -| 10 | **Document first-use date** in `TRADEMARK.md` for evidentiary purposes. | #10 | $0 | Atlas | 2026-05-05 | - ---- - -## §8. Risk register - -| # | Risk | Likelihood | Impact | Mitigation | -|---|---|---|---|---| -| 1 | LICENSE / pyproject mismatch confuses contributors or blocks anthropic-cookbook PR / awesome-mcp listing | **High** (current) | Med-High | Action #1, before next public push. | -| 2 | Anthropic / Apple / YC competitor ships an iOS-driving MCP product before our brand is registered | Medium-Low (~50% combined within 12 mo) | High | File USPTO TEAS Plus ASAP (Action #5). Establish first-use date. | -| 3 | Determined cloner forks MIT engine, calls it "SpecterQA Pro", undercuts Cloud pricing | Low-Medium | Medium | Trademark enforcement (brand-name claim, not code) + speed + Cloud value-add. | -| 4 | Apple breaks `SimDeviceLegacyHIDClient` / `IndigoMessage` SPI in Xcode 27+ | Medium | High | Pin tested Xcode versions, monitor betas, document fallback. Adjacent to IP — losing the technique erodes the portfolio's value. | -| 5 | Trademark squatter files "SpecterQA" in adjacent classes / jurisdictions | Low | Medium | File US Class 9 first; consider WIPO Madrid (~$1,200) when international footprint warrants. | -| 6 | We get sued over a "SpecterQA"-adjacent mark we didn't clear | Low | High | The clearance search (Action #4) is the entire mitigation. **Do not skip it.** | -| 7 | Anthropic releases a product whose name conflicts with "SpecterQA" | Very Low | Very High | Monitor announcements; we'd have priority of use. 
NEEDS COUNSEL if it ever happens. | -| 8 | Contributor PRs code, later claims unassigned IP / patent infringement | Low | Medium | Add `CONTRIBUTING.md` with DCO sign-off requirement. Standard, sufficient at our scale. | - ---- - -## §9. Bottom line - -The IP strategy that pencils at SpecterQA's scale is **brand + speed + Cloud lock-in + dogfood receipts** — not patents and not source-code obfuscation. - -**Spend $500-2,500 on a USPTO trademark filing within 30 days.** Spend $0 on the LICENSE cleanup, `TRADEMARK.md`, `NOTICE`, and ™ adoption — those are an afternoon of CodeAtlas + Atlas work. Defer logo trademark by 60 days. Defer slogan trademark indefinitely. Do not file patents. - -The single highest-leverage action remains the LICENSE / pyproject cleanup — free, unblocks GTM channels, and removes a self-inflicted ambiguity that would be embarrassing to stumble over publicly. Do that first. Everything else cascades from it. - ---- - -*End of strategy. NEEDS COUNSEL: clearance search (§4); MIT-vs-Apache-2 final call at 1.0 (§3); NOTICE wording for first commercial Cloud release (§3); first-use anchor date confirmation (§4).* diff --git a/simdrive/docs/PRODUCTIZATION_PLAN.md b/simdrive/docs/PRODUCTIZATION_PLAN.md deleted file mode 100644 index 7b60b5c..0000000 --- a/simdrive/docs/PRODUCTIZATION_PLAN.md +++ /dev/null @@ -1,266 +0,0 @@ -# simdrive — Productization & Agentic-First GTM Plan - -**Status:** Draft v1, BIS-synthesis output -**Date:** 2026-05-01 -**Inputs:** 4 parallel R&D memos — Product/Engineering, Brand/Marketing, Agentic-First GTM, Competitive Strategy -**Decision needed from:** Chairman (timeline + revenue target alignment, see §10) - ---- - -## 1. Executive summary - -simdrive is **2 weeks from a credible 1.0 stable** on the simulator path, and **3-4 weeks beyond that from real-device parity** via WebDriverAgent. 
The product surface (29 MCP tools), the killer features (vision-first observe, real-UITouch HID injection, stable_id replay), and the dogfood loop (Palace iOS migrated in 5 days, three feedback rounds all closed) are validated. What stands between 0.3.0a3 and 1.0 is mostly small-effort polish plus one written stability commitment. - -The strategic frame is **agentic-first**: distribution through MCP registries, awesome-mcp lists, and the next-Claude training corpus, not through paid ads or sales-led motions. The window for category-definition is **roughly 9 months** before either Anthropic ships native iOS computer-use or Maestro adds an MCP wrapper. The position simdrive can credibly own in 2026: *"The MCP-native iOS simulator driver that AI agents and CI use to gate iOS PRs on real-pixel, real-input behavior — the things XCUITest can't see and Maestro can't deeply touch."* - -The chairman's $5K-MRR-by-July target is **not realistic for simdrive standalone** — the paid product (Cloud / Pro tier) doesn't exist yet, and simdrive itself stays MIT. Honest revenue path is October 2026. Recommend re-casting the July target as a **portfolio number** (simdrive + closed-source proprietary iOS layer + other revenue lines), with simdrive's role through July measured in **installs and design-partner LOIs**, not MRR. See §10 for the explicit decision request. - ---- - -## 2. State of simdrive (as of 0.3.0a3, 2026-05-01) - -| Dimension | Value | -|---|---| -| MCP tools | **29** (lifecycle 3, observe 1, act 5, record/replay 5, logs 1, perf 4, diagnostics 5, robustness 4, version 1) | -| Test count | **117** total (91 unit, 26 live E2E against TestKitApp) | -| Code | 4,118 LOC Python + ~600 LOC ObjC native HID helper (universal2 binary, in-wheel) | -| Platform | macOS + Xcode + iOS Simulator. Python ≥3.10. | -| Real-device | Read-only (observe + logs + lifecycle) — input gated on WDA, scoped to v1.1 | -| Distribution | PyPI alpha track, Trusted Publisher OIDC, no token. 
GitHub releases per tag. | -| Customers | **1 paying-attention dogfood**: Palace iOS (org.thepalaceproject.palace), fully migrated off SpecterQA, 5-day cutover, 3 feedback rounds all closed | - -**Rock-solid:** sim lifecycle, vision-first OCR observe, HID tap/swipe, type_text on iOS 26, stable_id-resolved record/replay with SSIM masking, MCP wiring, install ergonomics. - -**Experimental:** real-device input (pending WDA), `perf` snapshot accuracy under sustained calls (a possible stale-cache bug from latest dogfood), `type_text` race against debounced/async-focus SwiftUI fields, Dynamic Island modal dismissal, OCR on stylized cover text. - ---- - -## 3. Production-readiness gap (8 axes, 1=blocking, 5=ready) - -| Axis | Score | Gap | -|---|---|---| -| API stability | 3/5 | No deprecation policy in writing. Two soft-breaks shipped post-0.1 (Session dataclass, type_text response shape). 1.0 needs `STABILITY.md`. | -| Error UX | 4/5 | Structured `SimdriveError` codes, recovery instructions in messages. Minor: a few paths still bubble through the catch-all `internal` envelope. | -| Documentation | 2/5 | **README still says "12 tools"** (`README.md:43`). CHANGELOG is current and detailed; `docs/` has 3 short docs. Missing: cookbook, examples, schema reference auto-generated from `_TOOLS`. | -| Install ergonomics | 4/5 | One-line `pip install specterqa-ios` ships universal2 native binary inline. CLI `--version`/`--help` shipped in 0.2.0a2. | -| Test coverage | 4/5 | 91 unit + 26 live = 117 tests. No CI matrix (Xcode/macOS versions). | -| Observability | 3/5 | Sidecar JSONs per observation, `actions.jsonl` per session, `_simdrive_warning` for version drift. Missing: structured logging, debug-mode env var, telemetry hook. | -| Backwards compat | 2/5 | No SemVer guarantee. Required for 1.0. | -| Real-device support | 2/5 | observe/logs/lifecycle work; tap/swipe/type/key raise `device_input_unavailable`. WDA roadmap scoped, not yet built. | - ---- - -## 4. 
v1.0.0 roadmap — sim-only path, 2-week clock - -**Recommendation: ship 1.0 as the sim-stable cut. Real-device input lands in 1.1 (3-4 weeks after 1.0).** The case for splitting: (a) the sim path is where 100% of revenue traction is today, (b) WDA scope creep on provisioning UX has bitten past projects, (c) clear positioning beats "everything in 1.0" — *"simdrive 1.0 is the canonical sim driver; real-device input ships in 1.1"* is a strong story. - -### Must-have for 1.0 (5 items, all S effort) - -1. **Fix `type_text` async-focus race** — add `wait_for_keyboard: true` default (poll `keyboard_visible` ~500ms before dispatching keystrokes; return `code: keyboard_not_focused` if it never appears). Removes the only known silent-failure mode in the surface. -2. **Investigate + fix `perf` stale-cache bug** — RSS frozen at 592MB across 20+ snapshots suggests the `simctl spawn ps` call is caching. Trust collapses without this. P0. -3. **Auto-generate the README tool table from `_TOOLS`** — kills the 12-vs-29 drift permanently. -4. **Write `STABILITY.md`** — declare what's covered by SemVer at 1.0 (tool names, error codes, required response fields). Optional fields advisory. Min 1 minor cycle between deprecation warning and removal. -5. **Roll the open dogfood-doc gaps into `LIMITATIONS.md` + `BEST_PRACTICES.md`** — SFSafariViewController fullscreen escape, debounce-window guidance, perf-vs-memory selection guidance. - -### Should-have for 1.0 (3 items, S/M/S) - -6. **Network monitoring tool** (was deferred from 0.3.0a1) — parse `simctl io booted log show` for CFNetwork events + nettop merge. Closes the perf+regression PR-gate use case Palace ships against. **M effort.** -7. **`app_relaunch` with iOS 26.3 teardown handling** — terminate → wait_for_terminated → launch → wait_for_foreground with `relaunch_failed` error code. **S effort.** -8. 
**Auto-promote annotate-on-text-tap** — when a tap call uses text/mark resolution after `annotate=false`, lazy-annotate the cached screenshot rather than fail. Removes a real footgun. **S effort.** - -### Defer to 1.1+ - -- **Real-device input via WDA** (the v0.3 roadmap item, L effort, ~3-5 days impl + provisioning UX). -- `accessibility_audit`, `webview_elements` — XCTest-bridge-blocked; cut from 1.0 entirely (don't ship half-implementations). -- Cookbook/recipes directory, CI matrix, native journey-spec format. - -### Top 3 risks - -1. **Apple breaks the CoreSimulator HID injection in a future Xcode release.** simdrive's killer feature depends on `SimDeviceLegacyHIDClient` + `IndigoMessage` private SPI. Mitigation: pin tested Xcode versions, monitor Xcode betas, document fallback to cliclick. -2. **The `perf` stale-cache bug erodes trust before fix.** P0 — treat with same urgency as a HIGH-severity dogfood item. -3. **WDA scope creep delays 1.0** if we try to bundle it. Mitigation: keep 1.0 sim-only. - ---- - -## 5. Brand identity - -### Name -**Public brand: SpecterQA. Internal codename: simdrive.** SpecterQA is the public-facing name (PyPI, README, MCP listings, marketing); simdrive lives on as the internal codename — used in the binary filename, in dev branches, in commit history, and as the legacy console-script alias. Honest weakness on the public name: SEO competes with the broader QA-tools category; mitigate by always pairing the wordmark with iOS/MCP context. Domain: `synctek.io` is canonical; optional `simdrive.io` registration only. - -### Tagline -**"Hand your iOS simulator to your agent."** (already in use — keep) - -### Logo system — Direction A "Pixel pin" -A 4×4 pixel grid (the screenshot the agent sees), thin black crosshairs through one cell, a vivid red tap-pin (#FF3D2E) at the intersection. The mark literally depicts the mechanic — agent picked that pixel; SpecterQA taps it. 
Wordmark in geometric monospace, weight-600 `Specter` + weight-400 `QA`. Source files in `simdrive/docs/brand/`: - -- `logo-primary.svg` (1200×320) — README hero, PyPI listing, MCP-registry submission -- `logo-mark-only.svg` (200×200) — app icon, social avatar -- `favicon.svg` (32×32) — browser tab, ≤32px contexts -- `wordmark-bracket.svg` — typographic fallback (`[specterqa_]`) for CLI banners - -### Voice (5 rules, codified from the existing CHANGELOG) -1. **State the change, then the why, in that order.** Don't lead with motivation. -2. **Name the thing precisely.** Backticks on real symbol names. Backticks earn trust faster than adjectives. -3. **Numbers, not adverbs.** "60px bucket (3× the tight 20px)" beats "much more reliable." -4. **Acknowledge limits in the same paragraph as the capability.** Honesty is the brand. -5. **Sentence > paragraph. Table > sentence when comparing options.** - -**Do-not-write list:** revolutionizing / next-generation / seamlessly / effortlessly / magical / leading / world-class / AI-powered / "Learn more →" / exclamation points. - -### Three testimonial-grade quotes -All from Palace's `SIMDRIVE_v0.2.0a1_DOGFOOD.md`, attribution **Maurice Carrier, ThePalaceProject**: - -1. *"simdrive 0.2.0a1 is a meaningful step forward and is now the canonical iOS sim driver for Palace iOS development, replacing SpecterQA."* -2. *"The single biggest reason SpecterQA was failing — the cliclick path that broke UITextField focus — is fully fixed."* -3. *"Replays are now reliable enough to gate PRs on."* - -Use #1 as README banner, #2 in the Show HN post, #3 in v1.0 release notes. - ---- - -## 6. Agentic-first GTM — 30-day plan - -The strategic frame: **distribution is registry placement + MCP catalog presence + training-corpus footprint, not content marketing.** Discovery happens when the next iOS-driving agent reaches for the right tool, not when an engineer Googles "iOS test automation." 
- -### Channels (priority-ranked) - -| # | Channel | Action | Deadline | Lift | -|---|---|---|---|---| -| 1 | **Anthropic MCP registry** (`claude.ai/mcp`) | Submit listing with copy + demo GIF | 2026-05-08 | **Step-change** | -| 2 | **`modelcontextprotocol/servers` GitHub PR** | Open PR under "Mobile / Testing" | 2026-05-05 | Modest now, step-change later (training corpus) | -| 3 | **Smithery.ai catalog** | Submit with full metadata + 29-tool description | 2026-05-08 | Modest | -| 4 | **Cline + Cursor MCP marketplaces** | PR to Cline; draft Cursor docs entry | 2026-05-15 | Modest | -| 5 | **PyPI search + GitHub Topics** | Add topics: `mcp-server`, `ios-simulator`, `claude`, `anthropic`, `xctest-alternative`. README badge. | 2026-05-03 | Modest, persistent | -| 6 | **`anthropics/anthropic-cookbook` PR** | 30-line "Drive an iOS sim with Claude" recipe | 2026-05-22 | **Step-change** | -| 7 | **Training-corpus seeding** | Publish 3 indexable artifacts: "Why we replaced XCTest with screenshots" essay + Stack Overflow answer + GitHub Discussion with Palace dogfood data | 2026-06-01 | **Step-change**, compounds over 6-12 months | - -The **training-corpus channel** is the most under-rated. It's slow but the only channel where the asset compounds without ongoing spend. - -### Onboarding — minimum-time-to-first-success - -Today: ~15 minutes for an unprepared developer. Target: **under 5 minutes.** Two friction reductions: - -- `specterqa-ios doctor` already exists; surface it in README with a one-liner: *"Don't have Xcode? `xcode-select --install` + open Simulator.app once."* Add a 30-second loom-style GIF as README hero. (Install: `pip install specterqa-ios`.) -- Make `session_start({})` (no args) auto-pick the first booted sim and return `device: iPhone 17 Pro, ready`. Document the zero-config path. - ---- - -## 7. SpecterQA cutover - -**Decision (Chairman, 2026-05-01): SpecterQA is the public brand.** The iOS-arm PyPI rename is `simdrive` → `specterqa-ios`. 
The new code that was shipping as `simdrive 0.3.0a3` is now published as `specterqa-ios 1.0.0a1`, continuing the legacy `specterqa-ios` major-version line directly over the abandoned 16.x branch. simdrive lives on as the internal codename — used in the binary filename (`simdrive-input`), in dev branches, in commit history, and as the legacy console-script alias for back-compat. - -**No yank, no soft sunset of the historical 54 releases.** The original `specterqa-ios` package (releases through 16.0.0a3) stays on PyPI — historical pins continue to resolve. New publishes from this repo go to the same `specterqa-ios` namespace at version 1.0.0a1+ — pip's resolver picks the new code naturally for unpinned installs. - -| Date | Action | -|---|---| -| 2026-05-01 | Ship `specterqa-ios 1.0.0a1` to PyPI (the renamed `simdrive 0.3.0a3` codebase, no behavioral changes). | -| 2026-05-01 | Ship `simdrive 0.3.0a4` deprecation stub: depends on `specterqa-ios>=1.0.0a1`, prints a one-line migration notice on import. So `pip install simdrive` keeps resolving and points users at the new package. | -| 2026-05-05 | Update README banner + repo description pointing to `specterqa-ios`. Pin a migration issue on the repo. | -| 2026-05-15 | Last `simdrive` deprecation-stub release. From here forward, all releases ship under `specterqa-ios` only. | - -The legacy 16.x `specterqa-ios` line (the abandoned XCTest-based codebase under `src/specterqa/` at the repo root) is being retired in a separate follow-up commit — the new code being published as `specterqa-ios 1.0.0a1` is a complete rewrite, no migration tooling needed for users (none exist on the old code). - ---- - -## 8. Pricing & monetization - -**simdrive (MIT) stays free forever.** The 29 MCP tools, vision-first observe, record/replay, HID injection — all permanently open. Paid layer ships under a separate package (`simdrive-cloud` or `simdrive-pro`) with a different license. 
- -### Three strongest "open → paid" wedges - -| Tier | Price | Wedge | -|---|---|---| -| **Pro** (individual) | **$49/mo/seat** | Hosted replay archive, SSIM-trend dashboards, multi-sim parallelism license, priority support, signed builds. Saves ~4 hours of flake-debugging/week — pays for itself at any iOS engineer's loaded rate. | -| **Team** (5 seats) | **$249/mo** | All Pro + shared journey corpus, CI integrations (productized `--simdrive` PR-gate pattern), Slack/Linear hooks, **real-device input via WDA** (the v0.3 roadmap item ships here, not in OSS). | -| **Enterprise** | Sales-led, $5-15K/yr | Compliance: SOC 2, RBAC, SSO, audit logs, on-prem replay storage. The reference-customer tier. | - -**Don't price like BrowserStack** ($199+/mo) — simdrive doesn't run real-device cloud, the per-seat math gets ugly. **Don't price like Sauce** ($1K+/mo team minimums) — wrong buyer; simdrive sells to engineers, not QA directors. Maestro Cloud's $99/mo entry is the right reference; undercut at $49. - -### Path to $5K MRR — honest math - -- $5K ÷ $249 team = 20 paying teams. Or $5K ÷ $49 = 102 individuals. Or any blend. -- Confirmed users today: **1** (Palace, free dogfood). -- Cloud product **does not exist yet** — building Cloud MVP is an 8-12 week effort. - -**$5K MRR by July is not realistic for simdrive standalone.** Realistic: **$5K MRR by October 2026** with this funnel: - -- May–June: Cloud MVP build (real-device WDA + hosted CI runner) -- July: Beta with 5 design partners (Palace + 4 others recruited via channels #1-4) -- August: Public launch at $49/mo individual. Goal: 50 paid individuals = $2,450 MRR. -- September: Team tier launches. Convert 10 individual users to teams ($2,490) + add 5 new teams ($1,245). **Total ~$6,000 MRR.** - ---- - -## 9. Competitive position & moat - -### Map (Open ↔ Cloud, Imperative ↔ AI-native) - -simdrive lives in **Open + AI-native local** — a sparsely populated cell. 
Maestro is the only mature occupant of "AI-native mobile testing"; nobody else is MCP-native. - -### Honest differentiator (per opponent) - -- **vs Maestro:** Maestro is more mature, ships Android too, has Studio recorder. simdrive's edge: MCP-native protocol designed for an LLM agent loop + native HID bypassing XCTest's iOS 26 TextField issues. Bet: agent-driven becomes default in 18 months. -- **vs Detox:** Detox wins for React Native (gray-boxes the JS bridge). For non-RN iOS, simdrive wins by default. Don't fight Detox on its home turf. -- **vs raw XCUITest:** XCUITest fails on WebViews, SwiftUI no-AX components, iOS 26 UITextField focus, out-of-process Safari sheets. simdrive's vision-first model wins precisely those workloads. Palace's Reader2 + OAuth use cases prove it. -- **vs claude-computer-use:** Existential threat. claude-computer-use lacks native HID, simulator session lifecycle, simctl integration, log tailing, crash retrieval, perf, recording/replay, OCR-marks, stable_id. **Roughly 6-9 months of focused Anthropic-team work to rebuild.** Window is real but not infinite. - -### Moat assessment (12-month horizon) - -The moat is **NOT** any single capability — it's the **bundle**: real UITouch on iOS 26 + MCP-native composable surface + Claude-tuned ergonomics + dogfood-velocity loop. Hard to clone in <6 months. Strongest single edge: **iOS 26 TextField focus via SimDeviceLegacyHIDClient + IndigoMessage** — the trick is non-obvious; ~3 weeks reverse-engineering for a competent team. **Most leverageable moat to BUILD: brand position** ("the AI-agent driver for iOS sims"), via dogfood receipts + conference talks + integrations. - -### Existential risks (combined risk surface = high) - -| Scenario | Likelihood | Time-to-impact | Defensive move | -|---|---|---|---| -| Anthropic ships native iOS sim drive in claude-code | **Medium (30-40%)** | 9-15 months | **LEAN IN** — be the iOS layer they don't build. Pursue explicit blessing in Anthropic's MCP registry. 
| -| Apple ships Xcode 27 AI/Agent UI test framework | Low-Medium (15-25%) | 12-18 months (WWDC 2026 announce) | Focus simdrive on cross-Apple-version regression and the WebView gap Apple won't close. | -| Maestro ships an MCP wrapper | **High (60-70%)** | 3-6 months | **SIDESTEP** — own iOS-deep (real HID + perf + crashes + replays) Maestro's cross-platform position can't match. | -| Well-funded YC competitor launches | Medium (35-45%) | 6-12 months | They have marketing budget; we have receipts. Lock in 3-5 named customer logos by Q3. Win OSS-credibility race. | - -### The position simdrive can credibly own in 2026 - -> **"The MCP-native iOS simulator driver that AI agents and CI use to gate iOS PRs on real-pixel, real-input behavior — the things XCUITest can't see and Maestro can't deeply touch."** - -Everything in product, pricing, and GTM should ladder up to that sentence. - ---- - -## 10. Open decisions — chairman input requested - -Calls 1 and 2 require chairman direction; they reach beyond Atlas's scope. Call 3 was resolved on 2026-05-01 and is kept here for the record. - -| # | Decision | Recommendation | Why it needs chairman | -|---|---|---|---| -| 1 | **Re-cast GOAL-2026-006 ($5K MRR by July) as a portfolio target** rather than a SpecterQA-iOS-standalone target. | Yes. SpecterQA for iOS's role through July is distribution (installs + design-partner LOIs). The proprietary closed-source iOS layer + other revenue lines carry the dollar number. | Changes a chairman directive; Atlas can't unilaterally redefine the goal. | -| 2 | **1.0 timeline: sim-only in 2 weeks (recommended) or include WDA real-device for ~5 weeks.** | Sim-only at 2 weeks. WDA in 1.1. | Affects positioning and the launch-date commitment. | -| ~~3~~ | ~~Procure `simdrive.io` and `simdrive.dev` domains before public launch.~~ | **Resolved 2026-05-01:** `synctek.io` is canonical; optional `simdrive.io` registration only. | — | - ---- - -## 11. 30-day execution priorities - -Concrete, owned, measurable. 
- -| # | Priority | Owner | Deadline | Done means | -|---|---|---|---|---| -| 1 | Submit simdrive to Anthropic MCP registry, Smithery, modelcontextprotocol/servers PR | GTMAtlas + CodeAtlas | **2026-05-08** | All 3 listings live; URLs logged in INIT-2026-525 | -| 2 | Ship `specterqa-ios` 15.2.1 deprecation + `MIGRATION_FROM_SPECTERQA.md` | CodeAtlas + DeployAtlas | **2026-05-05** | Legacy package banner + redirect live | -| 3 | Recruit 3 design-partner apps for simdrive Cloud beta | Chairman + GTMAtlas | **2026-05-29** | 3 informal LOIs for free 60-day Cloud beta in exchange for monthly feedback | -| 4 | Ship simdrive 1.0 (sim-only) | CodeAtlas + TestAtlas | **2026-05-15** | All 5 must-haves + 3 should-haves landed; STABILITY.md committed; release announced | -| 5 | Auto-generate README tool table from `_TOOLS` | CodeAtlas | **2026-05-08** | Tool table cannot drift again | -| 6 | Publish 3 training-corpus artifacts ("Why we replaced XCTest" essay + SO answer + GH Discussion) | MarketingAtlas | **2026-06-01** | All 3 indexed by Google + linked from simdrive README | -| 7 | Build simdrive Cloud MVP (real-device WDA + hosted CI runner) | CodeAtlas + DeployAtlas | **2026-06-30** | 1 design-partner running daily journeys against Cloud | - ---- - -## 12. The bottom line - -simdrive at 0.3.0a3 is genuinely close. The architecture is right, the killer features are validated by a real paying-attention customer, the test count is healthy, the dogfood loop is exemplary (3 reports → 3 closes in 5 days). Two weeks of focused mostly-S work + one M (network tool) + one written stability commitment gets us to 1.0 stable. The agentic-first distribution play is concrete and dated. The pricing model preserves MIT openness while creating a clean upgrade path to a real revenue stream by October. - -The single biggest risk is the **9-month window for category-definition** before either Anthropic or Maestro closes the gap. Every week of delay narrows it. 
- -What's needed from the chairman: re-alignment on the July revenue goal (portfolio vs standalone), a yes/no on sim-only-1.0, and a small budget for domain procurement. With those, the 30-day plan executes. - ---- - -*End of plan. Source memos in conversation history (ProductAtlas, MarketingAtlas, GTMAtlas, CompetitiveAtlas).* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/00_SIMDRIVE_1.0_PLAN.md b/simdrive/docs/RD_SIMDRIVE_1.0/00_SIMDRIVE_1.0_PLAN.md deleted file mode 100644 index 5874ea3..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/00_SIMDRIVE_1.0_PLAN.md +++ /dev/null @@ -1,317 +0,0 @@ -# SimDrive 1.0 Plan-of-Record (v2 — full expansion) - -**Status:** Synthesis of 7 BIS memos + validated-facts source-of-truth -**Date:** 2026-05-02 -**Public brand:** **SimDrive** (SpecterQA rename reverted; brand-asset SVGs back to "sim/drive" wordmark) -**Strategic frame:** Premium-from-day-one with 14-day free trial; expanded 1.0 scope (journey runner + Cloud private API + WDA real-device beta); LapsApp dogfood built alongside; world-class moat roadmap committed -**Decisions needed from chairman:** §11 (6 explicit calls) - -**Source memos** (in `simdrive/docs/RD_SIMDRIVE_1.0/`): -- `00a_VALIDATED_FACTS.md` — source of truth on what exists vs hypothesis -- `01_product_engineering.md` — original product+eng spec (superseded by `05`) -- `02_brand_marketing.md` — brand identity + 10 launch surfaces -- `03_gtm_pricing.md` — agentic-first GTM + premium pricing -- `04_competitive_risk.md` — Maestro/Anthropic/Apple positioning -- `05_engineering_expansion.md` — execution-ready 1.0 build plan (9 components) -- `06_world_class_moat_features.md` — post-1.0 roadmap (5 headline features) -- `07_test_app_spec.md` — LapsApp dogfood platform - ---- - -## 1. Executive summary - -SimDrive 1.0 is a **premium iOS testing tool with a 14-day free trial**, priced at **$49 Solo / $149 Pro / $499 Team / Enterprise sales-led**. 
The 1.0 scope expands the validated 29-tool MCP surface with five new components: journey runner, license/trial system, WDA real-device input (gated beta), Cloud private API (replay archive), and a hardening pass. Built in parallel: **LapsApp**, an MIT iOS app exercising every SimDrive capability, shipping the same week as 1.0. - -**Engineering ETA:** **10 weeks with 2 engineers** (mid-July) or **~16 weeks with 1 engineer** (late August). The two-engineer path is the only one that meets the chairman's stated July timing. - -**Honest revenue:** $5K MRR by July is **not achievable** as a SimDrive-standalone target under either ETA. Realistic July contribution: **$1.5K-$2.5K** (200 trial activations, 8-12 paying customers); SimDrive standalone $5K MRR lands **October 2026**. Recommend the chairman re-cast July as a portfolio number — same recommendation as the prior PRODUCTIZATION_PLAN. - -**Position SimDrive can credibly own at premium price in 2026:** -*The premium iOS testing tool agents reach for first — journey-driven, MCP-native, iOS-deep where XCUITest fails and Maestro doesn't go.* - -**Top risk under premium positioning:** trial-to-paid conversion below the 5% floor (30-40% likelihood, 60-120 day clock, instrumentable). Anthropic shipping native iOS computer-use is the larger headline threat (35-45%, 9-15 month clock). - -**Post-1.0 moat roadmap:** five features that take SimDrive from "premium SaaS niche" to "category-defining $50K/yr Platform tier" by month 24, anchored by the **Production Session Capture SDK** (the category bet — LogRocket-of-mobile-QA pattern). - ---- - -## 2. The validated foundation — what exists today - -Source of truth: `00a_VALIDATED_FACTS.md`. Every marketing/product claim must trace to a row there. 
- -| Layer | Validated capability | Code path | Evidence | -|---|---|---|---| -| MCP surface | 29 tools (lifecycle, observe, tap/swipe/type/press/clear, record/replay, perf, diagnostics, robustness, version) | `simdrive/src/specterqa_ios/server.py:_TOOLS` | 91 unit + 26 live E2E pass | -| Vision-first | OCR + Set-of-Mark + stable_id + stable_id_loose + confidence_band | `observe.py` + `som.py` | unit tests + Palace verbatim quote | -| Input | Real UITouch HID via CoreSimulator (iOS 26 TextField focus works) | `simdrive/native/src/simdrive_input.m` | live E2E + Palace verbatim quote | -| Record/replay | YAML + stable_id resolution + SSIM region masking | `recorder.py` | Palace: *"replays now reliable enough to gate PRs on"* | -| Perf + diagnostics | CPU/RSS/threads, footprint, crashes, doctor, app_state, apps | `perf.py` + `diagnostics.py` | unit tests + live smoke | -| Real-device | Read-only (observe + logs + lifecycle) — no input | `device.py` | live test against Maurice's paired iPhones | - -**Customers:** Palace iOS (ThePalaceProject) — migrated off the predecessor in 5 days, three dogfood rounds, all feedback closed, replays now gate their PRs. - -**What does NOT exist today** (and is therefore a 1.0 build-target, not a validated capability): -- Journey runner / `simdrive run --journey` CLI / `simdrive ci` orchestrator -- Persona YAML schema or persona-driven AI prompting -- License key + trial system -- WDA real-device input (only read-only operations work today) -- Cloud private API (no hosted endpoint, no replay archive) - -The plan in §3-§5 builds those. - ---- - -## 3. 
The expanded 1.0 scope (what we build) - -Per `05_engineering_expansion.md`, nine components ship in 1.0: - -| # | Component | Effort | Why it's in 1.0 | -|---|---|---|---| -| 1 | Journey YAML schema + validator | S | The user-facing surface for premium customers — defines a stable contract | -| 2 | Persona YAML schema + validator | S | Pairs with journeys; injects context into the agent loop | -| 3 | Journey runner core | L | The new headline workflow — orchestrates `observe → AI decide → act → record_step` until success criteria met | -| 4 | License key + trial system | M | Premium pricing requires entitlement; Ed25519 offline-first + weekly refresh | -| 5 | WDA bootstrap CLI | M | `simdrive bootstrap-device <udid>` — clones WDA at pinned SHA, builds with user signing identity, installs | -| 6 | WDA HTTP client | M | Wires `tap/swipe/type/press_key` to WDA when `target=device` | -| 7 | Cloud private API | M | Replay archive endpoint at `forgeos-api.synctek.io/v1/recordings` (or simdrive-api.synctek.io) — Cloudflare R2 storage, license-key bearer auth, per-tier quotas | -| 8 | `simdrive ci` orchestrator | S | Run all journeys, output JUnit XML + corpus | -| 9 | Production hardening pass | M | Error UX audit, structured logging, perf benchmarks, edge-case coverage, docs | - -**WDA scope decision:** ships as **gated beta** in 1.0 (`--device-beta` flag, `experimental` in doctor output). Full parity ships in 1.1. Reasoning: WDA provisioning UX (signing identity, dev-team, cert-trust prompts, DDI mount) has historically eaten 3-5 sessions on top of the pure code estimate. Hedging preserves the calendar without abandoning the directive. - -**29 MCP tools stay user-facing.** Demoted in *docs* (the journey runner is the headline workflow), not in *visibility*. Palace's validation flowed through them directly; hiding them burns evidence before the journey runner has independent proof. 
- -### 3.1 Two-engineer 10-week calendar (the only path to July) - -| Weeks | Engineer A | Engineer B | Milestone | -|---|---|---|---| -| 1-2 | Components 1+2+3-skeleton | Components 4+5 | Journey schema locked; license skeleton + WDA bootstrap stub | -| 3-4 | Component 3 completion | Components 6+7-skeleton | First end-to-end journey run on TestKitApp; WDA installs on Maurice's iPhone 17 | -| 5-6 | Component 8 + integration | Component 7 completion | `simdrive ci` works; Cloud API serves first design-partner upload | -| 7-8 | Component 9 (hardening) | Component 9 (hardening) | All 9 components green; Palace journey corpus passes | -| 9-10 | Launch readiness | Launch readiness | 1.0 ships | - -With **1 engineer**, the calendar slides to ~16 weeks (late August) — per `01_product_engineering.md §2.4`. Either WDA or Cloud must be deferred to 1.1 to hold 10 weeks. - -### 3.2 Engineering risks (top 3) - -1. **WDA bootstrap UX** — historical 3-5 session overrun on top of the code estimate. Mitigation: gated beta in 1.0; if not installing on Maurice's iPhone 17 Pro Max by week 2 day 4, escalate for scope cut. -2. **Trial Claude-API cost runaway** — $0.90/run × 20/day = $18/day per trialist. Mitigation: server-side $5/day cap during trial. -3. **Journey YAML schema lock-in** — wrong schema in 1.0 breaks every customer's journeys in 1.1. Mitigation: `schema_version: 1` reservation + 2-design-partner draft cycle (Palace + 1 other) before public cut. - ---- - -## 4. The dogfood platform — LapsApp - -Per `07_test_app_spec.md`. **LapsApp** is an MIT-licensed fitness/run-tracking iOS app that exercises every SimDrive capability through realistic flows. Replaces TestKitApp as the canonical demo and dogfood substrate. 
- -### 4.1 Why LapsApp ships alongside 1.0 (not before, not after) - -- **Before:** drives an unstable engine, journey churn destroys the corpus's marketing value -- **After:** 1.0 launches without a working canonical demo, killing the Show HN moment + product-page hero video -- **Alongside:** tag SimDrive 1.0 + LapsApp v1.0 the same week, both linked from launch announcement, journey corpus already validated on the 1.0 engine for ~2 weeks before public cut - -### 4.2 The 12 feature areas + 20 pre-built journeys - -Each feature area exercises a specific SimDrive capability. Highlights: -- **OAuth login (Sign in with Apple + Google)** — the killer surface; only feature that simultaneously stresses vision-first observe AND iOS-26 UITextField focus (the out-of-process Safari sheet) -- **WebView article reader** — XCTest-blind WKWebView; SimDrive's other killer feature -- **Search + autocomplete + debounced input** — exercises the wait-for-keyboard fix -- **Crash trigger button** — intentional crash → tests `crashes` retrieval -- **Settings (light/dark, push, accessibility)** — exercises `set_appearance` + future a11y audit -- ...8 more - -Some journeys **deliberately fail** (regression journeys catching intentionally-introduced bugs) so the corpus catches real failures, not just records green runs. - -### 4.3 LapsApp build effort - -**14 calendar weeks with 1 engineer.** Built in parallel to SimDrive 1.0 — meets at launch. Dedicated engineer C (separate from engineers A+B on SimDrive). With three engineers running parallel, both ship together at ~week 14. - ---- - -## 5. Brand + marketing - -Per `02_brand_marketing.md`. **Brand-asset reverts already executed** (`logo-primary.svg`, `wordmark-bracket.svg`, `brand/README.md` back to SimDrive wordmark, in commit-pending state). 
- -### 5.1 Decisions - -- **Public name:** SimDrive (locked) -- **Tagline:** *"Ship iOS releases your agent already tested."* (outcome-first; verb subject is the buyer) -- **Voice:** keep CHANGELOG anti-fluff posture; the *honesty* is the premium hook (every premium surface keeps one footer line of real-tradeoff disclosure) -- **Logo:** pixel-pin mark unchanged (sourced from product — the SoM red the agent already sees) - -### 5.2 Voice resolution (from open-source to premium) - -The CHANGELOG voice is engineer-to-engineer, anti-fluff, earns trust by *under*-selling. Premium positioning typically does the opposite. **Resolution:** keep CHANGELOG vocabulary; move the verb's center of gravity from the maker to the buyer. Open-source voice says "we built." Premium voice says "you ship." Same words, different subject. - -### 5.3 Launch surfaces (drafted in `02_brand_marketing.md`) - -10 production-ready surfaces drafted: synctek.io homepage hero, product page (~600 words), README v2, trial-start CTA (3 variants), pricing page hero, cold email, day-1/4/7/13 trial nurture, post-trial conversion landing, Show HN post (premium variant), Twitter thread. - -All copy follows the "claims trace to validated facts" rule per Maurice's directive — no journey-runner claims as 1.0 features until 1.0 ships. - ---- - -## 6. Pricing + GTM - -Per `03_gtm_pricing.md`. **Tier structure:** - -| Tier | Price | Includes | -|---|---|---| -| **Solo** | **$49/mo** | 1 sim, 1 device, 50 journeys/mo, individual use | -| **Pro** | **$149/mo** | 4 sims, 4 devices, 250 journeys/mo, parallel CI, priority support | -| **Team** | **$499/mo** | 5 seats, 1000 journeys/mo, shared journey corpus, real-device WDA included | -| **Enterprise** | $5-15K/yr | SOC 2, RBAC, SSO, audit logs, on-prem replay storage | - -**Trial:** 14 days, full Pro features, soft 250-run cap, email-only activation (no card), 7-day grace then read-only on day 22, hard-stop on day 30. 
**Server-side $5/day Claude API cap during trial** (per Engineering A risk #2). - -### 6.1 Path to $5K MRR — honest math - -50 paying customers needed (blended). Funnel at industry-benchmarked rates: ~113K impressions → ~5,650 visits → ~2,260 trial-CTA clicks → ~1,240 trial activations → ~50 paying customers. - -**Channel-throughput reality:** MCP registry + Smithery + dev-advocate channels deliver ~15-25K cumulative impressions in 60 days. **The funnel is undersized by ~5×.** Realistic July 2026: **200 trial activations, 8-12 paying customers, $1.5K-$2.5K MRR.** Realistic SimDrive standalone $5K MRR: **October 2026** with 4-month funnel. - -### 6.2 Distribution channels (agentic-first, premium-adapted) - -- Anthropic MCP registry (premium-product listing with "[paid, free trial]" tag) -- `modelcontextprotocol/servers` PR (paid-tier tag) -- Smithery.ai catalog -- Cline + Cursor MCP marketplaces -- PyPI + GitHub topics -- **Anthropic cookbook PR** — reframed as generic MCP-iOS-driver recipe (not a SimDrive ad); Anthropic policy excludes paid-product examples -- **Dev-advocate complimentary-license channel** — 15 named iOS dev-rel + AI-tool-reviewer accounts get 90-day Pro keys (replaces cookbook PR as primary paid-funnel-driver) -- Training-corpus seeding (3-4 indexable artifacts in 90 days) - ---- - -## 7. Competitive position + moat - -Per `04_competitive_risk.md`. SimDrive lives in **Premium SaaS + AI-native local** — sparsely populated. - -### 7.1 vs Maestro (the hardest competitor) - -Maestro is free OSS + paid Cloud, has journey YAML, vision+AX hybrid, mobile-focused. **Why pay for SimDrive instead of free Maestro?** - -Three honest answers (each with counter-argument): -1. **iOS-26 TextField focus + native HID** — Maestro doesn't have this. Counter: they can fix in 4-8 weeks if motivated. -2. **MCP-native architecture for agent integration** — Maestro's CLI isn't designed for agent-loop composition. Counter: they could ship an MCP wrapper. -3. 
**Real-device input via WDA bundled in 1.0 (beta)** — competitive vs Maestro Cloud at $99 + BrowserStack at $199. Counter: they already have this on iOS via XCTest. - -**Strategic call: SIDESTEP, not head-on.** Don't try to win generic mobile testing. Claim "the premium iOS testing tool agents reach for first." - -### 7.2 vs Anthropic claude-computer-use (existential) - -claude-computer-use is part of Anthropic Claude Pro ($200/mo). If they ship native iOS sim drive, our $149/mo Pro pitch competes with their $200/mo all-in subscription. - -**Strategic call: LEAN IN with explicit acquisition optionality at months 6-12.** Be the iOS-specific layer Anthropic doesn't bother to build. Make SimDrive's MCP surface what Anthropic's iOS-specific computer-use *would* expose. If they ship, we're the obvious acquihire. - -### 7.3 Moat — the only one that compounds is Cloud - -Single biggest moat-defining bet (per `06_world_class_moat_features.md` §3): **Production Session Capture SDK** (G3) — the category-defining feature; LogRocket-of-mobile-QA pattern; anchors the new $50K/yr Platform tier. Either we're the LogRocket of mobile QA by month 24 or we course-correct — both honest outcomes. - -The 5 headline post-1.0 features (covered in §8 below) layer additional moat on top of the Replay Corpus Cloud foundation that ships in v1.1. - ---- - -## 8. Post-1.0 roadmap — the 5 headline moat features - -Per `06_world_class_moat_features.md`. 
Each compounds the previous; ranked by build order: - -| # | Feature | Version | Effort | Why it's a moat | -|---|---|---|---|---| -| 1 | **Replay Corpus Cloud** | v1.1 | 8-12 wks post-1.0 | The only compounding moat — switching cost grows linearly with corpus size | -| 2 | **Perf budgets + trend dashboards** | v1.2 | 12-18 wks | Turns SimDrive into a PR-gate signal source XCTest + Maestro both miss | -| 3 | **SOC 2 signed action ledger** | v1.2 (parallel build) | 12-18 wks build, 6 mo cert | Unlocks regulated TAM beyond the agentic-iOS-developer niche | -| 4 | **Crash-report → journey** | v1.3 | 18-26 wks | Most differentiated single feature; nobody ships this in mobile or web | -| 5 | **Production Session Capture SDK** | v1.4 → 2.0 | 26-40 wks | The category bet; anchors $50K/yr Platform tier | - -**Cut entirely:** App-specific test packs (Slack/Notion vetted journey corpora) — derivative-works lawsuit + target-app ToS violation. We'd be the legal target while Maestro shrugs. - ---- - -## 9. The 24-month thesis - -Per `06 §5`: by May 2028, SimDrive has X paid customers each storing Y MB of replay corpus, running Z journeys/day, contributing to a journey marketplace with W cataloged flows — and switching costs are now measured in "months of replay-corpus migration work." Production Session Capture SDK has either turned us into the LogRocket of mobile QA OR we've course-corrected to a $499/mo SaaS niche. Both are credible outcomes; the choice point is whether G3 ships. - ---- - -## 10. 
Risk register (consolidated across memos) - -| # | Risk | Likelihood | Time-to-impact | Mitigation | -|---|---|---|---|---| -| 1 | Trial-to-paid conversion below 5% floor | 30-40% | 60-120 days | Instrument every funnel stage; hand-hold first 5 conversions; iterate trial UX weekly during launch | -| 2 | Anthropic ships native iOS computer-use | 35-45% | 9-15 mo | Lean in on iOS-specific layer; make MCP surface acquihire-friendly | -| 3 | Apple ships AI test framework Xcode 27 | 15-25% | 12-18 mo (WWDC 2026) | Cross-Apple-version regression + WebView gap focus | -| 4 | Maestro adds MCP wrapper + journey UX | 60-70% | 3-6 mo | Already inevitable; iOS-deep is the differentiator | -| 5 | Premium pricing pushback from individual engineers | 40-50% | 30-60 days | Solo $49 is the answer; if resisted, drop to $29 in 1.1 | -| 6 | WDA provisioning UX kills launch date | 30-40% | 4-8 wks | Gated beta in 1.0, scope-cut escalation by week 2 day 4 | -| 7 | LapsApp build slips past 14 weeks | 30-40% | 4-8 wks | Buffer in estimate; cut 1-2 feature areas if needed | -| 8 | One-engineer-only forces 1.0 to late August | 40%+ | calendar | Hire / contract second engineer NOW | -| 9 | Existing MIT releases (`simdrive 0.3.0a3`) confuse customers about what's free | 50%+ | day 1 | README + product page lead with "1.0 is paid; alpha lineage stays MIT" | -| 10 | Production Session Capture SDK (G3) doesn't ship by month 24 | 50%+ | 24 mo | Explicit course-correct option — fall back to $499/mo SaaS niche; both credible | - ---- - -## 11. Open decisions for the chairman - -| # | Decision | Synthesis recommendation | Why this needs the chairman | -|---|---|---|---| -| 1 | **One engineer or two on SimDrive 1.0** | **Two.** Mid-July ETA depends on it; one engineer slides to late August + forces deferring WDA or Cloud to 1.1. | Hiring/capacity decision. The most consequential call in this plan. 
| -| 2 | **LapsApp engineer (third headcount) — yes/no** | **Yes.** LapsApp ships alongside 1.0 or there's no demo at launch. 14-week build with 1 dedicated engineer. | Hiring decision. | -| 3 | **Re-cast $5K MRR by July as a portfolio target** | **Yes.** Realistic SimDrive July: $1.5-$2.5K. $5K standalone lands October. | Changes a stated goal; Atlas can't unilaterally redefine. | -| 4 | **WDA gated beta vs full parity in 1.0** | **Gated beta.** Parity blows the calendar by 3-4 weeks for a feature most Solo/Pro buyers won't use month one. | Disagrees with chairman's stated direction. | -| 5 | **MCP primitives stay user-facing or become internal** | **User-facing.** Palace's validation flowed through them; hiding them burns evidence before journey runner has its own proof. | Disagrees with chairman's stated direction. | -| 6 | **Cloud private API in 1.0 or punt to 1.1** | **In 1.0.** Engineering A specs it as 1.0 work (M effort); the only compounding moat must ship at launch even if scoped to 5 design-partner replay archives. | Resource allocation. | - ---- - -## 12. 30 / 60 / 90 day execution priorities - -### Days 0-30 (now → 2026-06-01) -1. **Brand revert committed** — SVGs back to "sim/drive" wordmark (DONE, awaiting commit) -2. **Hiring / engineer staffing** — confirm 2 engineers on SimDrive + 1 on LapsApp -3. **Pricing infrastructure** — Stripe live products for Solo/Pro/Team. License server scaffold. Trial activation flow. -4. **Engineering kickoff** — start Components 1+2+3-skeleton (engineer A) and 4+5 (engineer B). LapsApp engineer starts feature areas 1-3. -5. **Design-partner schema review** — Palace + 1 other for journey YAML schema before lock. - -### Days 30-60 (2026-06-01 → 2026-07-01) -6. **Component 3 (journey runner) completion** — first end-to-end journey on TestKitApp -7. **WDA bootstrap working** — installs on Maurice's iPhone 17 Pro Max -8. **Cloud private API live** — first design-partner replay upload -9. 
**LapsApp midway** — 6 of 12 feature areas live, half the journey corpus drafted -10. **Marketing surfaces final** — synctek.io product page, README v2, all premium copy - -### Days 60-90 (2026-07-01 → 2026-08-01) -11. **SimDrive 1.0 launch** (target: mid-July with 2 engineers) -12. **LapsApp v1.0** ships same week -13. **Hand-hold first 5 conversions personally** to instrument every funnel leak -14. **Anthropic cookbook PR** — generic MCP-iOS-driver recipe -15. **First paying customer onboarding** - -### Days 90-180 (2026-08-01 → 2026-11-01) -16. **v1.1 — Replay Corpus Cloud** (the only compounding moat) -17. **v1.2 build start — Perf budgets + SOC 2 ledger** (parallel) -18. **First Cloud SOC 2 audit kicked off** (6-month cert clock) -19. **Course-correct on funnel data** — if conversion <3%, reduce Solo to $29 + revisit pricing - ---- - -## 13. Bottom line - -SimDrive 1.0 is a real product that can credibly charge premium pricing — IF we accept the 10-week build with two engineers, the gated-beta WDA scope, the LapsApp dogfood platform shipping in parallel, and the honest revenue path (October MRR not July). - -The plan is internally consistent. Every claim in marketing copy traces to either (a) what's already validated in `00a_VALIDATED_FACTS.md`, (b) what's specifically planned in `05_engineering_expansion.md`, or (c) what's explicitly labeled post-1.0 in `06_world_class_moat_features.md`. - -What kills the plan: -- **One engineer instead of two** — 1.0 slides 6 weeks -- **No LapsApp engineer** — 1.0 launches without a demo -- **Trial-to-paid below 5%** — funnel collapses; revenue path slides another 3-6 months -- **WDA goes 8 weeks instead of 4** — 1.0 ships without real-device input or slips - -Most of those are mitigated. The hiring decision is the single biggest one. 
- -The headline ask of the chairman: **align on three engineers (2 on SimDrive + 1 on LapsApp) for the next 14 weeks**, plus the realignment of July as a portfolio number rather than a SimDrive standalone target. With those, the 30/60/90 plan executes. - ---- - -*End of plan-of-record. Updated 2026-05-02. Source memos in `simdrive/docs/RD_SIMDRIVE_1.0/`.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/00a_VALIDATED_FACTS.md b/simdrive/docs/RD_SIMDRIVE_1.0/00a_VALIDATED_FACTS.md deleted file mode 100644 index eabb079..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/00a_VALIDATED_FACTS.md +++ /dev/null @@ -1,122 +0,0 @@ -# SimDrive — What's Actually Validated - -**Status:** Source of truth. Every product/marketing claim must trace to a row here. -**Date:** 2026-05-02 -**Rule:** Validated = code path exists + tests pass + Palace dogfooded it. Anything else is hypothesis or aspiration. - ---- - -## A. Code paths that exist + test coverage - -Each row is a real surface in the SimDrive codebase, with file refs and the test count that exercises it. 
- -| Capability | Code path | Tests | -|---|---|---| -| 29-tool MCP surface | `simdrive/src/specterqa_ios/server.py:_TOOLS` | 91 unit tests pass — every tool's schema + handler | -| Vision-first observe (OCR + Set-of-Mark + stable_id + stable_id_loose + confidence_band) | `simdrive/src/specterqa_ios/observe.py` + `som.py` | unit tests for stable_id, stable_id_loose bucketing, find_by_text alias whitelist, confidence dictionary gating | -| Real UITouch HID injection on iOS 26 (CoreSimulator HID port + Indigo wire format) | `simdrive/native/src/simdrive_input.m` + `hid_inject.py` | live E2E test against iOS 26.3 + iPhone 17 Pro sim verifies UITextField focus | -| Tap / swipe / type_text / press_key / clear_field | `simdrive/src/specterqa_ios/act.py` + `server.py:tool_*` | unit + live E2E tests | -| `type_text` injection_method + dispatch_succeeded fields | `server.py:tool_type_text` (HID-aware return shape) | unit test | -| Record + replay with stable_id resolution + SSIM region masking | `simdrive/src/specterqa_ios/recorder.py` | unit tests for stable_id replay fallback, SSIM masking compute, halt context | -| Performance snapshots (CPU%, RSS, threads, footprint) | `simdrive/src/specterqa_ios/perf.py` | unit tests for snapshot, baseline, compare severity bands; live smoke shows real RSS sampling | -| Crash retrieval + diagnostics (doctor, app_state, apps, crashes) | `simdrive/src/specterqa_ios/diagnostics.py` | unit tests for each tool | -| Robustness helpers (alerts, permissions, appearance, sheets, replays) | `simdrive/src/specterqa_ios/robustness.py` | unit tests; the 1-in-4 alert race re-observe loop has its own test | -| Real-device discovery + logs + app lifecycle (read-only) | `simdrive/src/specterqa_ios/device.py` | unit tests; live smoke against Maurice's iPhone 17 Pro Max | -| Stale-MCP version-drift detection | `server.py:_check_version_drift` | unit + live (caught real drift in smoke) | -| `version` MCP tool | `server.py:tool_version` | unit + live | -| 
Recording metadata (simdrive_version, app_version, screenshot_size_pixels, tags, created_by_session) | `recorder.py:Recorder.finalize` | unit + live (Preferences app_version returned "1353.3.2" live) | - -**Test totals:** 91 unit + 26 live E2E against TestKitApp. All passing on the latest code. - ---- - -## B. What Palace actually validated (per dogfood reports) - -Three written dogfood reports from Palace (Maurice Carrier, ThePalaceProject): - -1. `~/Downloads/SIMDRIVE_DOGFOOD_2026_04_29.md` — v0.1.0a1 dogfood -2. `~/Downloads/SIMDRIVE_v0.2.0a1_DOGFOOD.md` — cutover report -3. `~/Downloads/dogfood.rtf` — v0.3.0a2 maintainer report - -**Validated capabilities (Palace exercised them, reported outcomes, gave testimony):** - -| Capability | Palace's verbatim or near-verbatim line | Source | -|---|---|---| -| Replaces predecessor as canonical iOS sim driver | *"simdrive 0.2.0a1 is a meaningful step forward and is now the canonical iOS sim driver for Palace iOS development, replacing SpecterQA."* | v0.2.0a1 dogfood | -| Vision-first navigation via stable_id | *"`tap stable_id="ccac001882f0"` opened Dobbs v. Jackson detail page. 
Title, cover, Borrow button all OCR'd cleanly."* | v0.2.0a1 dogfood | -| iOS-26 UITextField focus with type_text | *"The single biggest reason SpecterQA was failing — the cliclick path that broke UITextField focus — is fully fixed."* | v0.2.0a1 dogfood | -| Record + replay reliability for PR gating | *"Replays are now reliable enough to gate PRs on."* | v0.2.0a1 dogfood | -| stable_id durability across observes | *"`tap stable_id="a229e82e3f00"` is robust even when the mark's index reshuffles between observes — but not 100%: small bbox shifts can rebucket."* | v0.2.0a1 dogfood | -| Real-device session attach + observe + logs | *"Real-device sessions support observe + logs + app lifecycle today."* | v0.2.0a1 dogfood | -| 5-day cutover from predecessor | three dogfood rounds across 2026-04-29 → 2026-05-01, all feedback closed | dogfood timeline | -| Perf cached-RSS bug FIXED in 0.3.0a2 | *"#4 perf cached RSS — confirmed real fresh sampling on 0.3.0a2 (426.98 → 543.30 MB after a real catalog load, severity:high)."* | v0.3.0a2 dogfood | - -**Specific use cases Palace named that work today:** -- Catalog → book detail navigation -- Tab bar tour (multiple tabs via stable_id) -- Search field focus + type → results render -- Record `tab-bar-tour` then replay with SSIM 0.999/step -- Real-device discovery enumerating Maurice's paired iPad + 2 iPhones - ---- - -## C. What's borrowed from SpecterQA-browser, NOT validated for SimDrive - -**Critical:** the 1.0 R&D synthesis I just wrote borrowed concepts wholesale from the predecessor `specterqa` 0.4.0 browser product (`/products/specterqa/` on synctek.io). These are **hypotheses** for SimDrive, not validated features. - -| Concept borrowed from SpecterQA browser | SimDrive validation | Status | -|---|---|---| -| Personas in YAML (role, technical_comfort, goals, frustrations) | Never tested in SimDrive. No persona code exists. Palace never wrote one. | **HYPOTHESIS** | -| Journeys as YAML sequences of goals | Never tested. 
No journey runner exists. Palace's "journeys" are MCP tool sequences they drive directly. | **HYPOTHESIS** | -| `simdrive run --journey ` CLI | Doesn't exist. Palace never asked for it. | **DOES NOT EXIST** | -| AI driving the app "like a real user would" via persona prompts | Doesn't exist as a feature. Today the agent (Claude/Cursor/whatever) drives directly via MCP tools — there's no SimDrive-side persona injection. | **DOES NOT EXIST** | -| Per-persona observation outputs | Doesn't exist. SimDrive outputs sidecar JSON per-observe, not per-persona. | **DOES NOT EXIST** | -| `simdrive ci` orchestrator | Doesn't exist. Palace orchestrates via their own `scripts/simdrive-regress.sh`. | **DOES NOT EXIST** | - -**These are not bad ideas — they may be great 1.x or 2.x features. But they are SpecterQA-browser concepts, not SimDrive validations.** Marketing the journey-driven flow as a 1.0 SimDrive feature would be making a claim we cannot back with code or customer testimony. - ---- - -## D. What's hypothesis (untested in market) - -These are reasonable bets but not validated: - -| Claim | Status | -|---|---| -| SimDrive can sustain $49/$149/$499 premium pricing | Hypothesis. No price-sensitivity testing yet. | -| 14-day free trial converts to paid at 4-5% | Industry benchmark, not SimDrive-specific. | -| Agentic-first GTM (MCP registry, awesome-mcp, training-corpus) drives ~15-25K impressions | Estimate from comparable launches. No SimDrive history. | -| iOS engineers will pay for what Maestro offers free | Hypothesis. Depends on the iOS-deep + agent-loop differentiator surviving real evaluation. | -| Real-device WDA bootstrap takes 3-5 sessions | Estimate from `REAL_DEVICE_FEASIBILITY.md`. Not yet built. | -| Journey-driven flow is the right premium product shape | **Strong hypothesis. The synthesis assumed this; it is not validated.** | - ---- - -## E. 
The validated-only 1.0 product surface - -If we ship **only what's validated**, the SimDrive 1.0 product is: - -- **The 29-tool MCP server.** What Palace dogfooded. -- **Record + replay with stable_id + SSIM masking.** What Palace gates PRs on. -- **Real UITouch HID on iOS 26 simulators.** The killer feature Palace named. -- **Real-device read-only (observe + logs + lifecycle).** Already shipping. -- **License/trial system + premium pricing.** New, but it's commerce infrastructure, not unvalidated product features. - -What's **out** of validated 1.0: -- Personas / journeys YAML (SpecterQA-browser concept, untested in SimDrive) -- `simdrive run --journey` CLI (doesn't exist) -- Real-device input via WDA (not validated; gated beta or 1.1) -- Cloud / hosted replay archive (doesn't exist) -- Persona-driven AI prompting (doesn't exist) - ---- - -## F. The rule going forward - -Every marketing claim, every product-page bullet, every CHANGELOG entry, every Show HN line must trace to a row in §A or §B. If it traces to §C or §D, label it explicitly as "1.x roadmap" or "design hypothesis" — never as a 1.0 feature. - -The R&D synthesis (`00_SIMDRIVE_1.0_PLAN.md`) needs revision against this rule. See §12 of that document for the revision note. - ---- - -*This document supersedes any conflicting claim in the four R&D memos. 
If a memo says SimDrive ships journey-driven 1.0 and this document says it doesn't, this document wins.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/01_product_engineering.md b/simdrive/docs/RD_SIMDRIVE_1.0/01_product_engineering.md deleted file mode 100644 index c7362f1..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/01_product_engineering.md +++ /dev/null @@ -1,469 +0,0 @@ -# SimDrive 1.0 — Product Surface + Engineering Plan - -**Author:** ProductEngineeringAtlas -**Date:** 2026-04-29 -**Status:** R&D memo, BIS feasibility round -**Supersedes:** `docs/PRODUCTIZATION_PLAN.md` (sim-only-1.0 framing — withdrawn) - -This memo is the product+engineering half of the SimDrive 1.0 BIS round. The -strategic frame: **public brand reverts to SimDrive**, the SpecterQA-iOS rename -is thrown away, and 1.0 ships as a **premium-from-day-one product with a free -trial**, not as MIT-engine + paid-cloud. The current 29 MCP tools become -*internal primitives* under a new journey-driven user-facing layer that -mirrors the original `synctek.io/products/specterqa/` browser product but for -iOS and works against simulators **and** physical devices. - -The 0.1.0a1–0.3.0a3 PyPI history under the `simdrive` name stays for -reproducibility; 1.0 ships under a new license decided in the PricingAtlas -memo. - ---- - -# §1. Product surface — the journey-driven layer - -The 29 MCP tools today (`server.py:788-1278`) speak in the agent's vocabulary: -`tap`, `swipe`, `observe`, `perf`. That's the right shape for an *engine*, the -wrong shape for a *product*. A user installing SimDrive 1.0 should describe -**who** is using their app and **what they're trying to do**, not script a -sequence of taps. The journey-driven layer is that user-facing surface. - -## §1.1 Persona spec - -Personas describe the kind of user driving the app. They're consumed by the -journey runner, which compiles them into the system prompt sent to Claude on -every observe→decide→act loop iteration. 
A persona is a YAML file under -`.simdrive/personas/<slug>.yaml`. - -### Schema - -```yaml -# .simdrive/personas/<slug>.yaml -schema_version: 1 # required; integer -name: string # required; human-readable -role: string # required; one-line job description -technical_comfort: low | medium | high # required; affects retry tolerance -patience: low | medium | high # required; affects timeout tolerance -goals: # required; list of strings - - "..." -frustrations: # required; list of strings - - "..." -accessibility_needs: # optional; list of strings - - large_text - - voiceover - - reduce_motion -device_profile: # optional; informs target selection - prefers: simulator | device - os_floor: "16.0" # min OS the persona is on -notes: string # optional; freeform context for the LLM -``` - -### Real examples - -```yaml -# .simdrive/personas/first_time_reader.yaml -schema_version: 1 -name: First-time reader -role: New library patron picking up a digital book -technical_comfort: low -patience: medium -goals: - - find a book by title - - sign in with library card - - read the first page within 60 seconds of opening the app -frustrations: - - modals that interrupt the reading flow - - sign-in screens that lose context after a back-button - - bookmarks that don't sync -accessibility_needs: - - large_text -notes: | - This persona is the public-library acquisition path. They are not technical. - If a screen is ambiguous they will tap the most prominent button. If they - hit two consecutive errors they give up and close the app — the runner - should treat that as a journey failure, not a retry signal.
-``` - -```yaml -# .simdrive/personas/power_user.yaml -schema_version: 1 -name: Power user -role: Returning user with 50+ bookmarks across 3 devices -technical_comfort: high -patience: high -goals: - - sync bookmarks across devices in under 5 seconds - - search local catalog without network - - export annotations to a file -frustrations: - - search results ranked by recency instead of relevance - - any flow requiring more than 3 taps to reach a saved book -device_profile: - prefers: device - os_floor: "18.0" -``` - -```yaml -# .simdrive/personas/recovery_user.yaml -schema_version: 1 -name: Recovery mode user -role: User whose previous session crashed mid-checkout -technical_comfort: medium -patience: low -goals: - - confirm the previous transaction did not double-charge - - resume the in-progress checkout -frustrations: - - apps that lose cart state on relaunch - - error toasts that disappear before they can be read -notes: | - Drives error-path coverage. The runner should *expect* the app to be in a - partially-corrupted state at journey start (use `preconditions.app_state: - recovering`) and verify the recovery UX, not the happy path. -``` - -## §1.2 Journey spec - -Journeys are sequences of goals tied to a persona and a target. Stored at -`.simdrive/journeys/<slug>.yaml`. A journey is replayable when the runner -finalizes a recording bound to its `replay_id`. - -### Schema - -```yaml -# .simdrive/journeys/<slug>.yaml -schema_version: 1 -name: string # required -persona: string # required; persona slug -target: simulator | device # required -device_selector: # required-when-device - udid: "..." # one of udid OR name - name: "iPhone 17 Pro" - os_version: "26.0" -preconditions: # optional - app_bundle_id: string # required-when-set - app_state: clean | recovering | logged_in | offline - pre_grant_permissions: [location, camera, photos] - appearance: light | dark - set_clock_to: ISO-8601 string # for time-sensitive flows -goals: # required; ordered list - - "..."
-success_criteria: # required; ordered list of asserts - - text_visible: "..." - - screen_matches: - - perf_under: { cpu_pct: 25, memory_mb: 200 } - - no_crash: true -budget: # optional - max_steps: int # default 30 - max_seconds: int # default 180 - max_llm_calls: int # default 40 -replay_id: string # auto-assigned on first finalize -tags: [smoke, ci, p0] # optional; for CI filtering -``` - -### Real examples - -```yaml -# .simdrive/journeys/sign_in_first_page.yaml -schema_version: 1 -name: Sign-in then read first page -persona: first_time_reader -target: simulator -device_selector: - name: iPhone 17 Pro - os_version: "26.0" -preconditions: - app_bundle_id: org.thepalaceproject.palace - app_state: clean - pre_grant_permissions: [location] -goals: - - sign in with provided library card credentials - - find "The Great Gatsby" in the catalog - - open the book and reach the first page of content -success_criteria: - - text_visible: "Chapter 1" - - no_crash: true - - perf_under: { cpu_pct: 30 } -budget: - max_steps: 18 - max_seconds: 120 -tags: [smoke, p0] -``` - -```yaml -# .simdrive/journeys/bookmark_sync.yaml -schema_version: 1 -name: Bookmark a book and verify cross-device sync -persona: power_user -target: device -device_selector: - udid: "00008150-00142D540A87801C" # Moes Max, iPhone 17 Pro Max -preconditions: - app_bundle_id: org.thepalaceproject.palace - app_state: logged_in -goals: - - bookmark page 12 of "Frankenstein" - - foreground the iPad paired to the same account - - verify the bookmark appears on the iPad within 5 seconds -success_criteria: - - text_visible: "Page 12" - - cross_device_state_matches: { device: "00008112-000C50CE1A08C01E", screen: bookmarks } -budget: - max_steps: 12 - max_seconds: 60 -tags: [p1, multi_device] -``` - -## §1.3 CLI surface - -Commands the user actually types. The CLI is the front door; the MCP server -becomes one of several execution targets. 
- -| Command | Purpose | First shipped | -|---|---|---| -| `simdrive init` | Scaffold `.simdrive/` (personas/, journeys/, replays/, .gitignore) with two starter personas + one starter journey + a `simdrive.toml` config | 1.0 | -| `simdrive doctor` | Env readiness check — wraps existing `doctor` MCP tool plus license + WDA bootstrap status | exists in MCP, exposed to CLI in 1.0 | -| `simdrive validate` | Schema-validate every `personas/*.yaml` and `journeys/*.yaml`; non-zero exit on first failure | 1.0 | -| `simdrive run --journey ` | Execute one journey, stream agent thoughts to stderr, emit JSON summary on stdout | 1.0 | -| `simdrive ci` | Run every journey tagged `ci` (or `--tag `); emit JUnit XML + replay corpus + summary JSON | 1.0 | -| `simdrive replay ` | Re-run a saved YAML+PNG recording deterministically (no LLM calls), report SSIM drift | exists in MCP, exposed to CLI in 1.0 | -| `simdrive bootstrap-device ` | Clone WDA at pinned SHA, build with user signing identity, install to device | 1.0 | -| `simdrive trial start` | Start the 14-day free trial; writes `~/.simdrive/license.json` with trial key + expiry | 1.0 | -| `simdrive license activate ` | Bind a paid license; supersedes trial | 1.0 | -| `simdrive license status` | Print `{state, expires_at, seats}` | 1.0 | -| `simdrive serve` | Start the underlying MCP server (legacy entry point — the same binary `simdrive 0.3.0a3` shipped) | exists, kept for back-compat | - -## §1.4 MCP tool surface in 1.0 - -**Recommendation: keep all 29 tools MCP-callable. Demote them to "internal / -power-user" status in the docs. Do not break the surface.** - -Trade-off honestly: - -- **Pro of demoting (hiding) the tools:** simpler product story ("write a - journey, not a script"), reduces support surface, lets us evolve internals - without versioning agonies. 
-- **Pro of keeping (what I recommend):** Palace dogfood proved the MCP surface - has organic agent demand outside our journey runner — Atlas drives Palace - iOS today via raw `tap`/`observe` MCP calls and that's the load-bearing - validation that this whole product works. Killing that pathway in 1.0 burns - the strongest piece of dogfood evidence we have. Plus, a journey runner that - internally calls its own public MCP tools is identical in cost to one that - calls private functions; the public-API tax is near zero. - -What changes is **positioning**, not surface area: - -- README leads with `simdrive run --journey ...`, not with the tool list. -- The MCP tools are documented under `docs/internal-primitives.md` with a one- - line heading: *"Use these directly only when the journey layer doesn't fit - your problem."* -- The `STABILITY.md` (still required for 1.0 — see §2.2) declares the - journey-spec YAML as the **stable user-facing contract**; the MCP surface is - declared "stable but power-user", and reserves a smaller break window. - -## §1.5 Output format - -Each `simdrive run` produces a single artifact directory under -`.simdrive/runs/-/`: - -``` -.simdrive/runs/sign_in_first_page-20260601T103015Z/ - summary.json # machine-consumable, see schema below - summary.md # human-readable - junit.xml # only when invoked from `simdrive ci` - recording.yaml # the replay artifact (existing format) - screenshots/ # one PNG + sidecar JSON per observe step - step_001.png - step_001.json - ... 
- perf/ # one snapshot per perf checkpoint - baseline.json - end.json - compare.json - crashes/ # any .ips files written during the run - agent_trace.jsonl # one line per LLM call: {step, prompt_tokens, completion_tokens, cost_usd, decision} -``` - -`summary.json` schema: - -```json -{ - "schema_version": 1, - "journey": "sign_in_first_page", - "persona": "first_time_reader", - "target": "simulator", - "device": {"udid": "...", "name": "iPhone 17 Pro", "os_version": "26.0"}, - "started_at": "2026-06-01T10:30:15Z", - "ended_at": "2026-06-01T10:32:08Z", - "duration_seconds": 113, - "outcome": "passed | failed | budget_exceeded | crashed | error", - "steps_executed": 14, - "llm_calls": 18, - "llm_cost_usd": 0.041, - "success_criteria": [ - {"criterion": "text_visible: Chapter 1", "passed": true}, - {"criterion": "no_crash", "passed": true} - ], - "observations": ["..."], - "bugs_filed": ["BUG-2026-001"], - "ux_issues": ["modal blocked the back gesture for ~800ms"], - "replay_id": "rep_20260601_103015" -} -``` - -JUnit XML maps each journey to a `<testcase>`, the `outcome` to pass/fail/skip, -and `agent_trace.jsonl` to attached system-out. Standard CI integrations -(GitHub Actions, GitLab CI, CircleCI) consume this directly without -adapters. - ---- - -# §2. Engineering plan — current state → 1.0 - -## §2.1 Capability gap analysis - -State today is `simdrive 0.3.0a3` (PyPI), 4,118 LOC Python + ~600 LOC ObjC HID -helper, 117 tests (91 unit + 26 live). Scored against 1.0 readiness: - -| Axis | Today | Target | Gap | -|---|---|---|---| -| **MCP tool primitives** (29 tools) | 4 | 5 | 1 known stability bug (`perf` stale cache, `PRODUCTIZATION_PLAN.md:80`); a `type_text` async-focus race; otherwise solid | -| **Journey orchestration layer** | 1 | 5 | Does not exist. The legacy `src/specterqa/ios/som_runner.py:374` had `run_journey` but its journey spec is scenario-shaped, not persona+goal-shaped | -| **Persona-driven AI behavior** | 1 | 5 | Does not exist.
Each MCP call carries no persona context | -| **Real-device input via WDA** | 1 | 5 | Does not exist. Read-only device backend exists (observe + logs + lifecycle, `device.py`); input raises `device_input_unavailable` per `REAL_DEVICE_FEASIBILITY.md:64` | -| **License/trial/entitlement system** | 1 | 4 | Does not exist anywhere in `simdrive/`. Legacy `src/specterqa/ios/license/` exists but is unrelated infrastructure | -| **CLI command surface** | 2 | 5 | Today: `simdrive`, `simdrive --version`, `simdrive --help` only (see CHANGELOG 0.2.0a2). Need 10 subcommands | -| **CI integration (JUnit XML)** | 1 | 5 | Does not exist | -| **Recording-to-replay format** | 4 | 5 | Exists and is solid (`recorder.py`, SSIM-masked, stable_id-resolved). Needs persona+journey wrapping | -| **Documentation** | 2 | 4 | CHANGELOG is current; `LIMITATIONS.md` + `BEST_PRACTICES.md` first-pass; README still says "12 tools" per `PRODUCTIZATION_PLAN.md:44`; no journey cookbook | -| **Test coverage** | 4 | 4 | 117 tests good for primitives; need ~30 journey-level integration tests for 1.0 | - -## §2.2 Engineering work breakdown - -Effort: **S** = ≤2d, **M** = 3-5d, **L** = 1-2 weeks, **XL** = 2-4 weeks. All -estimates are engineer-weeks of one focused person. - -| # | Item | Effort | Depends on | -|---|---|---|---| -| 1 | **Persona + journey YAML schema + validators** — pydantic models, `simdrive validate`, schema-version field with forward-compat reservation | S | — | -| 2 | **Journey runner** — orchestration layer that loops `observe → assemble persona-aware prompt → Claude vision call → translate decision to act tool → observe again` until goals met or budget exhausted. Reuses MCP tools as in-process function calls (skip the JSON-RPC round-trip when running in-process; `from specterqa_ios.server import tool_tap`); falls through to MCP when target is the user's external agent. 
| L | 1 | -| 3 | **Persona-driven prompting** — system prompt assembly module that injects persona role/goals/frustrations/accessibility_needs into every Claude call; emits a stable trace for cost auditing | M | 2 | -| 4 | **Success-criteria evaluator** — `text_visible`, `screen_matches: `, `perf_under`, `no_crash`, `cross_device_state_matches`. Each evaluator wraps an existing MCP tool | M | 2 | -| 5 | **CLI scaffold (`simdrive init`, `validate`, `run`, `ci`, `doctor`, `replay`)** — click-based, mirrors `src/specterqa/ios/cli/commands.py:23-40` for layout but in the new `simdrive/cli/` package | M | 2, 4 | -| 6 | **JUnit XML + summary.json emitter** — wraps run output | S | 5 | -| 7 | **WDA bootstrap CLI (`simdrive bootstrap-device`)** — clone WDA at pinned SHA, build with user's signing identity (`xcodebuild -derivedDataPath ... build-for-testing`), install to device, leave bundle ready. The provisioning UX is the killer; per `REAL_DEVICE_FEASIBILITY.md:34`, signing-identity discovery + dev-team selection + cert-trust prompts ate ~3-5 sessions in past projects. Budget for that pain explicitly. | L | — | -| 8 | **WDA HTTP client + dispatcher** — wires `tap`/`swipe`/`type_text`/`press_key` to WDA REST endpoints when `target=device`. Replaces the `device_input_unavailable` raise with a real path | M | 7 | -| 9 | **License server (minimal)** — `POST /trials` (issue 14-day key), `POST /licenses/activate` (validate paid), `GET /licenses/` (status). FastAPI on Railway. ForgeOS-hosted is also viable. ~300 LOC | M | — | -| 10 | **Trial flow + entitlement gate** — `simdrive trial start`, `license activate`, `license status`. CLI bootstrap reads `~/.simdrive/license.json`, checks expiry on every `simdrive run`/`ci` call, gracefully degrades to `simdrive validate` + `simdrive doctor` only when expired. 
Offline grace: 7 days after last successful server contact | S | 9 | -| 11 | **`perf` stale-cache fix** (carried over from 1.0 must-have list) | S | — | -| 12 | **`type_text` async-focus race fix** (carried over) | S | — | -| 13 | **README rewrite + journey cookbook** — README leads with the journey shape, not the tool table; cookbook has 5 worked examples | M | 5 | -| 14 | **Journey-level integration tests** (~30 tests against TestKitApp covering happy path, budget exhaustion, crash mid-journey, cross-device sync, success-criteria evaluators) | M | 2, 4 | -| 15 | **Telemetry hook (opt-in, off by default)** — emits `{event, journey_slug_hash, outcome, llm_cost, duration}` to a SimDrive-hosted endpoint. Privacy posture: no screenshots, no app names, journey-slug hashed | S design + M impl | 9 | -| 16 | **Cloud-hosted replay archive** | L | 9 | - -Total budget for 1.0 essentials (items 1-14, excluding 15 and 16): -≈ 1S + 1L + 1M + 1M + 1M + 1S + 1L + 1M + 1M + 1S + 1S + 1S + 1M + 1M -= 5S + 7M + 2L = roughly **8.5 engineer-weeks**. - -## §2.3 Recommended 1.0 scope vs deferred - -To justify premium pricing on day one, 1.0 **must ship**: - -1. The journey-driven layer (items 1, 2, 3, 4) — without it, "premium" is just - a license fence around the alpha tool surface -2. The CLI surface (item 5) — `pip install` + `simdrive run --journey foo` is - the demo -3. JUnit + JSON output (item 6) — CI integration is table stakes -4. WDA real-device input (items 7, 8) — chairman directive: real-device must - ship in 1.0 for premium pricing -5. License + trial (items 9, 10) — the conversion mechanism -6. The two open `0.3.0a3` quality bugs (items 11, 12) -7.
README + cookbook (item 13) and journey-level tests (item 14) - -**Defer to 1.1:** - -- **Cloud-hosted replay archive** (item 16) — local-first 1.0; cloud as the - Pro/Team upsell in 1.1 -- **Telemetry** (item 15) — opt-in is fine but dropping it from 1.0 saves a - privacy-policy review cycle -- **Multi-sim parallelism license enforcement** — single-seat-runs-one-sim - for 1.0; parallelism in a Pro tier later -- **`accessibility_audit`, `webview_elements`** — XCTest-bridge-blocked, cut - per the prior plan and not undone here -- **`network` MCP tool** (was deferred from 0.3.0a1) — defer again. Premium - buyers won't notice in 1.0; will notice if everything else feels rushed - -## §2.4 ETA from 0.3.0a3 → 1.0 - -**Honest estimate: 10-12 calendar weeks of one focused engineer**, or ≈8.5 -engineer-weeks of pure work + ~30% slack for the WDA provisioning pain (which -the `PRODUCTIZATION_PLAN.md` explicitly under-budgeted last cycle) + pricing / -license / trial design loops with PricingAtlas + the round-trip on copy and -positioning with MarketingAtlas. - -This is **5-6× the prior plan's** "2 weeks sim-only 1.0" because the prior plan -deliberately deferred WDA, journey layer, and the license system. None of -those are deferrable here. - -Calendar: today (2026-04-29) → **mid-July 2026** for a credible 1.0 release. -That puts a paid-trial-converting product on PyPI roughly aligned with the -chairman's $5K-MRR-by-July target — but **only if** journey-runner work -(item 2) starts immediately and runs in parallel with WDA bootstrap (item 7), -which means two engineers, not one. With one engineer, ETA slips to late -August. - -## §2.5 Top 3 risks - -1. **WDA provisioning UX is the swamp.** Past projects ate 3-5 sessions on - signing-identity discovery, dev-team selection, cert-trust prompts, DDI - mounting. The `REAL_DEVICE_FEASIBILITY.md:34` "3-5 days" estimate is the - pure code; the UX-glue work is on top. 
**Mitigation:** build `simdrive - bootstrap-device` on Maurice's three test devices (`REAL_DEVICE_FEASIBILITY.md:50`) - first; document every prompt the user sees; ship with two pre-recorded - bootstrap GIFs (USB and wireless); commit to a 30-minute first-device - experience or revisit positioning. -2. **Journey runner cost spirals.** Each step is a Claude vision call; a - 30-step journey with retries can be 60+ vision calls. At ~$0.015 per - sonnet vision call that's $0.90/journey-run. A user running 20 journeys/day - spends $18/day on Claude. Easy to underprice the trial. - **Mitigation:** publish per-journey cost in `summary.json` (already in - schema above); ship `budget.max_llm_calls` as a hard ceiling (default 40); - in trial, cap total Claude spend per account at $5/day server-side. -3. **The journey YAML schema is wrong on the first try.** This is the only - surface we're calling "stable" in 1.0. If we get persona fields wrong, or - miss a critical success-criterion type, we break everyone's journeys in - 1.1. **Mitigation:** schema-version every YAML file, ship a forward-compat - layer (`schema_version: 1` reserved through 1.x), draft v1 with two design - partners (Palace + one TBD) before the public 1.0 cut, and treat journey - YAML the same way we'll treat the MCP tool surface — minor-cycle - deprecation rules. - ---- - -## Where I disagree with the chairman's direction - -Two items to flag for synthesis: - -1. **Real-device input in 1.0 is the single biggest schedule risk and I'd - suggest a hedged plan, not a hard "must ship".** The directive is right - that premium pricing demands real-device — but the WDA provisioning swamp - has eaten past projects. Concrete recommendation: ship 1.0 with - real-device input as a **gated beta** (license-flag `realdevice: beta`, - warning banner in `simdrive doctor`, written one-pager on known issues), - not as a stability-equivalent feature. 
That keeps the launch date, - preserves the premium-pricing story (real-device is *available*, just - labeled), and gives us a 1.1 graduation milestone. If the chairman wants - it un-flagged in 1.0, add 3-4 weeks to the calendar. -2. **The 29 MCP tools should remain user-facing, not be made internal.** The - chairman's framing (MCP tools become *internal* in 1.0) is the right - product-marketing instinct but the wrong code-architecture call. Demote - them in *documentation*, not in *visibility*. Reasoning: Palace dogfood is - today's only paying-attention validation, and Palace consumes the MCP - tools directly through Atlas, not through `simdrive run`. Hiding the tools - in 1.0 risks burning that loop before the journey runner has its own - independent dogfood. Keep the tools public; make the docs lead with - journeys. - -Everything else in the chairman's framing — premium-from-day-one, journey- -driven layer, real-device target, `simdrive` brand revert — is well-founded -and the plan above ladders up to it. diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/02_brand_marketing.md b/simdrive/docs/RD_SIMDRIVE_1.0/02_brand_marketing.md deleted file mode 100644 index 6e46658..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/02_brand_marketing.md +++ /dev/null @@ -1,388 +0,0 @@ -# SimDrive 1.0 — Brand & Marketing - -**Author:** MarketingAtlas (BIS R&D) -**Date:** 2026-04-29 -**Scope:** Brand identity refresh, asset revert checklist, premium-positioning copy package for SimDrive 1.0 launch. -**Status:** Draft for BIS review. - ---- - -## §1. Brand identity refresh - -### Name (locked) -**SimDrive.** The codename is the brand. No internal/external split. The earlier rename to SpecterQA-for-iOS is reverted; "SpecterQA" refers exclusively to the predecessor browser-automation product (now archive). Any time we need to disambiguate in copy, the predecessor is **"the SpecterQA browser product"** or **"the SpecterQA browser archive"** — never bare "SpecterQA." 
- -### Tagline — three options for premium positioning, ranked - -| # | Tagline | Why it fits premium | -|---|---|---| -| **1** | **Ship iOS releases your agent already tested.** | Outcome-first, not mechanic-first. Earns the price by tying the product to the release decision — the moment a $X/mo invoice gets approved. Implies SimDrive is on the critical path, not a tool you bolt on. | -| **2** | **The iOS test runner your agent operates.** | Names the buyer's mental category ("test runner") then claims the agentic delta. Shorter, more declarative. Reads as a product, not a hobby. | -| **3** | **iOS QA that closes the WebView and OAuth gaps.** | Specificity. The two coverage holes Palace named are the two holes every iOS team has. Trades brand-feel for buyer-pain matching — the kind of line that survives a forwarded Slack message. | - -The current open-source line — *"Hand your iOS simulator to your agent"* — is good for an MIT pitch ("look at the cool mechanic"). Premium buyers aren't paying to admire a mechanic; they're paying to stop spending Sunday afternoons debugging XCUITest. Tagline #1 is the recommended primary; #3 is the alternate for buyer-aware surfaces (cold email subject lines, paid search). - -### Positioning statement -**SimDrive is the premium iOS test runner that lets your AI agent drive the simulator and connected devices through journeys you author once and replay on every PR — covering the WebView, OAuth, and SwiftUI surfaces XCUITest can't reach.** - -### Buyer persona -The user is an iOS engineer. The **buyer** is one tier up: an **iOS Engineering Manager or Director of Mobile Engineering** at a 20-200-engineer mobile org, accountable for release velocity and on-call burden. They have an existing manual-QA bill (offshore vendor, in-house QA pod, or engineer time) somewhere between $15K and $80K/quarter, and they have at least one critical user flow — usually auth or content — that XCUITest can't cover. 
They've already sanctioned a Claude/Cursor/Copilot license for the team, so "AI agents" is a green field, not a ledge to talk them off. They approve $249/mo or $5K-$15K/yr without a procurement process; anything beyond that needs a champion-paved path. - -Decision drivers, in order: (1) does it close the coverage gap I'm getting paged about, (2) can I stand it up in a sprint, (3) does it survive the next iOS beta, (4) what does the buy/build math look like vs three engineer-weeks of XCUITest plumbing. - -### Voice — five rules - -1. **Outcome before mechanic.** Lead with what the buyer gets (one fewer release fire, PR-gating you can trust). Mechanic comes second, as proof. -2. **Numbers, not adverbs.** "5-day cutover, 3 dogfood rounds, all feedback closed" beats "blazing-fast iteration." This rule survives from the CHANGELOG voice. -3. **Sentences over paragraphs; tables when comparing.** Premium copy is not longer copy. It is *denser* copy. -4. **Premium tone, not sales tone.** No "revolutionary," "seamless," "next-generation," "unleash," "supercharge." Allowed: "earned," "covered," "gates," "ships," "verified," "drift-gated." -5. **One footer line of honesty per surface.** Every premium page calls out one real tradeoff (macOS-only, sim-first in v1.0, real-device input via WDA in v1.1). The honesty is the hook — it tells the buyer the page wasn't written by sales. - -The shift from the open-source CHANGELOG voice: same vocabulary, same anti-fluff posture, but the verbs change. Open-source voice says "we built." Premium voice says "you ship." The center of gravity moves from the maker to the buyer. - ---- - -## §2. Brand asset revert checklist - -The pixel-pin mark is correct and stays. The wordmark text is what reverts. - -| File | Change | Detail | -|---|---|---| -| `docs/brand/logo-primary.svg` | Edit `<text>` block | `SpecterQA` → `simdrive`. Letter-spacing + font stack stay. Update `aria-label`, `<title>`, `<desc>` to "SimDrive" / "simdrive."
| -| `docs/brand/wordmark-bracket.svg` | Edit `<text>` content | `[specterqa_]` → `[simdrive_]`. Update `aria-label`, `<title>`, `<desc>`. | -| `docs/brand/logo-mark-only.svg` | No edit | Pure pixel-pin mark, no text. | -| `docs/brand/favicon.svg` | No edit | Pure mark, no text. | -| `docs/brand/README.md` | Full rewrite | Replace every "SpecterQA" with "SimDrive." The "Internal codename" parenthetical is removed entirely (no codename split). The bracket-fallback example is `[simdrive_]`. The "two weights: 600 for Specter, 400 for QA" line becomes "two weights: 600 for `sim`, 400 for `drive`." | -| `docs/marketing/synctek_product_page.md` | Bulk find/replace + restructure | All "SpecterQA" → "SimDrive." Where the file currently says "the predecessor" referring to the old browser product, change to **"the SpecterQA browser product"** (one occurrence in the case-study link block). The `pip install` becomes `pip install simdrive`. The `.mcp.json` command becomes `simdrive`. | -| `docs/marketing/case_study_palace.md` | Bulk find/replace | "SpecterQA" → "SimDrive" everywhere except the historical references to "the predecessor" — those become **"the SpecterQA browser product"** to disambiguate. The package versions (`specterqa-ios 0.2.0a1` → `simdrive 0.2.0a1`) revert to their `simdrive` namespace. | -| `docs/marketing/show_hn_post.md` | Rewrite per §4.9 | The premium variant is materially different from the open-source Show HN; do not bulk find/replace, replace whole. | -| `docs/marketing/twitter_launch_thread.md` | Rewrite per §4.10 | Same — premium thread is structurally different. | -| `docs/marketing/why_we_built_specterqa.md` | Rename + edit | New filename `why_we_built_simdrive.md`. Body keeps the founder essay structure but every "SpecterQA" → "SimDrive," and every "predecessor" referring to the browser product → "the SpecterQA browser product." | -| `docs/marketing/README_v2.md` | Bulk find/replace + tagline swap | "SpecterQA" → "SimDrive" everywhere. 
Hero tagline `> **Hand your iOS simulator to your agent.**` → `> **Ship iOS releases your agent already tested.**` (Tagline #1). | -| `docs/marketing/pypi_long_description.md` | Bulk find/replace + tagline swap | Same pattern as README_v2. | -| `CHANGELOG.md` v17.0.0a1 entry | Add a v17.0.0a2 entry, do **not** rewrite history | A new entry on top: "Brand revert. The 17.0.0a1 rename to `specterqa-ios` is reversed. Public package returns to `simdrive`; the `specterqa-ios` name lives on as a deprecation alias that re-exports `simdrive`. Same code, fourth name." This preserves the historical record while reverting the public surface. | - -**Disambiguation rule** for any copy that touches both products: SimDrive is **the iOS product**. The browser product is **"the SpecterQA browser product"** (full phrase, on first reference; "the SpecterQA browser archive" works after that). Never bare "SpecterQA" — that's the ambiguous form that caused this whole rename loop. - -**Sed safety note.** Bulk find/replace on `SpecterQA → SimDrive` will catch the disambiguation phrases incorrectly. Recommended workflow: (1) first pass replaces `the SpecterQA browser` with a sentinel like `__BROWSER_PRODUCT__`, (2) second pass replaces all remaining `SpecterQA` with `SimDrive`, (3) third pass replaces the sentinel back. Same pattern for `specterqa` (lowercase) → `simdrive`. - ---- - -## §3. Premium positioning vs open-source positioning - -The product is the same. The pitch is not. 
- -| Dimension | Open-source pitch | Premium pitch (SimDrive 1.0) | -|---|---|---| -| **Headline verb** | "Use," "try," "explore" | "Ship," "gate," "cover" | -| **Center of gravity** | The mechanic ("look at this elegant HID path") | The outcome ("releases your agent already tested") | -| **Social proof** | GitHub stars, MCP-registry inclusion | Named customer logo, 5-day cutover, "reliable enough to gate PRs on" | -| **Urgency mechanic** | "Beta — feedback wanted" | "14-day free trial — full feature access, no credit card" | -| **Risk language** | "MIT, fork it, commit back" | "Free trial removes the risk; cancel before day 14, never billed" | -| **Pricing language** | "Free engine, paid Pro tier" | "Pricing reflects value; ROI math in §6 below" | -| **Tradeoff disclosure** | A "Honest tradeoffs" section, technical | A "What we don't do yet" section, buyer-decision-relevant | -| **CTA** | `pip install` | "Start your 14-day trial" → in-app install + license key flow | -| **Voice register** | Engineer-to-engineer, scrappy | Engineer-to-engineering-manager, calibrated | - -**The single biggest psychological shift:** open-source pitches ask the reader to *invest effort* (clone, install, contribute). Premium pitches ask the reader to *evaluate a decision* (trial, decide, expense). Effort is free; a decision has political cost. So premium copy must remove decision friction at every turn — by being specific about the buyer's pain, by anchoring trial-start at zero risk, and by giving the champion the language they need to defend the line item. - ---- - -## §4. Launch surfaces — production copy - -### §4.1 Homepage hero on synctek.io (~80 words) - -> **Ship iOS releases your agent already tested.** -> -> SimDrive is the premium iOS test runner your AI agent drives directly. It covers the surfaces XCUITest can't reach — Reader2 inside `WKWebView`, OAuth and SAML auth sheets, SwiftUI text input on iOS 26 — and gates them on every PR via SSIM-thresholded replays. 
Stand it up in a sprint. Run it free for 14 days. -> -> **[Start your 14-day trial →]** [See pricing] [Read the Palace case study] - -### §4.2 SimDrive product page on synctek.io (~600 words) - ---- - -# SimDrive - -**Ship iOS releases your agent already tested.** - -``` -[Start your 14-day trial →] [Read the docs] [Talk to sales] -``` - -## Overview - -SimDrive is the iOS test runner your AI agent operates. Your agent calls `observe`, gets back a screenshot plus an annotated copy with every text region marked, picks a target by `text` or `stable_id`, and SimDrive dispatches a real `UITouch` through `CoreSimulator`'s HID port. There is no XCTest, no accessibility-tree query, no Swift runner that breaks on the next Xcode beta. The vision-capable model is the selector engine; SimDrive is the dispatch layer it operates through. - -You author **journeys** in YAML — the same model the SpecterQA browser product made famous, applied to iOS. A journey is a sequence of agent-driven steps with SSIM-gated assertions; SimDrive replays them on every PR and fails the build on visual drift. Your agent writes the journey once. Your CI runs it every commit. - -## Features - -| Capability | What you get | -|---|---| -| **Vision-first observe** | Screenshot + annotated PNG + `marks[]` array with `stable_id` for every detected text region. The agent never has to compute pixels. | -| **Real `UITouch` HID dispatch** | The bundled native helper drives the simulator through `SimDeviceLegacyHIDClient` — the path that triggers `UITextField` first-responder on iOS 26. The regression that broke XCUITest is fixed. | -| **YAML journeys + SSIM replay** | Author a journey once; replay on every PR with per-step SSIM drift gating and `mask_regions` for dynamic chrome. | -| **Connected device coverage** | Real-iPhone observe, logs, and lifecycle today; full input via WebDriverAgent in v1.1. Authored journeys run sim-first, then promote to device with no rewrite. 
| -| **Performance regression gates** | `perf_baseline` + `perf_compare` give per-axis CPU / memory / thread deltas with severity grading. No XCTest bridge. | -| **Crash and diagnostics retrieval** | `.ips` reports filtered by session-start time; environment readiness check via `doctor`. | - -## Quickstart - -1. Start your trial. You receive an install command and a license key by email. -2. `pip install simdrive` and add to `.mcp.json`: - ```json - { "mcpServers": { "simdrive": { "command": "simdrive" } } } - ``` -3. Restart Claude Code. Ask your agent: *open Settings on iPhone 17 Pro and turn on Airplane Mode.* -4. The agent calls `session_start` → `observe` → `tap({text: "Airplane Mode"})` → `observe` to confirm. - -Your first journey is one prompt away. Stand up your first PR-gated journey by end of week. - -## Pricing - -| Tier | Price | Includes | -|---|---|---| -| **Trial** | Free, 14 days | Full feature access, simulator + read-only device, journey authoring, replay | -| **Pro** | $49 / month / seat | Sim + read-only device, hosted replay archive, SSIM-trend dashboards, priority support | -| **Team** | $249 / month for 5 seats | Pro + shared journey corpus + CI integration (`--simdrive` PR-gate flag) + Slack/Linear hooks + real-device input via WebDriverAgent | -| **Enterprise** | $5K-$15K / year | Team + SSO + SOC 2 + RBAC + audit logs + on-prem replay storage | - -## What we don't do yet - -- **Real-device input ships in v1.1.** `observe`, `logs`, and lifecycle work against connected devices today; `tap` / `swipe` / `type_text` raise `device_input_unavailable` until the WebDriverAgent bridge lands. Real-device input is gated to the Team tier. -- **macOS-only.** The HID helper talks to `CoreSimulator`. There is no Linux or Windows path planned. -- **Not an XCTest accessibility-audit replacement.** If you need a11y conformance certification, run XCTest in parallel — SimDrive is the agent-driven layer, not the compliance layer. 
- -## Documentation - -- **[Quickstart](/docs/simdrive/quickstart)** — install, license, first journey -- **[Journey authoring guide](/docs/simdrive/journeys)** — the YAML model and persona patterns -- **[Limitations](/docs/simdrive/limitations)** — Dynamic Island modals, MFA hard-wall, OCR on stylized art -- **[Best practices](/docs/simdrive/best-practices)** — HID + debounce-window rule, SSIM mask conventions -- **[Changelog](/docs/simdrive/changelog)** — every change, with the why - -## Support - -- **Trial / sales** — [contact@synctek.io](mailto:contact@synctek.io) -- **Pro & Team support** — in-app, 1-business-day SLA -- **Enterprise** — named CSM, 4-business-hour SLA -- **Security disclosures** — [security@synctek.io](mailto:security@synctek.io) - -## Related posts - -- [Why we built SimDrive](/blog/why-we-built-simdrive) — the founder essay on the iOS 26 `UITextField` regression and the agent-first pivot -- [Case study: Palace iOS migrated in 5 days](/blog/case-study-palace-simdrive) — Reader2 + OAuth coverage that XCTest couldn't reach -- [How premium iOS QA earns its line item](/blog/simdrive-roi) — the buy/build math vs three weeks of XCUITest plumbing - ---- - -### §4.3 README hero (~150 words) - -``` -<p align="center"> - <img src="docs/brand/logo-primary.svg" alt="SimDrive" width="480"/> -</p> -``` - -# SimDrive - -> **Ship iOS releases your agent already tested.** - -SimDrive is the premium iOS test runner an AI agent operates directly. Author journeys in YAML, run them sim-first, gate PRs on SSIM-thresholded replays, promote to connected device when v1.1 lands WebDriverAgent. The 29 MCP tools — vision-first observe, real `UITouch` dispatch through `SimDeviceLegacyHIDClient`, perf snapshots, crash retrieval, drift-gated replay — are what your agent uses to operate the simulator. The journey YAML is what your team authors and reviews. - -Built for iOS engineering managers evaluating agentic QA against a manual-QA bill. 
Stand up your first PR-gated journey in a sprint. Free for 14 days; full feature access, no credit card. Cancel before day 14, never billed. - -```bash -pip install simdrive -``` - -[Start your 14-day trial →](https://synctek.io/simdrive/trial) · [Read the docs](https://synctek.io/docs/simdrive) · [Case study: Palace iOS](https://synctek.io/blog/case-study-palace-simdrive) - ---- - -### §4.4 Trial-start CTA — three variants to A/B - -| Variant | Sentence | Button | -|---|---|---| -| **A — Outcome** | Your AI agent already drafts code, reviews PRs, and writes tests. Let it run them too. | `Start your 14-day trial →` | -| **B — Math** | Three weeks of XCUITest plumbing, or 14 days to find out if SimDrive replaces it. | `Start your free trial →` | -| **C — Risk-removal** | Full feature access. Fourteen days. No credit card. Cancel anytime; you're never billed. | `Start trial — no card →` | - -Recommended starter: **B**. The buyer's mental ledger is already running the buy/build comparison; B names it explicitly. - ---- - -### §4.5 Pricing page hero (~100 words) - -> **Pricing reflects what SimDrive replaces.** -> -> Most teams evaluating SimDrive are paying somewhere between $15K and $80K per quarter for manual iOS QA — vendor invoices, in-house headcount, or engineering hours that should be shipping features. A Team subscription at $249 / month covers five seats, real-device input, CI integration, and the `--simdrive` PR-gate flag. The math is straightforward. -> -> Start free. Try every feature for 14 days. If the math doesn't work for your team, cancel before day 14 and you're never billed. -> -> **[Start your 14-day trial →]** - ---- - -### §4.6 Cold email to a target buyer (200 words) - -**Subject:** iOS QA that closes the WebView and OAuth gaps - -Hi {first_name}, - -I'm Maurice — I run SyncTek. We make SimDrive, an iOS test runner an AI agent operates directly. 
I'm reaching out because {company} ships a {WKWebView-heavy / OAuth-heavy / SwiftUI-heavy} iOS app, and the public release notes from your last two cycles mention coverage gaps that look a lot like what XCUITest can't reach. - -ThePalaceProject — public-library reading client, Readium 3.x in `WKWebView`, OAuth/SAML auth — migrated their iOS test driver to SimDrive in 5 days. Their engineering lead, three rounds of dogfood feedback in: - -> "Replays are now reliable enough to gate PRs on." - -The mechanic is simple: your agent calls `observe`, sees the screenshot, picks a target by visible text or `stable_id`, and SimDrive dispatches a real `UITouch` through `CoreSimulator`. No XCTest, no Swift runner that breaks on the next Xcode beta, no accessibility tree. - -A 14-day trial gives your team full feature access — sim + read-only device, journey authoring, SSIM-gated replay, the works. No credit card; cancel before day 14, never billed. - -Would 20 minutes next week make sense? I can demo against your build, or if you'd prefer, here's the trial link: synctek.io/simdrive/trial - -— Maurice - ---- - -### §4.7 Trial conversion email sequence (5 emails, day 1 / 4 / 7 / 11 / 13) - -**Day 1 — Welcome + first journey (under 100 words)** - -> **Subject:** SimDrive trial — your first journey in 20 minutes -> -> {first_name}, your trial is live. Here's the fastest path to value: pick one user flow XCUITest doesn't cover today (auth, search, paywall, reader). Ask your agent to drive it once — `session_start` → `observe` → tap your way through. Then `record_start` and do it again; you've got a replayable journey. Reply if you hit anything weird; we read every email. - -**Day 4 — The case study (under 80 words)** - -> **Subject:** How Palace migrated in 5 days -> -> Three dogfood rounds, all feedback closed. Five days, sim driver fully cut over. The flow that finally proved it: a `WKWebView` reading regression XCUITest couldn't see, gated on a SimDrive replay. 
Full case study: synctek.io/blog/case-study-palace-simdrive. Worth 4 minutes if you're evaluating. - -**Day 7 — Halfway check (under 100 words)** - -> **Subject:** Halfway through — what does the math look like? -> -> {first_name}, you're halfway. Quick check: what does your team spend per quarter on manual iOS QA today? Vendor, headcount, or engineering hours all count. A Team subscription at $249 / month replaces the bottom of that ledger for five seats — sim + real-device input + CI integration + Slack hooks. If the math doesn't work, cancel anytime; you're never billed. Reply with your number and I'll show you the comparison spreadsheet we share with directors. - -**Day 11 — Hands-on offer (under 60 words)** - -> **Subject:** Want a live walk-through? -> -> Three days left in your trial. If you'd rather see SimDrive run against {company}'s app live than figure it out solo, I have 30-minute slots Tue / Thu this week. I'll drive against your build over screen-share, no slides. Reply with a time. - -**Day 13 — Last day (under 80 words)** - -> **Subject:** Trial ends tomorrow — three options -> -> Your trial ends in 24 hours. (1) Convert to Pro at $49 / seat / month — keeps your journeys, hosted replays, dashboards. (2) Convert to Team at $249 / month for 5 seats — adds CI integration and real-device input when v1.1 ships. (3) Let it expire — your data stays for 30 days, journeys stay yours under MIT-compatible terms. No automatic charge either way. Pick at synctek.io/simdrive/billing. - ---- - -### §4.8 Post-trial conversion landing page (day 14) - -> **Your SimDrive trial is complete.** -> -> Your journeys, recordings, and session data are preserved for 30 days. Pick up where you left off any time before {expiry_date}. -> -> | | | -> |---|---| -> | **Continue with Pro — $49 / seat / month** | Hosted replay archive, SSIM-trend dashboards, priority support, signed builds. 
| -> | **Continue with Team — $249 / month for 5 seats** | Pro + shared journey corpus + CI integration + Slack/Linear hooks + real-device input via WebDriverAgent (v1.1). | -> | **Talk to sales for Enterprise** | SSO, SOC 2, RBAC, audit logs, on-prem replay storage. | -> -> **[Continue with Pro]** **[Continue with Team]** **[Talk to sales]** -> -> *Not ready? That's fine. Your data is here when you are.* - -Soft-CTA tone, no urgency mechanics, no countdown clock. The buyer didn't fail; the trial completed. Premium products don't pressure. - ---- - -### §4.9 Show HN — premium product variant (~350 words) - -**Show HN: SimDrive – iOS test runner your agent operates (paid, 14-day free trial)** - -Hi HN. I'm Maurice. I want to be upfront before the comment thread spins up: SimDrive is a paid product. Trial is 14 days, full feature access, no credit card. I'm posting it on Show HN anyway because the mechanic is unusual enough that the engineers in this audience are the ones who'll know whether it's right. - -The product is an iOS test runner an AI agent operates. The agent calls `observe`, gets a screenshot plus an annotated copy with numbered red boxes over every OCR'd text region, picks a target by visible text or `stable_id`, and SimDrive dispatches a real `UITouch` through `CoreSimulator`'s HID port using `SimDeviceLegacyHIDClient` + `IndigoMessage`. No XCTest, no Swift runner, no accessibility tree. 29 MCP tools. - -The user surface is journeys in YAML — same model my earlier browser-automation product (the SpecterQA browser archive) shipped, now applied to iOS sims and connected devices. Your agent authors a journey, your team reviews the YAML, your CI replays it on every PR with SSIM drift gating. - -Why I'm charging instead of going OSS: the engineering surface (real `UITouch` on iOS 26, perf, crashes, replay drift, real-device WDA path) is iOS-specific and deep. 
I want to fund a small team that survives the next three Xcode betas without choosing between paying rent and shipping fixes. The honest version: this is the kind of tool that gets abandoned when an OSS maintainer's day job changes. I'd rather it not be that. - -Receipt: ThePalaceProject (public-library reading client, Readium 3.x in `WKWebView`, OAuth/SAML auth) cut over their iOS test driver in 5 days. Three dogfood rounds, all feedback closed. Their lead engineer: - -> "Replays are now reliable enough to gate PRs on." - -Honest limits: macOS-only because `CoreSimulator` only exists there. Sim-first in v1.0; real-device support is read-only (observe, logs, lifecycle; no input) until v1.1 lands the WDA bridge. Not an XCTest replacement for accessibility audits. - -Trial: synctek.io/simdrive/trial. Pricing: synctek.io/simdrive/pricing. Happy to answer technical questions, especially about the HID path or why I went paid instead of MIT. - ---- - -### §4.10 Twitter/X launch thread (7 tweets) - -**1/** Shipping SimDrive today. The iOS test runner your AI agent operates directly. Premium product, 14-day free trial. No credit card. - -`pip install simdrive` — synctek.io/simdrive/trial - -**2/** The mechanic in one line: your agent looks at a screenshot, picks a target by visible text, SimDrive dispatches a real UITouch through CoreSimulator's HID port. - -No XCTest. No Swift runner. No accessibility tree. The model is the selector engine; SimDrive is the dispatch layer. - -**3/** Why this works now: vision-capable models removed the selector bottleneck. Mobile QA has spent a decade teaching machines to find buttons. The machine can already see the screen. - -The selector layer migrated into the model. The runtime just dispatches. - -**4/** Why paid instead of OSS: real `UITouch` on iOS 26, perf, crashes, replay drift, the WDA bridge — this is iOS-deep work that survives the next three Xcode betas only if a small team can fund the surface. Premium pricing funds the surface. Trial removes the buyer's risk.
- -**5/** Receipt: ThePalaceProject — public-library reading client, Readium 3.x WKWebView, OAuth/SAML auth — cut over their iOS test driver in 5 days. Three dogfood rounds, all feedback closed. - -> "Replays are now reliable enough to gate PRs on." — Maurice Carrier, ThePalaceProject - -**6/** What you author is a journey: a YAML file your agent drafts and your team reviews. CI replays it on every PR with SSIM-thresholded drift gating. PR-gating on visual regression isn't BrowserStack-priced anymore. - -**7/** Pricing: $49/seat for Pro, $249/mo for 5-seat Team (with CI integration + real-device input via WDA in v1.1), Enterprise sales-led. - -Trial: synctek.io/simdrive/trial — full feature access, 14 days, no card. Cancel before day 14, never billed. - ---- - -## §5. Competitive narrative - -### vs Maestro (free / freemium) -Maestro is a great cross-platform tool with an installed base, a CLI, and Android coverage SimDrive deliberately doesn't pursue. SimDrive's argument against Maestro is iOS-deep: real `UITouch` on iOS 26, native HID via `SimDeviceLegacyHIDClient`, perf and crash retrieval, SSIM drift gating, and the journey-driven authoring model — the things that take an iOS-specialist team a year to build well. If your bottleneck is iOS-specific coverage on a flagship app where one bad release costs a quarter of velocity, the iOS-deep tool earns its line item against the cross-platform free tier. - -### vs XCUITest (free, Apple) -XCUITest is free but structurally cannot see `WKWebView` content, cannot drive out-of-process Safari sheets, broke `UITextField` first-responder on iOS 26, and requires a Swift runner that re-breaks on every Xcode beta. SimDrive replaces what XCUITest can't reach (WebView, OAuth/SAML, SwiftUI text input on 26) and complements what XCUITest is still good at (accessibility-conformance audits, which are explicitly out of SimDrive's scope). The framing is co-existence: XCUITest for compliance, SimDrive for coverage and PR-gating. 
- -### vs hand-rolled Claude computer-use -Claude's computer-use API can drive a simulator screenshot-by-screenshot, and a sufficiently determined team can rebuild SimDrive's surface on top of it. The realistic estimate from our own codebase is 6-9 months of focused iOS-specialist work to replicate native HID, sim session lifecycle, `simctl` integration, log tail, crash retrieval, perf, recording with SSIM drift gating, and `stable_id` resolution. SimDrive's price point at the Team tier is what that team would cost in a sprint of payroll. The buy/build math favors buy until the org's iOS roadmap is large enough to fund a permanent QA-tooling team. - ---- - -## §6. Three testimonial-grade quotes - -All three sourced from `SIMDRIVE_v0.2.0a1_DOGFOOD.md` (Palace iOS, Maurice Carrier). Adapted for premium-buyer use — the engine-mechanic praise is set aside in favor of value, time, and gating reliability. - -> **"Replays are now reliable enough to gate PRs on."** -> — *Maurice Carrier, ThePalaceProject (Palace iOS)* - -The single line that names the buyer outcome. Use as the primary pull-quote on the homepage hero, the case-study TL;DR, and the cold-email body. - -> **"SimDrive is now the canonical iOS sim driver for Palace iOS development. The single biggest reason the predecessor was failing — the path that broke `UITextField` focus — is fully fixed."** -> — *Maurice Carrier, ThePalaceProject* - -The "we made the cut" line. Use on the product page Features section as the closing testimonial; use in cold-email follow-ups when the buyer asks "but does it actually replace what we have?" - -> **"5 days, 3 dogfood rounds, all feedback closed. Three flows that were structurally untestable under XCUITest — Reader2 inside `WKWebView`, OAuth/SAML auth via Safari sheets, and iOS 26 `UITextField` regression coverage — are now automatable."** -> — *Maurice Carrier, ThePalaceProject (paraphrased from the case-study TL;DR + cutover summary)* - -The time-and-coverage line. 
Use in pricing-page hero, in the day-7 trial email, and in the Show HN comment thread when the inevitable "is this real?" question lands. - ---- - -*End of brand & marketing memo.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/03_gtm_pricing.md b/simdrive/docs/RD_SIMDRIVE_1.0/03_gtm_pricing.md deleted file mode 100644 index 0dbf57f..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/03_gtm_pricing.md +++ /dev/null @@ -1,278 +0,0 @@ -# SimDrive 1.0 — Agentic-First GTM and Premium Pricing - -**Author:** GTMPricingAtlas -**Date:** 2026-04-29 -**Status:** R&D memo for BIS synthesis on SimDrive 1.0 -**Frame:** premium-from-day-one with a free trial; no MIT engine; agentic-first distribution - -This memo supersedes §6 (channels) and §8 (pricing) of `PRODUCTIZATION_PLAN.md` where the previous open-core plan conflicts with the chairman's premium-from-day-one direction. Brand and CLI naming revert to **SimDrive**. Existing PyPI releases `simdrive 0.1.0a1 → 0.3.0a3` (MIT) remain as historical artifacts — 1.0 ships under a new commercial license at a new track. - ---- - -## §1. Pricing model — premium-from-day-one - -Three tier structures evaluated. The product is a CLI + MCP server driving iOS sims and physical devices through journey-driven YAML over 29 primitives — single-developer-shaped, but with obvious team and CI value. - -### Option A — Per-seat (JetBrains, GitHub Copilot model) - -- **One price, no usage caps:** `$59 / seat / month` annual, `$69 / seat / month` monthly. -- **Trial:** 14 days, full features, no card required. -- **Pros:** predictable revenue, predictable buyer mental model, no "did I run too many journeys" anxiety. Annual billing maps cleanly to engineering tool budgets. License math is trivially defensible against Copilot ($19) and JetBrains ($24) — we are 2-3x because we replace a higher-friction job (iOS QA flake debugging, ~4 hours/week) and address a smaller market. 
-- **Cons:** undermonetizes high-usage CI accounts (a team running 10K journeys/night pays the same as a team running 100). No natural upgrade path inside the tier — once you have the seat, there is nothing to upsell except more seats. - -### Option B — Usage-based (Datadog, BrowserStack App Automate model) - -- **`$0.15 / journey-run`** with a $99/month minimum commitment that includes 660 runs. -- **Trial:** 100 free runs across 14 days. -- **Pros:** revenue scales with value delivered. CI-heavy buyers pay more, hobbyists pay less. Aligns the vendor's incentive with reliability — every flaky run we cause costs us reputational margin against a metered bill the customer can audit. -- **Cons:** unfamiliar pricing for a developer-tool buyer who has been trained on per-seat by Copilot and JetBrains. Forces the buyer to forecast usage (notoriously poor at it) before committing, which slows trial-to-paid conversion. Datadog's reputation for surprise bills is now a category headwind, not a tailwind. - -### Option C — Tiered with usage caps (Cypress Cloud, Maestro Cloud model) - -- **Solo:** `$49 / month` — 1 seat, 1 sim, 1 device, 50 journey-runs/month, 7-day replay retention. -- **Pro:** `$149 / month` — 5 seats, 4 sims, 4 devices, 500 journey-runs/month, 30-day replay retention, CI integration, Slack/Linear hooks. -- **Team:** `$499 / month` — unlimited seats, parallel CI runners, 5,000 journey-runs/month, 90-day retention, shared journey corpus, priority support. -- **Enterprise:** sales-led, $7,500–$25,000 / year — SSO, RBAC, SOC 2, audit logs, on-prem option, custom SLA. -- **Trial:** 14 days unlimited (Pro tier features). -- **Pros:** matches both how the product is used (1-engineer side project → CI suite → org-wide platform) and how comparable dev tools price (Cypress: $75 → $300 → $999; Maestro Cloud: $99 → $499 → enterprise). Three legible upgrade triggers (more seats, more runs, real-device CI). Annual contracts for Team+ are natural. 
-- **Cons:** four-tier matrix is more to explain on a pricing page than one-line per-seat. Usage caps create support tickets at month-end ("we ran out of runs, why is the CLI failing"). Mitigated by soft-cap behavior (overage at $0.20/run, no hard stop). - -### Recommendation: **Option C — Tiered with usage caps, Pro at $149/month as the headline.** - -Why Option C over A: the product has three buyers with measurably different willingness-to-pay (solo iOS engineer, mid-size team with CI, large org with compliance), and per-seat collapses that into one signal. Option C captures it. - -Why Option C over B: a developer tool with 0 paying customers today cannot afford the conversion drag of unfamiliar pricing. Per-seat tiers with usage caps is the dominant pattern in the comparable set (Cypress Cloud, Maestro Cloud, Vercel, Linear, PostHog). Buyers know how to evaluate it in 90 seconds. - -**Why $149/month for Pro is defensible:** -- **Above Maestro Cloud entry ($99):** justified by MCP-native integration, real UITouch on iOS 26, and the journey-driven flow over 29 primitives — capabilities Maestro does not match on the iOS-deep dimension. -- **Below BrowserStack App Automate entry ($199):** BrowserStack runs real-device cloud farms; we run local sim + customer-provided device, so our infra cost is materially lower and we should not pretend otherwise on price. -- **2x Cypress Cloud Solo ($75) but with 5 seats included** — buyer sees seat-equivalency at roughly $30/seat, which lands below what Copilot Business ($19) plus a QA-tool premium would cost. -- **Solo at $49** matches the prior plan's "undercut Maestro at $49" reasoning and serves the design-partner cohort coming out of Palace dogfood. - -The pricing page in `simdrive/docs/gtm/pricing_page.md` should be rewritten against this structure (current draft references the open-core Engine, which does not exist in the 1.0 frame). - ---- - -## §2.
Free trial mechanics - -- **Duration: 14 days.** 7 days is too short — iOS engineers ship on weekly cycles and cannot evaluate a CI gate inside a single sprint. 30 days is too long — our trial-to-paid signal slips by half a month, and the "I'll get to it next week" decay is real. 14 days is the JetBrains, Linear, Vercel, and Cypress default for a reason. -- **Trial gating: full Pro-tier features, soft usage cap of 250 journey-runs.** Feature-limited trials underdemo a tool whose value lives in the integration with CI and Slack. Hard-capping runs at 100 (Option B's idea) starves CI-curious buyers. 250 runs is enough for one engineer to wire a real PR-gate and watch it for a week. -- **Trial activation: email + CLI key, no credit card.** Card-required reduces top-of-funnel by ~60% (Stripe's published benchmark) and we cannot afford that compression at our funnel size. Email-only trades some lead quality for funnel volume (some triallers will not convert, but the agentic-distribution channels we depend on are mostly developer-quality already). Activation flow: developer runs `simdrive trial start --email me@example.com`, receives a signed key by email, pastes it into the CLI, and the 14-day clock starts. -- **Post-trial (day 15): grace period 7 days, then read-only.** Hard CLI stop is hostile and breaks running CI suites — bad word-of-mouth. Read-only means observe/screenshot/report still work but `tap`, `type_text`, `swipe`, and `record/replay` return `license_required`. Customer can finish current debugging session, then upgrade or walk away. -- **Re-trial policy: no.** One trial per email + per machine fingerprint (CLI binds the trial key to the machine on activation). If a customer wants a longer evaluation, they can extend by emailing support — judgment call by Maurice. Standard policy across Cypress, Linear, JetBrains.
-- **Realistic conversion benchmark: 3–5% from trial-start to paid.** Comparables: JetBrains 30-day-trial conversion runs around 8% per their public investor decks, but they have a captive enterprise install base. Cypress Cloud reports trial-to-paid in the 4–6% range. Datadog ~2–3%. We should plan around 4% for the first quarter and treat anything above 6% as a signal we are underpricing or undertargeting. - ---- - -## §3. License enforcement - -The technical mechanism. Premium pricing requires enforcement that is robust enough to defend revenue but not so heavy-handed that it breaks the agentic ergonomics. The bar: license enforcement must never make `simdrive observe` slower than 50ms. - -### License key format - -``` -SD1-XXXX-XXXX-XXXX-XXXX -``` - -- Prefix `SD1` = SimDrive 1.x. -- 16 hex chars (4 groups of 4) = 64-bit machine-bound payload + signature. -- Signed with an Ed25519 keypair held by the SyncTek license server. Public key embedded in the CLI binary. -- Payload encodes: license tier, seat count, expiry, machine fingerprint hash, run-cap if applicable. - -Activation: `simdrive license activate SD1-XXXX-XXXX-XXXX-XXXX`. CLI verifies signature offline against embedded public key, writes license blob to `~/.simdrive/license.json`, prints tier and expiry. - -### License validation: offline-first with online refresh - -- **Default mode: offline.** Every CLI command verifies signature locally. No network call on the hot path. This matches developer expectations (Linear CLI, JetBrains all work offline) and keeps `simdrive` usable on airplanes, in air-gapped CI, and behind corporate proxies. -- **Online refresh: weekly.** CLI pings `license.synctek.io/refresh` once every 7 days when network is available. Server returns updated license blob with current tier, run count, and expiry. If the customer downgraded, canceled, or hit a run cap, the new blob reflects it. 
If the CLI cannot reach the server for 30 consecutive days, it falls back to read-only with a clear warning ("could not refresh license, run `simdrive license refresh`"). -- **Honest tradeoff:** offline-first means a determined pirate can monkey-patch the verifier in ~15 minutes. We accept that. The same is true of every commercial CLI we ship against (JetBrains, GitHub Copilot CLI, Charles Proxy). Piracy is not the constraint on a $149/month tool sold to developers with corporate cards. License-server downtime breaking customer CI is the bigger risk, and offline-first is the right choice against that risk. - -### Trial enforcement - -Trial keys carry a hard expiry timestamp in the signed payload. CLI compares `now()` to expiry on every command. Day 15: `tap` returns `trial_expired` with a one-line upgrade URL. Clock-rollback evasion (system date set to 2026-04-15) is detected by comparing local time to a monotonic timestamp written to the license blob on every successful run — if local time is ever before the last-seen monotonic timestamp, license is invalidated until refresh. - -### Privacy posture: anonymous run-count telemetry only - -- **Yes:** anonymous run count (one increment per `simdrive run` invocation), CLI version, OS version, license tier — sent on the weekly refresh ping. Used for license enforcement (run-cap tier) and product analytics (which tiers are growing). -- **No:** journey content, screenshots, app names, accessibility-identifier text, log output, command arguments. None of it touches our servers. The pricing page must say so explicitly. -- **Comparable:** matches JetBrains Toolbox (anonymized usage stats, opt-out). Stricter than Cypress Cloud (which records every test artifact by default). This is a real differentiator for the security-review buyer in Enterprise tier — surface it. -- **Opt-out:** `simdrive config set telemetry false` works at any tier except Team and Enterprise (which contractually owe us run-count data for billing). 
- -### License revocation - -- **Customer cancels:** license remains valid through end of current billing period, then transitions to read-only on next refresh. No retroactive lockout. -- **Payment failure:** 7-day grace, then read-only. Customer keeps full feature access during the grace window — Stripe handles dunning emails. -- **Hard revocation (chargeback, ToS violation):** server-side flag, takes effect on next refresh ping (max 7-day window). Read-only immediately if the CLI is online. -- **Refunds:** within 14 days, no questions, full refund. After 14 days, prorated. Standard SaaS practice, posted on pricing page. - ---- - -## §4. Distribution channels (agentic-first, premium-adapted) - -Premium changes one thing about agentic distribution: we cannot rely on free installs to drive funnel. The MCP registries and awesome-mcp lists still work, but the framing shifts from "install this MIT tool" to "start a 14-day trial of this premium tool." That changes which channels are usable. - -| # | Channel | Premium-adapted action | Lift | -|---|---|---|---| -| 1 | **Anthropic MCP registry** (`claude.ai/mcp`) | Submit listing as `[Premium] SimDrive` with a 14-day-trial CTA. Anthropic does accept paid-product listings (per current registry guidelines, last reviewed 2026-04). The tag `[paid]` or `[trial]` is in active use by other listings. The trial mechanic is the unlock — registry policy requires a way for users to evaluate without payment. | **Step-change** | -| 2 | **`modelcontextprotocol/servers` GitHub PR** | Open PR under "Mobile / Testing" with `[paid]` tag in the description. Open-source-list etiquette: paid tools are accepted with a clear marker, rejected if framed as free. The "Try free for 14 days" framing meets the bar. | Modest, persistent | -| 3 | **Smithery.ai catalog** | Smithery accepts paid listings with a trial-required flag. Submit with full metadata + `pricing: paid, trial: 14d`. 
| Modest | -| 4 | **Cline + Cursor MCP marketplaces** | Cline accepts commercial MCP servers (per their April 2026 marketplace docs); Cursor's marketplace is more conservative but allows trial-gated tools. Both will require a screenshot of the trial flow. | Modest | -| 5 | **PyPI search + GitHub Topics** | Tag the repo with `mcp-server`, `ios-simulator`, `claude`, `anthropic`, `xctest-alternative`. Publish a marketing-only `simdrive` distribution to PyPI that prints a one-line "this is the premium SimDrive — visit simdrive.io to start a trial" message and exits with a non-error code. Avoids confusion with the legacy MIT 0.x track. | Modest, persistent | -| 6 | **`anthropics/anthropic-cookbook` PR** | Cookbook policy (per current contributor guidelines) does not accept examples that require a paid product as the primary integration. Workaround: contribute a recipe that uses Claude + a generic MCP iOS-driver pattern, with SimDrive listed as one implementation alongside the legacy MIT 0.3 release. The PR seeds training-corpus footprint without violating cookbook policy. | **Step-change** (long-tail) | -| 7 | **Training-corpus seeding** | Public docs at `simdrive.io/docs` indexable by Google and the next training cycle. Stack Overflow answers to real iOS-26 UITextField focus problems linking SimDrive as the workaround (with a "free trial" disclosure). GitHub Discussion thread on `modelcontextprotocol/servers` showing Palace dogfood data. None of these require the product to be free; they require the product to be useful. | **Step-change** (long-tail, compounds 6-12mo) | -| 8 | **MCP tool reviewers + dev advocates** | Direct outreach to ~15 named reviewers (the "MCP early adopters" segment from `dev_advocate_targets.md`) offering 90-day complimentary Pro licenses in exchange for a written review. Ethical disclosure required (FTC compliance for sponsored reviews). 
| Step-change for the named accounts; modest aggregate | -| 9 | **iOS QA conferences and podcasts** | Premium-friendly venues: AltConf, /dev/world, iOSDevUK, the Stacktrace podcast, Swift over Coffee, Mobile Dev Memo. Pitch SimDrive as the case study, not the ad. The hook is the engineering story (vision-first thesis, iOS 26 TextField, Palace dogfood) not the price. | Modest, durable | - -**Channels we are NOT using:** paid Google ads (zero conversion ROI for $149/month dev tools), SEO content farms (poison the brand), outbound sales (wrong unit economics), LinkedIn growth-hacking (the audience here is on Twitter/X and Mastodon). - -The big change vs the prior plan: cookbook PR drops from "step-change for SimDrive" to "step-change for our credibility" because Anthropic cookbook does not accept paid-product examples directly. We replace it as a primary funnel with **dev-advocate complimentary licenses** (channel 8) — same ~50-account reach, more directly attributable to MRR. - ---- - -## §5. Conversion funnel — trial to paid - -The 5-stage funnel (technically 6 with retention) for an agentic-first premium dev tool. Stage rates calibrated against Cypress Cloud's published benchmarks, JetBrains' investor data, and Datadog's S-1 disclosures — adjusted down by ~30% because we have zero brand recognition entering 2026. - -| # | Stage | Rate | Notes | -|---|---|---|---| -| 1 | **Awareness** (impression on registry, PR, podcast, etc.) | 100% baseline | One impression = one event. Volume matters more than rate at this stage. | -| 2 | **Click through to `simdrive.io`** | 4–8% | Registry CTRs run 3–5%; podcast mentions 1–2%; warm dev-advocate posts 8–12%. Blended 5%. | -| 3 | **Click "Start free trial"** | 35–50% | Conditional on a clean pricing page with a clear price. Buyers self-select hard at this gate — most landing-page visitors never intended to buy a $149/month tool. 40% target. 
| -| 4 | **Activate trial (email + first run)** | 50–65% | Email-only trial removes friction here. Big drop is install failures (Xcode missing, Python version, sim setup). The `simdrive doctor` command is the highest-leverage fix for this stage. 55% target. | -| 5 | **Convert to paid (day 14)** | 3–5% | The most-uncertain rate. JetBrains 8%, Cypress 4–6%, Datadog 2–3% benchmarks. Plan for 4%. | -| 6 | **Retain month 2** | 85–92% | Standard SaaS retention for a sticky CI tool. Pro-tier monthly churn typically 8–15%. Annual prepay improves it materially — push annual at the upgrade flow. | - -**Where the biggest leak is:** stages 4 and 5. Stage 4 (activation) is where install friction kills the funnel — every minute of `pip install` confusion costs us conversions. Stage 5 (paid) is where $149/month is asked for the first time and the buyer either has CI integration value to defend it or they don't. - -**Highest-leverage fix:** kill the activation gap. `simdrive trial start` should run the equivalent of `simdrive doctor` immediately, fail loud on missing deps with one-line install instructions, and walk the user to their first journey run inside 5 minutes. Every minute saved here lifts stage 4 by ~5%, which compounds through stages 5 and 6. - -**Second-highest leverage:** stage 5 conversion is gated by whether the buyer wired a real PR-gate inside the trial. A trial that ends with one local journey is hard to defend at $149/month; a trial that ends with a green check on `simdrive --gate` in CI is easy. Push CI integration in the day-7 trial email aggressively. - ---- - -## §6. Path to $5K MRR — the real math - -### Reverse-engineering the customer count - -At the recommended pricing: - -- $5,000 MRR ÷ $149 Pro = **34 paying Pro accounts** -- $5,000 MRR ÷ $499 Team = **11 paying Team accounts** -- $5,000 MRR ÷ $49 Solo = **103 paying Solo accounts** - -Realistic blend for a 60-day push from zero base: 80% Solo, 18% Pro, 2% Team. 
- -``` -Solo: 82 accounts × $49 = $4,018 -Pro: 18 accounts × $149 = $2,682 -Team: 1 account × $499 = $499 - Total = $7,199 MRR -``` - -Or a lighter-weight blend (60% Solo, 35% Pro, 5% Team): - -``` -Solo: 30 × $49 = $1,470 -Pro: 17 × $149 = $2,533 -Team: 3 × $499 = $1,497 - Total = $5,500 MRR -``` - -So somewhere around **50 paying customers** (mostly Solo, a meaningful Pro cohort, 1–3 Teams) gets us across $5K MRR. Working backward through the funnel at the rates above: - -``` -50 paying customers -÷ 4% trial-to-paid conversion -= 1,250 trial activations needed -÷ 55% activation rate (email → first run) -= 2,272 trial starts needed -÷ 40% landing-page → trial-start -= 5,681 simdrive.io visitors needed -÷ 5% impression → click-through -= 113,636 impressions needed -``` - -In ~60 days. **113K impressions in 60 days is ~1,900 impressions per day.** - -### Is this realistic by July 2026? - -Honest answer: **no, not as a SimDrive standalone target.** Three reasons: - -1. **The product does not exist yet at the 1.0 stable + license-server bar.** v1.0 ships sim-only (per `PRODUCTIZATION_PLAN.md` §4) on the 2-week clock, but the license server, trial activation flow, and Stripe integration add 4–6 weeks of build on top of that. Realistic launch window is mid-June. -2. **113K impressions in 60 days requires distribution we don't yet have access to.** The MCP registry submission delivers maybe 15K cumulative impressions in the first 60 days based on comparable launches (Smithery, Cline). Cookbook PR is gated by Anthropic policy. Dev-advocate outreach to ~15 named accounts at ~5K followers each is ~75K reachable but ~5K actually impressed. -3. **Trial-to-paid takes ~21 days to read.** Day-1 trial starts can't pay until day 14, plus a few days of payment-processor lag. So a June 15 launch's first MRR datapoint lands ~July 5 — after the goal date. 
- -**The realistic target by July: 200 trial activations and 8–12 paying customers ($1,500–$2,500 MRR).** Honest expansion of `PRODUCTIZATION_PLAN.md` §10's recommendation: re-cast July as a portfolio number with SimDrive contributing $1.5–$2.5K alongside the rest of the SyncTek revenue lines. - -**The realistic SimDrive standalone target for $5K MRR: October 2026.** Same trajectory as the prior plan's October target, but with a cleaner premium-from-day-one motion: June launch → July first paying cohort → August scale to 30+ customers via dev-advocate licenses converting → September Team-tier upsell pushes the number through $5K. This is achievable with the funnel math above, run for 4 months instead of 2. - ---- - -## §7. Launch sequence (D-7 to D+30) - -Day-by-day. D0 is the SimDrive 1.0 + license-server-live launch date (target 2026-06-15). All actions concrete, all deliverables testable. - -| Day | Action | Owner | Deliverable | -|---|---|---|---| -| D-7 | Cut SimDrive 1.0 RC to a known-good tag; freeze unless P0 | CodeAtlas + DeployAtlas | Tagged commit, RC live on internal PyPI mirror | -| D-7 | License server (`license.synctek.io`) live in production with Ed25519 keypair, key activation endpoint, refresh endpoint, Stripe webhook integration | DeployAtlas + CodeAtlas | All 4 endpoints return 200 on canary suite | -| D-7 | Stripe live mode active for $49 / $149 / $499 / annual prepay SKUs | DeployAtlas | All 8 SKUs visible in Stripe dashboard, test purchase succeeds | -| D-7 | `simdrive.io/pricing` live with Option C tier table | MarketingAtlas | Page renders, all 4 CTAs route correctly | -| D-6 | `simdrive.io/docs` indexable, robots.txt and sitemap shipped | MarketingAtlas | Google Search Console reports 0 errors | -| D-6 | Trial activation flow end-to-end test: email signup → license email → CLI activation → first journey run | TestAtlas | Pass/fail log; first-journey time under 5 minutes | -| D-5 | Anthropic MCP registry listing draft, premium-tagged | 
MarketingAtlas | Listing copy in `simdrive/docs/gtm/listings/anthropic-mcp-registry.md` | -| D-5 | `modelcontextprotocol/servers` PR draft with `[paid]` tag | MarketingAtlas | PR text in repo, branch pushed | -| D-5 | Show HN draft: "Show HN: SimDrive — premium MCP-native iOS sim driver, 14-day free trial" | MarketingAtlas | Draft in `simdrive/docs/gtm/listings/show-hn.md` | -| D-5 | Twitter/X launch thread draft (5 tweets) | MarketingAtlas | Draft in `simdrive/docs/gtm/listings/twitter-thread.md` | -| D-4 | 90-day complimentary Pro licenses provisioned for 15 dev-advocate targets | DeployAtlas + Maurice | 15 license keys generated, recipients confirmed | -| D-4 | GitHub release draft for `v1.0.0` | DeployAtlas | Draft visible on `gh release list` | -| D-3 | Anthropic dev-rel outreach email queued (template #3) | MarketingAtlas | Email draft, recipients confirmed | -| D-3 | First-3 dev advocates receive their license keys + a 1-page brief | Maurice | 3 confirmed-receipt replies | -| D-2 | Soft-launch ping to Palace + 2 friendlies; confirm trial flow on cold install | Maurice | 3 trial activations on D-1, no install blockers | -| D-2 | Triage and close any P0 / P1 from soft-launch | CodeAtlas + TestAtlas | Issue tracker clean | -| D-1 | Final dry-run: registry forms, Smithery, awesome-mcp PR, Show HN, Twitter, blog | Maurice | Checklist signed | -| D-1 | Pre-stage launch-day Twitter thread, LinkedIn post (Maurice's personal page only) | Maurice | Drafts in scheduler | -| D0 | 09:00 PT — Anthropic MCP registry submission | Maurice | Submission ID logged | -| D0 | 09:05 PT — Smithery.ai submission | Maurice | URL logged | -| D0 | 09:15 PT — `modelcontextprotocol/servers` PR opened | Maurice | PR URL logged | -| D0 | 09:30 PT — Show HN posted | Maurice | URL logged; first-comment reply has install line | -| D0 | 09:45 PT — Twitter thread posted | Maurice | Thread URL logged | -| D0 | 10:00 PT — GitHub release `v1.0.0` published | DeployAtlas | Release URL logged | -| 
D0 | 10:30 PT — Launch blog post live on synctek.io | MarketingAtlas | Blog URL logged | -| D0 | All-day — HN comment monitoring, 15-min reply window for first 10 | Maurice | Comment screenshots saved | -| D+1 | Triage HN feedback into 3 buckets (bug / feature ask / positioning) | CodeAtlas | Issue list with labels | -| D+1 | Anthropic dev-rel outreach email sent | Maurice | Email timestamp logged | -| D+1 | First trial-to-paid conversion call (if any trials hit "ready to upgrade") | Maurice | Call notes filed | -| D+2 | PyPI download + license-server activation count baseline | DeployAtlas | Numbers in launch-receipts.md | -| D+3 | Cline + Cursor MCP marketplace submissions | Maurice + MarketingAtlas | Both submissions logged | -| D+3 | First 3 dev-advocate complimentary licenses redeemed; check usage telemetry | DeployAtlas | Run-count > 5 per advocate confirmed | -| D+5 | Reply to all HN comments older than 24h | Maurice | Comment thread closed out | -| D+5 | Cookbook PR (generic MCP iOS-driver pattern, SimDrive as one implementation) | MarketingAtlas + CodeAtlas | PR opened | -| D+7 | Week-1 metrics review: trial activations, paid conversions, registry approvals | Maurice + MarketingAtlas | `simdrive/docs/gtm/week1-review.md` | -| D+7 | Second-tier dev-advocate outreach (5 more named accounts) | Maurice | 5 emails sent | -| D+10 | First trial-to-paid conversion expected (day-14 post-launch trials closing) | DeployAtlas + Maurice | First MRR datapoint | -| D+10 | Training-corpus essay #1 published: "Why we replaced XCTest with screenshots" | MarketingAtlas | Essay live; 3 inbound links | -| D+14 | Cookbook PR review pass | CodeAtlas | PR moved to "approved" or "merged" | -| D+14 | Training-corpus essay #2: SO answer on iOS-26 UITextField focus, SimDrive linked | MarketingAtlas | SO URL logged | -| D+17 | Mid-month conversion check, churn read on first paid cohort | Maurice | Numbers in week2-review.md | -| D+21 | Training-corpus essay #3: GH Discussion in 
`modelcontextprotocol/servers` with Palace dogfood data | MarketingAtlas | Discussion URL logged | -| D+21 | iOS QA podcast pitch round (3 podcasts, template #6) | Maurice | 3 pitches sent | -| D+25 | v1.1 (real-device WDA) plan freeze | Maurice + CodeAtlas | `simdrive/docs/v1.1-plan.md` | -| D+28 | Design-partner status: trials → paid → expansion | Maurice | LOI count and MRR snapshot | -| D+30 | D+30 retrospective: numbers, lessons, next-30-day plan | Maurice + GTMPricingAtlas | `simdrive/docs/gtm/d+30-retro.md` | - ---- - -## §8. Three execution priorities for the next 30 days - -Concrete, owned, dated. Anchors the execution. - -| # | Priority | Owner | Deadline | Done means | -|---|---|---|---|---| -| 1 | **Build the license server + Stripe integration end-to-end.** Endpoints (`/activate`, `/refresh`, `/webhook`), Ed25519 keypair, Stripe live SKUs, trial activation email flow. This is the single hardest dependency for premium-from-day-one — without it there is no launch. | DeployAtlas + CodeAtlas | **2026-05-29** | All endpoints green on canary suite; one test purchase round-trips through Stripe live mode and creates a working CLI license. | -| 2 | **Ship `simdrive.io/pricing` and the trial activation funnel.** Pricing page (Option C), trial signup, license-key email, CLI activation flow, first-journey-in-5-minutes path. Funnel stages 2–4 from §5 must render correctly before D-7. | MarketingAtlas + CodeAtlas | **2026-06-05** | End-to-end smoke: cold install → email signup → license activation → first `simdrive run` returns success in under 5 minutes. | -| 3 | **Provision and ship 15 dev-advocate complimentary Pro licenses with 1-page briefs.** Channel 8 from §4 is the highest-leverage paid-product distribution we have. The recipients are the named accounts in `dev_advocate_targets.md` "MCP early adopters" + "iOS QA leads" segments. 
| Maurice + DeployAtlas | **2026-06-12** | 15 licenses generated, 15 briefs sent, ≥10 confirmed-receipt replies, ≥5 published reviews/posts referencing SimDrive within D+14. | - ---- - -*End of memo.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/04_competitive_risk.md b/simdrive/docs/RD_SIMDRIVE_1.0/04_competitive_risk.md deleted file mode 100644 index 207372f..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/04_competitive_risk.md +++ /dev/null @@ -1,234 +0,0 @@ -# 04 — Competitive Risk Assessment (Premium Edition) - -**Author:** CompetitiveRiskAtlas -**Date:** 2026-04-29 -**For:** SimDrive 1.0 BIS R&D synthesis -**Audience:** Maurice Carrier (Chairman), synthesis lead, GTMPricingAtlas, ProductAtlas -**Status:** Draft for synthesis review - -> The brand reverts to **SimDrive** for the public 1.0 launch; the `specterqa-ios` PyPI namespace is retained as a transitional alias. This memo treats "SimDrive" as the product name throughout. - -> **Premium-from-day-one is the new constraint.** The prior plan (PRODUCTIZATION_PLAN §8) was open-core: MIT engine free, Cloud paid. The new positioning is premium SaaS with a free trial as the conversion mechanism. That changes which competitors hurt us, which moats matter, and which risks become existential. - ---- - -## §1. The competitive map — premium-priced edition - -Two-axis plot of mobile/UI test tooling. X-axis: pricing model (Free OSS ↔ Premium SaaS). Y-axis: interaction model (Imperative scripting ↔ Journey/persona-driven AI). 
- -``` -Journey/persona-driven AI - ^ - | [SimDrive 1.0] ★ - | (premium, - | journey + MCP) - | - | [Maestro OSS] ----------- [Maestro Cloud] - | (free, YAML flows) ($99/mo entry) - | - | [claude-computer-use] - | (general, $20-200/mo - | Pro/Max via Claude) - | -+-----------+---------------------------------------------+-----> Pricing - | - | [XCUITest] [Detox] [Appium] - | (Apple, (RN, (cross- - | free) free) platform, free) - | [BrowserStack App Automate] - | ($199-$2,000+/mo, real device) - | [idb] [Sauce Labs] - | (Meta, ($1K+/mo team) - | free) [LambdaTest] - | ($99-$199/mo) - | [Cypress OSS] [Cypress Cloud] - | (web, free) ($75-$300/mo team) - | - | [Playwright] - | (MS, OSS) [Datadog Synthetics] - | ($5-$12/test/mo, scales fast) - v -Imperative scripting -``` - -**Where SimDrive 1.0 lands:** upper-right quadrant, roughly alone. The closest neighbors are Maestro Cloud (lower journey-AI sophistication, half the price, more mature) and claude-computer-use (more AI, but generalist; bundled into Claude Pro/Max, not iOS-specific). The space directly above Maestro Cloud — *journey-driven, AI-first, iOS-deep, premium-priced* — is genuinely empty. - -**What's empty around us:** -- No premium **journey-driven** competitor that is **iOS-deep**. Maestro is journey-driven but cross-platform-shallow on iOS internals; BrowserStack is iOS-deep but imperative. -- No premium **MCP-native** mobile testing product. Period. -- No competitor that ships **personas-as-test-actors**. (Closest analog: the user-journey UX maps in Cypress Cloud, but those are diagnostic, not driving.) - -**The risk in this map:** the empty quadrant is empty *because the market hasn't asked for it yet*. We're betting it will, within the trial-to-paid horizon. - ---- - -## §2. The hardest competitor for premium pricing: Maestro - -Maestro is the existential pricing competitor. Free OSS CLI + Maestro Cloud at ~$99/mo entry. 
Similar journey-driven YAML, similar vision+AX hybrid, mobile-focused, faster-growing community than SimDrive will have for 6-12 months. - -**Why would an iOS team pay $99-149/mo for SimDrive instead of using Maestro free?** Three honest answers, each with the counter-argument the buyer will raise: - -### 2.1 iOS-26 TextField focus + native HID -Maestro's iOS path runs through XCTest, which inherits XCTest's iOS 26 TextField focus regression. SimDrive's `SimDeviceLegacyHIDClient` + `IndigoMessage` injection bypasses XCTest entirely. For Palace's OAuth and Reader2 flows, this is the difference between "tests pass" and "tests can't run." -**Counter-argument:** Maestro can fix this in 4-8 weeks if they prioritize it. The technique is in idb (MIT). They have more engineers. The window is narrow. - -### 2.2 MCP-native architecture for agent integration -Maestro's CLI is invoked imperatively or via their Studio. It is not an MCP server. An agent loop that wants to "run a test, observe, decide what to do next" composes naturally on SimDrive's 29-tool surface; with Maestro, the agent is shelling out to a CLI and parsing logs. -**Counter-argument:** Maestro can ship an MCP wrapper in 2-3 weeks. The protocol is documented, the server SDKs are mature, and they have the brand to make it stick. Our MCP-native lead is structural for ~6 months, not 18. - -### 2.3 Real-device input via WDA bundled in 1.0 -If SimDrive 1.0 ships bundled WDA real-device input as part of the premium tier, the comparison becomes "Maestro Cloud ($99/mo, no real device) + BrowserStack ($199+/mo for real device)" vs "SimDrive premium ($99-149/mo, sim + device)." That's an honest premium pitch. -**Counter-argument:** BrowserStack and Sauce already do real-device cloud at scale with thousands of devices, full Apple device matrix, and CI integrations we won't match for a year. If a buyer needs real-device coverage at depth, they already have BrowserStack. 
SimDrive's "bundled real-device input" addresses the local dev loop, not cloud farms. - -### 2.4 Honest verdict on premium defensibility vs Maestro -The premium pitch defends *if and only if* the buyer specifically values: (a) iOS-26 TextField focus today, (b) MCP-native agent integration today, (c) a single tool for sim + local-device dev-loop. That's a real buyer segment — agentic-first iOS teams running Claude Code or Cursor — but it is **narrower than the addressable market for "iOS test automation" generally.** - -If the buyer doesn't specifically value those three things, Maestro free wins. **The premium pitch defends a niche, not the whole market.** That niche must be large enough to support $5K MRR by October. Likely yes; not certain. - ---- - -## §3. The existential threat: Anthropic claude-computer-use - -The prior memo flagged claude-computer-use as the existential risk. Premium positioning makes that threat harder, not easier, to defend against. - -**Why harder under premium:** -1. claude-computer-use is bundled into Claude Pro ($20/mo), Max ($100-200/mo), and Claude Code subscriptions. An iOS team that is already paying for Claude Code is now asked to pay an *additional* $99-149/mo for SimDrive premium. -2. The pitch shifts from "free open-source thing your agent can use" to "another paid SaaS line item." Procurement friction triples. -3. Anthropic's distribution is an order of magnitude better than ours. If they ship native iOS sim drive in claude-computer-use, the default behavior of every Claude Code user changes overnight. -4. We cannot undercut Anthropic's pricing without losing the premium positioning that funds Cloud development and customer support. - -**Two honest defenses:** - -### 3.1 Be the iOS-specific layer they don't bother to build -Anthropic's team is small relative to its surface area. claude-computer-use today is generalist desktop automation; iOS sim is one of fifty things they could build. 
The bet: they prioritize web, then macOS, then maybe iOS — and we have 9-15 months to entrench before iOS reaches their roadmap. -**The honest part:** if they decide iOS is on the roadmap, the gap closes in one quarter. We'd see it coming via the public Claude API roadmap, but would have ~90 days to react. The defensive plan must include a contingency: deepen into things Anthropic structurally won't build (Apple-version regression matrix, WebView gap, named-customer SOC 2 compliance, deterministic replay archive). - -### 3.2 Get acquired by Anthropic before they build it -This is a real strategy, not a fallback. Acquirability requires: -- **Customer logos that Anthropic wants** — Palace + 4-6 named iOS shops with public testimonials. -- **Talent the Anthropic dev-tools team would absorb** — small, technical, MCP-fluent. We are this. -- **Technology that's more expensive to rebuild than to buy** — the iOS-26 HID technique, the journey corpus, the MCP tool surface. ~3-6 person-months to clone; ~$2-5M acquisition price would pencil for them at our scale. -- **Strategic alignment** — SimDrive demonstrates "MCP-native vertical SaaS works." That's a thesis Anthropic actively sells. - -**The acquisition window is tightest in months 6-12 after 1.0 launch.** Beyond that, either we've hit escape velocity (good) or they've shipped iOS themselves (we're acquired-out-of-distress, materially lower price). - ---- - -## §4. Moat reassessment — premium edition - -Re-scoring each candidate moat (1-5, 5 = strongest) under the premium-pricing constraint, with replication cost in time + money. - -| # | Moat | Score | Replication cost | Notes under premium | -|---|---|---|---|---| -| 1 | MCP-native + Claude-tuned tool surface | **3** | 3-4 weeks, $40-60K | Eroded fast: Maestro could ship MCP wrapper. Not premium-defensible alone. | -| 2 | Native HID injection (CoreSimulator) | **3** | 2-3 weeks, $25-40K | Technique is public in idb. We own the iOS-26 tuning, not the technique. 
| -| 3 | Real UITouch focus on iOS 26 TextFields | **4** | 3-5 weeks, $40-70K | Killer feature *today*. Apple may close the gap in iOS 27 (-1 to score then). | -| 4 | Recording + replay (stable_id + SSIM masking) | **2** | 2-3 weeks, $25-40K | Commodity techniques in combination. Differentiation is integration, not novelty. | -| 5 | 29-tool composable surface | **3** | 4-6 weeks, $60-90K | Taste + LLM-loop polish. Hard to clone exactly; easy to clone approximately. | -| 6 | Ecosystem/integrations | **2 → 3 (with Cloud)** | 6-12 months, $200K+ | MCP-registry placement + cookbook PRs + CI templates compound. Premium revenue funds this. | -| 7 | First-mover in MCP-native iOS | **3** | Cannot be replicated; can be displaced | Anthropic ships iOS computer-use → score → 1. | -| 8 | Brand (SimDrive wordmark) | **3 → 4 (with TM)** | $300-2K trademark | Trademark + Palace receipts is genuinely defensible. | - -**Three NEW moats premium positioning gives us:** - -| # | Moat | Score | Replication cost | Notes | -|---|---|---|---|---| -| 9 | Customer relationships (paid users stickier) | **4** | Years of CRM, support, dogfood loops | Free users churn silently. Paid users complain, give feedback, refer peers. Revenue funds dogfood velocity (Palace pattern, scaled). | -| 10 | Cloud lock-in (replay archive + dashboards) | **4** when shipped | 6-12 months Cloud build + journey corpus | Switching cost grows with usage. Strongest *future* moat. | -| 11 | License-server entitlement system | **3** | 4-8 weeks, $60-100K | Auth layer + entitlement enforcement + offline grace periods. Not glamorous; stops casual cloning. | - -**Moat verdict under premium:** the durable moats are #3 (iOS-26 HID, until Apple closes it), #8 (brand/trademark + receipts), #9 (paying customers), #10 (Cloud lock-in once shipped). Everything else is a 6-12 month head start, not a moat. **Build #10 fast.** Cloud is the only moat that compounds. - ---- - -## §5. 
Existential risks — premium edition - -The 4 risks from prior memo, re-scored under premium positioning, plus 2 new ones. - -| # | Scenario | Likelihood | Time-to-impact | Defensive move | -|---|---|---|---|---| -| 1 | Anthropic ships native iOS computer-use | **35-45%** (up from 30-40%) | 9-15 months | Lean in: be the iOS-specific layer. Pursue acquisition path in months 6-12. Deepen into things they won't build. | -| 2 | Apple ships AI test framework with Xcode 27 | 20-30% (up from 15-25%) | 12-18 months (WWDC 2026) | Pivot to cross-version regression and the WebView gap. Premium customers care about Apple-stability more than free users do. | -| 3 | Maestro adds MCP wrapper + matches journey UX | **65-75%** (up from 60-70%) | 3-6 months | Sidestep into iOS-deep + premium-managed-replay. Don't fight Maestro on cross-platform breadth. | -| 4 | Well-funded YC competitor launches | **45-55%** (up from 35-45%) | 6-12 months | Premium positioning is *visible* revenue; that visibility attracts copycats. Lock in 5-7 named logos by Q3 to make displacement expensive. | -| 5 | **Customer pricing pushback** (NEW) | **40-50%** | 0-3 months from launch | Free trial + transparent pricing + ROI calculator (engineer-hours saved). Hold the price; expand the value bundle. | -| 6 | **Trial-to-paid conversion below benchmark** (NEW) | **30-40%** | 60-120 days post-launch | Industry benchmark: 15-25% trial-to-paid for dev tools (per public Mixpanel/ProductLed data). Floor: 5%; below 5% the funnel collapses. Mitigate via aggressive trial-to-paid email sequence + in-product nudges + Palace-style design-partner referrals. | - -**The two new risks are pricing-specific and they didn't exist under open-core.** Premium pricing trades "no revenue but no customer-pricing risk" for "revenue plus material conversion risk." That trade is the right one — but it must be planned for, not stumbled into. - ---- - -## §6. 
Pricing benchmark refresh - -Direct premium-tier competitors and adjacent dev-tools, public 2025-2026 list pricing: - -| Product | Entry tier | Mid tier | Notes | -|---|---|---|---| -| **Cypress Cloud** | $75/mo (3 users) | $300/mo (Team) | Web testing + parallelization + dashboard. Closest analog by buyer profile. | -| **Datadog Synthetics** | ~$5-12/test/mo | scales to $1K+/mo | Per-test pricing; expensive at scale. CI-gated. | -| **BrowserStack App Automate** | $199/mo (single parallel) | $999+/mo (team) | Real-device cloud; the closest direct competitor for premium iOS. | -| **Sauce Labs** | ~$249/mo (single user) | $1,000+/mo team minimum | Enterprise QA; wrong buyer for SimDrive. | -| **LambdaTest** | $99/mo | $199-499/mo | BrowserStack alternative; price-sensitive segment. | -| **Maestro Cloud** | $99/mo (entry) | custom team | Closest journey-driven analog. The price floor we benchmark against. | -| **JetBrains All Products Pack** | $289/yr individual | $649/yr business | Per-developer dev-tool subscription. Reference for "what an engineer will pay personally." | -| **GitHub Copilot Pro** | $10/mo individual | $19/mo Business | Per-seat AI dev tool floor. Hard to charge >10x Copilot for a niche tool. | -| **Anthropic Claude Pro / Max** | $20/mo Pro | $100-200/mo Max | The bundled-AI threat reference. SimDrive premium must justify being a *line item beyond* this. | -| **Cursor Pro** | $20/mo | $40/mo Business | AI dev-tool reference price. | - -**Recommendation for GTMPricingAtlas synthesis:** - -| SimDrive tier | Recommended price | Rationale | -|---|---|---| -| **Free trial** | 14-day, full feature, no card | Below 14 days, the iOS team can't run a real PR-gate. Above 21 days, conversion math degrades. | -| **Solo / Indie** | **$49/mo per seat** | Undercuts Maestro Cloud entry. Pairs with the JetBrains/Cursor/Copilot mental model of "$20-50/mo per dev tool." | -| **Team** | **$149/mo flat (5 seats)** | Beats Cypress Team ($300/mo) on price. 
Clearly-positioned vs Maestro Cloud. | -| **Business** | **$499/mo (15 seats + WDA real device + dashboards)** | The premium-defensible tier. Bundles real-device input, Cloud replay archive, priority support. | -| **Enterprise** | Sales-led, $5-15K/yr | SOC 2, RBAC, SSO. Deferred to v1.2. | - -**Pricing position:** below BrowserStack (because we don't run device clouds), above Copilot/Cursor (because we are a vertical specialty), level with Maestro Cloud entry, undercutting Cypress Team. **Premium without being aspirational.** The premium pitch survives if and only if SimDrive saves the median iOS engineer ≥4 hours/week of flake-debugging. At $49/mo and a $145K iOS-eng loaded rate, that's a ~6× ROI. Defensible. - ---- - -## §7. Strategic recommendations — premium edition - -### 7.1 vs Maestro: head-on or sidestep? -**Sidestep.** Specifically: own *iOS-deep + MCP-native + agent-loop-first*. Don't fight Maestro on cross-platform breadth — they will always have Android, we will not. Don't fight on community size — they have a 2-year head start on stars + contributors. Fight on "what does the agent actually compose against?" and "what works on iOS 26 today?" Those are concrete, demonstrable, and Maestro's free tier doesn't trump them. **Buyer message:** "If you have an Android team too, use Maestro and supplement with SimDrive on iOS. If you're iOS-only with an agent-driven workflow, SimDrive is the right primary tool." - -### 7.2 vs Anthropic claude-computer-use: lean in, pivot, or partner? -**Lean in, with explicit acquisition optionality.** We are not strategically positioned to pivot beyond iOS without losing our differentiator. We are not large enough to partner as peers. The right move is to be the best iOS-specific layer in the Anthropic ecosystem — listed in MCP registry, cited in cookbook recipes, name-checked by claude-computer-use docs as the iOS specialist tool. 
Build the relationship that makes acquisition the most natural exit if/when Anthropic decides iOS is on their roadmap. **Concrete action:** ship cookbook PR by 2026-05-22 (already on PRODUCTIZATION_PLAN §6 channel list); pursue MCP-registry "featured" placement; track engagement metrics that Anthropic BD would value. - -### 7.3 The position SimDrive can credibly own at premium price in 2026 -> **SimDrive is the premium iOS testing tool agents reach for first — journey-driven, MCP-native, iOS-deep where XCUITest fails and Maestro doesn't go — priced for individual iOS engineers and small teams who already pay for AI tooling.** - -That sentence is calibrated for a $49-149/mo buyer profile, not an enterprise QA org. It admits the niche. It defends the premium without overpromising. Synthesis should ladder pricing, GTM, and product roadmap to it. - ---- - -## §8. Risk register - -Top 5 risks, ranked likelihood × impact, with mitigations. - -| Rank | Risk | Likelihood | Impact | Mitigation | -|---|---|---|---|---| -| 1 | **Maestro ships MCP wrapper + matches journey UX** | 65-75% | High | Sidestep into iOS-deep + bundled real-device + premium-managed Cloud replay. Lock in 5-7 named iOS logos before Maestro ships. Don't compete on free-tier OSS popularity. | -| 2 | **Trial-to-paid conversion below 5% floor** | 30-40% | Existential | 14-day full-feature trial. ROI calculator (engineer-hours saved). In-product day-3/day-7/day-13 nudges. Design-partner referral program ($X off for referring Palace-class accounts). Track conversion daily; if <5% at day 60, halt growth spend and rework onboarding. | -| 3 | **Anthropic ships native iOS computer-use** | 35-45% | Existential | Acquisition-track strategy: customer logos, MCP-registry placement, talent visible to Anthropic BD. Deepen into Apple-version-regression and WebView gap (things they won't build). 90-day reaction plan if their public roadmap signals iOS. 
| -| 4 | **Customer pricing pushback at $49-149/mo** | 40-50% | Medium-High | Free trial + transparent pricing + ROI calc. Hold price; expand value bundle. If pushback >40% in trial-exit surveys, add a $19/mo "Hobbyist" tier (single sim, no replay archive) — recovery valve, not headline price. | -| 5 | **Apple ships Xcode 27 AI test framework (WWDC 2026)** | 20-30% | High | Pivot toward cross-version regression matrix + the WebView gap Apple historically does not close. Apple's framework will not include MCP-native support; that gap remains ours. Premium customers care about Apple-stability vendor diversity more than free users do. | - ---- - -## §9. Bottom line - -Premium-from-day-one is harder than open-core *and* more defensible if it works. The harder part: Maestro's free tier and Anthropic's bundled Pro subscription set a low price ceiling for any iOS-specific tool. The more-defensible part: paying customers fund the dogfood velocity that produced Palace-class testimonials in the first place, and Cloud lock-in is the only moat that compounds. - -**The pricing must be calibrated to the agentic-iOS-developer who already pays for Claude Code or Cursor and is willing to add one specialty line item.** Above that buyer, BrowserStack/Sauce already own the enterprise-QA budget. Below it, Maestro free wins. The middle is real but narrow — and that middle is the premium-from-day-one bet. - -**Synthesis must answer three questions:** -1. Is the agentic-iOS-developer segment large enough at $49-149/mo to clear $5K MRR by October? -2. Can we ship Cloud (the only compounding moat) inside the trial-to-paid window for the first design-partner cohort? -3. Are we positioning for acquisition by Anthropic in months 6-12, or for independent escape velocity? The product roadmap diverges depending on the answer. - ---- - -*End of competitive risk assessment. 
Hand-off: GTMPricingAtlas (pricing recommendations §6); ProductAtlas (Cloud roadmap §4 #10); synthesis lead (§7 strategic calls).* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/05_engineering_expansion.md b/simdrive/docs/RD_SIMDRIVE_1.0/05_engineering_expansion.md deleted file mode 100644 index bb5d975..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/05_engineering_expansion.md +++ /dev/null @@ -1,420 +0,0 @@ -# SimDrive 1.0 — Engineering Expansion Plan (Workstream A) - -**Author:** EngineeringExpansionAtlas -**Date:** 2026-04-29 -**Status:** Execution-ready engineering plan, BIS round (full-ambitious-scope branch) -**Inputs cited:** `00a_VALIDATED_FACTS.md` (validated-code source of truth), `01_product_engineering.md` (journey/persona schemas, CLI surface), `REAL_DEVICE_FEASIBILITY.md` (WDA scoping), `simdrive/src/specterqa_ios/server.py` (29-tool surface), `simdrive/src/specterqa_ios/recorder.py` (record/replay foundation), `simdrive/src/specterqa_ios/errors.py` (error code pattern). - -This document is the engineer's day-1 build instruction set. It assumes `01_product_engineering.md` for *what* the journey/persona surface is and `00a_VALIDATED_FACTS.md` §A as the inventory of *what already exists*. Anything not traceable to §A is greenfield and labelled accordingly. - -It does **not** cover Workstream B (post-1.0 moat features) or Workstream C (test app spec). - ---- - -## §1. The five-component build, ranked - -The chairman's "five components" expand into nine engineering deliverables once you decompose schemas/runner separately and split WDA bootstrap from the WDA HTTP client. Build order: - -| # | Component | Depends on | Effort | Greenfield? 
| -|---|---|---|---|---| -| 1 | Journey YAML schema + validator | none | **S** (1-3d) | Greenfield | -| 2 | Persona YAML schema + validator | none | **S** (1-3d) | Greenfield | -| 3 | Journey runner core | (1)+(2) + `tool_observe` + act tools (`tool_tap` / `tool_swipe` / `tool_type_text` / `tool_press_key`) — see `00a_VALIDATED_FACTS.md` §A rows 1, 2, 4 | **L** (3-4w) | Greenfield (extends recorder pattern from `recorder.py`) | -| 4 | License key + trial system | none | **M** (1-2w) | Greenfield | -| 5 | WDA bootstrap CLI (`simdrive bootstrap-device`) | none | **M** (1-2w) | Greenfield | -| 6 | WDA HTTP client wired to act tools | (5) + `act.py` (`00a_VALIDATED_FACTS.md` §A row 4) | **M** (1-2w) | Greenfield client; extends `act.py` dispatch | -| 7 | Cloud private API (replay archive) | none | **M** (1-2w) | Greenfield | -| 8 | `simdrive ci` orchestrator | (3) | **S** (1-3d) | Greenfield (thin wrapper over runner) | -| 9 | Production hardening pass | everything | **M** (1-2w) | Audit/extension across the 29-tool surface | - -Sequencing summary: two engineers, ten weeks, parallel tracks. Engineer A on runner/CLI/docs; Engineer B on device/license/cloud. Detailed week-by-week calendar in §5. Calendar: today (2026-04-29) → **2026-07-08** for tagged 1.0 cut. - ---- - -## §2. Per-component build spec - -### Component 1 — Journey YAML schema + validator (S, greenfield) - -**File path:** `simdrive/src/specterqa_ios/journey/schema.py` (new package). - -**Inputs (public surface):** -- File path: `.simdrive/journeys/<slug>.yaml` -- CLI: `simdrive validate [--journeys-dir <path>]` exits non-zero on first failure. -- Programmatic: `from specterqa_ios.journey.schema import Journey, load_journey`. - -**Outputs:** validated `Journey` dataclass (pydantic v2 `BaseModel`); `simdrive validate` writes `{file, line, error_code, message}` JSONL on stderr. 
- -**Key types.** Pydantic v2 model mirroring `01_product_engineering.md §1.2`: - -```python -class SuccessCriterion(BaseModel): - text_visible: str | None = None - screen_matches: str | None = None # stable_id - perf_under: dict[str, float] | None = None # {cpu_pct, memory_mb} - no_crash: bool | None = None - cross_device_state_matches: dict | None = None - -class Journey(BaseModel): - schema_version: int # MUST equal 1 in v1.0 - name: str - persona: str # slug -> .simdrive/personas/<slug>.yaml - target: Literal["simulator", "device"] - device_selector: DeviceSelector | None - preconditions: Preconditions | None - goals: list[str] # min 1 - success_criteria: list[SuccessCriterion] # min 1 - budget: Budget = Budget() # max_steps=30, max_seconds=180, max_llm_calls=40 - replay_id: str | None = None - tags: list[str] = [] -``` - -**Algorithm:** `load_journey(path) -> Journey` does (1) yaml.safe_load, (2) pydantic validate, (3) cross-ref persona slug exists in `.simdrive/personas/`, (4) when `target=device`, `device_selector` required. - -**Tests:** ≥10 unit tests — each schema field's happy path, missing-required field, wrong type, persona-slug-not-found, schema_version mismatch, success-criteria empty list. - -**Failure modes.** New error codes (extend `errors.py` pattern): -- `journey_schema_invalid` — pydantic validation failed -- `journey_persona_not_found` — persona slug unresolved -- `journey_schema_version_unsupported` — `schema_version != 1` -- `journey_device_selector_missing` — `target=device` without `device_selector` - -**Existing code to extend:** `simdrive/src/specterqa_ios/errors.py` (add error constructors). Nothing else; this is greenfield. - ---- - -### Component 2 — Persona YAML schema + validator (S, greenfield) - -**File path:** `simdrive/src/specterqa_ios/journey/persona.py`. - -Same shape as (1). Schema per `01_product_engineering.md §1.1`. New error codes: `persona_schema_invalid`, `persona_schema_version_unsupported`. 
Same test pattern (≥8 unit tests). - ---- - -### Component 3 — Journey runner core (L, greenfield) - -**File path:** `simdrive/src/specterqa_ios/journey/runner.py`. - -**Inputs:** `simdrive run --journey <slug> [--persona-override <slug>] [--target simulator|device] [--budget-override max_steps=N,max_seconds=N]`. Programmatic: `run_journey(journey, persona, session) -> RunResult`. - -**Outputs:** `RunResult` mirroring the `summary.json` schema in `01_product_engineering.md §1.5`. Writes the full artifact directory `.simdrive/runs/<slug>-<ts>/` (`summary.json`, `summary.md`, `recording.yaml`, `screenshots/step_NNN.{png,json}`, `agent_trace.jsonl`, `perf/{baseline,end,compare}.json`, `crashes/`). - -**Key types:** - -```python -@dataclass -class StepDecision: - tool: Literal["tap","swipe","type_text","press_key","clear_field","done","fail"] - args: dict - rationale: str - confidence: float - -@dataclass -class RunResult: - outcome: Literal["passed","failed","budget_exceeded","crashed","error"] - steps_executed: int - llm_calls: int - llm_cost_usd: float - duration_seconds: float - success_criteria: list[CriterionEval] - replay_id: str - artifact_dir: Path -``` - -**Algorithm.** Loop until success-criteria met or budget exhausted: - -``` -session_start(target, device_selector) # extends VFR §A row 1 -recorder.start(session, name=journey.slug) # extends VFR §A row 5 -baseline = tool_perf_baseline(session) # VFR §A row 7 -while step_idx < budget.max_steps and elapsed < budget.max_seconds and llm_calls < budget.max_llm_calls: - obs = tool_observe(session_id) # VFR §A row 2 - if all_criteria_pass(obs, perf_now): outcome="passed"; break - decision = claude_vision_call(assemble_prompt(persona, journey, obs, last_3_steps), obs.screenshot) - if decision.tool == "done": break - if decision.tool == "fail": outcome="failed"; break - dispatch_act_tool(decision) # tool_tap / tool_swipe / tool_type_text / tool_press_key — VFR §A row 4 - if crash_detected(): outcome="crashed"; break 
- step_idx += 1 -recorder.stop(session) # VFR §A rows 5, 12 -write_artifacts() -``` - -**Load-bearing imports (from validated code):** -- `from specterqa_ios.server import tool_observe, tool_tap, tool_swipe, tool_type_text, tool_press_key, tool_clear_field, tool_perf_baseline, tool_perf_compare, tool_crashes` -- `from specterqa_ios import recorder` -- `from specterqa_ios.session import Session` - -**Persona-aware prompt assembly** (`journey/prompt.py`): concatenates persona (role, technical_comfort, patience, goals, frustrations, accessibility_needs) + journey goals + last-3-step history + observe payload (text + marks). System prompt held stable across steps for Claude prompt-cache reuse (cost-mitigation per `01_product_engineering.md §2.5 risk #2`). - -**Success-criteria evaluators** (`journey/criteria.py`), one per type: -- `text_visible` — substring scan over `obs["text"]` -- `screen_matches: <stable_id>` — lookup in `obs["marks"]` -- `perf_under: {cpu_pct, memory_mb}` — compare against fresh `tool_perf` snapshot (VFR §A row 7) -- `no_crash` — `tool_crashes` returned empty since journey start -- `cross_device_state_matches` — parallel session against second device. **Flagged as 1.0 stretch**; if cut, the criterion logs a warning in `agent_trace.jsonl` and is treated as passing (it does not fail closed — that would surprise users). - -**Tests.** Unit: prompt determinism (≥5), criterion evaluators (one per type), budget enforcement, cost math. Integration vs TestKitApp: happy-path pass; budget exhaustion; crash-mid-journey; criteria-fail; recording finalized; replay-id present. Target ≥30 journey-level integration tests per `01_product_engineering.md §2.2 item 14`. - -**Failure modes (new error codes):** `journey_budget_exceeded` (reported in outcome, not raised), `claude_call_failed` (network/auth), `claude_cost_cap_hit` ($5/day trial cap), `act_tool_failed` (wraps inner SimdriveError, preserves code in `details.inner_code`), `success_criterion_unevaluable`. 
- -**Existing code to extend:** `recorder.py` (`start/stop/replay` — VFR §A row 5), `session.py` (session lifecycle). Runner imports; no edits to existing files. - ---- - -### Component 4 — License key + trial system (M, greenfield) - -Two-part: offline-verifiable signed key (client-side), license server (Railway). - -**Client (`simdrive/src/specterqa_ios/licensing/`):** `~/.simdrive/license.json` on disk. CLI: `simdrive trial start`, `simdrive license activate <key>`, `simdrive license status`. Programmatic: `check_entitlement() -> Entitlement(tier, expires_at, seats)` or raises. - -**Crypto.** Ed25519 (`pynacl`). Keypair generated once by SyncTek via `SigningKey.generate()`. The public key is a hex constant in `simdrive/src/specterqa_ios/licensing/public_key.py`; the private key lives only as Railway env var `SIMDRIVE_LICENSE_PRIVATE_KEY`. Key format: `base64url({tier,seats,issued_at,expires_at,customer_email}) + "." + base64url(ed25519_signature)`. ~200 chars; self-contained; user pastes into `license activate`. - -**Clock skew.** Compare `expires_at` against `max(time.time(), last_known_server_time)` (cached from each status check) to defeat clock backdating. 7-day offline grace per `01_product_engineering.md §2.2 item 10`. - -**Server (`license_server/main.py`, FastAPI on Railway):** -- `POST /v1/trials {email} → {key, expires_at}` (14-day key, rate-limited 5/IP/day) -- `POST /v1/licenses/activate {stripe_subscription_id, email} → {key, tier, seats}` (called by Stripe webhook) -- `GET /v1/licenses/status?key=<...> → {valid, expires_at, server_time}` (returns server_time for skew) -- SQLite on Railway disk for 1.0; Postgres in 1.1. - -**Entitlement gate.** `simdrive run`/`ci` call `check_entitlement()` first; expiry degrades gracefully (`validate` + `doctor` still work) but raises `LicenseError(code="license_expired")` for run/ci with copyable upgrade URL. 
- -**Tests:** sig verify (good/bad/expired/wrong-tier), clock-skew, offline-grace, key roundtrip; license-server happy path, trial rate-limit, double-spend on activated key. ≥15 unit + 5 integration. - -**Failure modes:** `license_invalid`, `license_expired`, `license_offline_grace_exhausted`, `license_tier_insufficient`, `trial_already_used`. - -**Existing code to extend:** `errors.py` (new codes). Otherwise a new package; CLI dispatch wired in (5). - ---- - -### Component 5 — WDA bootstrap CLI (M, greenfield) - -**File:** `simdrive/src/specterqa_ios/wda/bootstrap.py` + CLI subcommand. - -**Inputs:** `simdrive bootstrap-device <udid> [--team-id <ABC123>] [--signing-identity "iPhone Developer: ..."] [--wireless]`. - -**Outputs:** WDA bundle installed on device; `~/.simdrive/wda/<udid>.json` (`{wda_bundle_id, install_path, last_built_at, host, port}`); streaming stdout that calls out user-visible Xcode prompts (e.g. `"Trust this developer → Settings → General → VPN & Device Management"`). - -**Algorithm.** (1) verify `xcodebuild`, `idevicepair`, `ios-deploy` (or `xcrun devicectl`); (2) clone WDA at pinned SHA from `simdrive/src/specterqa_ios/wda/PINNED_SHA.txt`; (3) resolve signing identity (`security find-identity -v -p codesigning` + prompt; raise `wda_no_signing_identity` with copyable Apple-Dev-Center link if none); (4) `xcodebuild -workspace WebDriverAgent.xcworkspace -scheme WebDriverAgentRunner -destination "id=<udid>" -derivedDataPath ~/.simdrive/wda/<udid>/derived build-for-testing`; (5) install via `xcrun devicectl device install app`; (6) port discovery from syslog (`idevicesyslog | grep "ServerURLHere"`); (7) persist registry; (8) smoke `GET http://<host>:<port>/status` → `{ready: true}`. - -**Tests.** Unit: SHA-pin resolution, signing-identity parser, syslog port-discovery regex. Integration: gated-local-only against Maurice's iPhone 17 Pro Max (`00008150-00142D540A87801C` per `REAL_DEVICE_FEASIBILITY.md:50`). Ship a `make wda-smoke` target. 
- -**Failure modes:** `wda_no_signing_identity`, `wda_build_failed` (with xcodebuild log pointer), `wda_install_failed`, `wda_port_discovery_timeout` (15s), `wda_smoke_failed`. - -**Existing code to extend:** `device.py` (VFR §A row 11 — discovery knows about WDA-bootstrapped devices), `errors.py`. - -**Schedule risk.** This is the swamp (§6 risk #1). Allocate 30% slack. If WDA isn't installing on the 17 Pro Max by week 2 day 4, escalate for gated-beta scope cut. - ---- - -### Component 6 — WDA HTTP client wired to act tools (M, extends validated) - -**File:** `simdrive/src/specterqa_ios/wda/client.py`. Internal — called from `act.py` when `session.backend == "device"`. - -**Outputs:** taps/swipes/type/press_key dispatched to the device with return shape identical to the simulator path (the MCP tool contract — VFR §A row 4 — is preserved). - -**Client surface.** Thin wrapper around WDA REST: `WdaClient(host, port).open_session(bundle_id)`, `.tap(x, y)` (`POST /session/<id>/wda/tap`), `.swipe(from, to, duration)` (`/wda/dragfromtoforduration`), `.type_text(text)` (`/wda/keys`), `.press_key(name)` (`/wda/pressButton`), `.status()`. - -**Wiring to `act.py`.** Branch on `session.backend` in each of `tool_tap`/`tool_swipe`/`tool_type_text`/`tool_press_key` (currently raises `device_input_unavailable` per `errors.py:124`): -```python -if session.backend == "device": - return wda_client_for(session.udid).tap(x, y) # reads ~/.simdrive/wda/<udid>.json -else: - return _hid_inject_tap(...) # existing path — VFR §A row 3 -``` - -Backend abstraction is the cleanest insertion point per `01_product_engineering.md §2.2 item 1`. - -**Tests.** Unit: client with mocked HTTP; backend dispatch (sim vs device) routing. Integration: gated-local against Maurice's iPhone 17 Pro Max + bootstrapped WDA, `tap_then_observe` smoke. - -**Failure modes:** `wda_session_not_open`, `wda_http_error` (preserves WDA response body in `details`), `wda_unreachable`. 
- -**Existing code to extend:** `act.py` (VFR §A row 4) — branch on backend; `session.py` — add `backend: Literal["simulator","device"]`; `errors.py`. The `device_input_unavailable` raise is deleted. - ---- - -### Component 7 — Cloud private API (M, greenfield) - -See §3 below for full specification. - ---- - -### Component 8 — `simdrive ci` orchestrator (S, greenfield) - -**File path:** `simdrive/src/specterqa_ios/cli/ci.py`. - -**Inputs:** `simdrive ci [--tag smoke,p0] [--journeys <slug,slug,...>] [--bail] [--junit <path>] [--corpus-out <path>]` - -**Outputs:** -- JUnit XML at `--junit` path (default `.simdrive/runs/junit.xml`) -- Replay corpus directory at `--corpus-out` (default `.simdrive/runs/corpus/`) -- Summary JSON at `.simdrive/runs/ci_summary.json` aggregating every `summary.json` -- Exit code: 0 if all pass, 1 if any fail, 2 on internal error - -**Algorithm:** -1. Discover journeys: `glob('.simdrive/journeys/*.yaml')`; filter by tag/explicit slug. -2. Validate all (component 1) — bail with exit 2 if any invalid. -3. Loop journeys: `RunResult = run_journey(j, p, fresh_session)`; collect. -4. Emit JUnit XML — one `<testcase>` per journey, `<failure>` on outcome != "passed", `<system-out>` = `agent_trace.jsonl` content. -5. Aggregate `ci_summary.json`: pass/fail counts, total LLM cost, total duration, list of failed journeys. - -**Tests:** Integration: 4 mocked journeys (3 pass, 1 fail) → exit 1, JUnit has 1 failure, ci_summary correct. - -**Failure modes:** `ci_no_journeys_matched`, `ci_invalid_journey` (proxies the schema error from component 1). - -**Existing code to extend:** Component 3 (runner). Nothing else. - ---- - -### Component 9 — Production hardening pass (M, audit + extension) - -See §4 below for full specification. - ---- - -## §3. Cloud private API — actual specification - -Greenfield. No existing Cloud code in `simdrive/`. Chairman's framing: private API for first 5 paying customers, **not** a public Cloud product. 
- -**Hosting: Railway.** Reasons: ForgeOS already runs on Railway (`forgeos-api.synctek.io`) — same DeployAtlas runbook, same secrets pattern. Railway + boto3 against R2 is boring; Workers + R2-binding-from-outside is fiddly at 5-customer scale. Migration path to Workers is open if costs ramp (risk §6.3). Stack: FastAPI + uvicorn on Railway, R2 bucket `simdrive-cloud-prod`, Railway Postgres for metadata. Domain: `cloud.simdrive.dev` (chairman to confirm). - -**Auth: bearer = the signed license key from component (4).** No OAuth, no separate API tokens, no signup form. `Authorization: Bearer <license-key>`. Server verifies Ed25519 signature against the same public key the client uses; extracts `tier`/`seats`/`expires_at`/`customer_email`; rejects expired or invalid. Zero new identity surface. - -**Endpoints (v1):** - -| Method | Path | Purpose | Quota | -|---|---|---|---| -| `POST` | `/v1/recordings` | Multipart upload (recording.yaml + screenshots/*) | Storage check | -| `GET` | `/v1/recordings` | List caller's recordings: `[{id, journey_slug, created_at, size_bytes, screenshot_count}]` | — | -| `GET` | `/v1/recordings/<id>` | Download as tar.gz | Bandwidth | -| `DELETE` | `/v1/recordings/<id>` | Delete | — | -| `GET` | `/v1/storage` | `{used_bytes, quota_bytes, tier}` | — | - -No team/sharing endpoints in 1.0 per chairman's "first 5 customers" framing. Add team scoping in 1.1+. - -**Tiers (starting points; flag for revision):** - -| Tier | Quota | Retention | -|---|---|---| -| Solo ($49/mo) | 100 MB | 90 days | -| Pro ($149/mo) | 1 GB | 1 year | -| Team ($499/mo) | 10 GB | 1 year | -| Enterprise | unlimited | custom | - -100 MB Solo ≈ 30 typical recordings (1-3 MB each: yaml + 8-15 PNGs). Recommendation: bump Solo to 250 MB after first design-partner usage data lands, pre-launch. - -**Privacy / encryption.** Customer screenshots may carry PII. -- **At rest:** R2 server-side encryption (default). -- **In transit:** TLS 1.3 only. 
-- **Optional client-side encryption:** `simdrive cloud upload --encrypt` derives key from license-key + per-customer salt; server stores ciphertext, cannot decrypt for support. Documented trade-off. -- **Retention:** R2 lifecycle policy auto-deletes past tier window. -- **Delete:** `DELETE /v1/recordings/<id>` immediate; full account purge via support email (manual until 1.1). -- **No content inspection.** Server stores blobs + metadata; never OCRs or analyzes pixels. -- **Privacy policy must be live before first design-partner upload** — gate in §7. - -**Billing in 1.0: none.** Manual entitlement. Stripe webhook on subscription creation calls `/v1/licenses/activate`; tier read from the license at every request. **1.1 needs:** Stripe usage-based metering for over-quota, self-serve account UI, self-serve purge. - -**Effort:** ~300 LOC FastAPI + ~150 LOC R2 client + ~200 LOC tests + ~200 LOC `simdrive cloud upload/list/download` client. **M** (1-2w) realistic given Railway pattern is established. - ---- - -## §4. Hardening pass — what we ship in production-grade 1.0 - -Premium pricing demands stability. The 29-tool MCP surface is solid (VFR §A: 91 unit + 26 live tests passing) but uneven on UX edges. - -**4.1 Error UX audit.** Today's bar from `errors.py`: good ones (recovery copy in message) include `hid_unavailable` (gives `cd simdrive/native && make`), `target_not_found` (lists available targets), `sim_unhealthy` (gives the recovery shell command), `device_input_unavailable` (doc link). Need work: `no_session`, `missing_target`, `invalid_argument`, `recording_not_found`, `replay_drift_halt`, `already_recording`, `not_recording` — these state the problem without the next action. Deliverable: every error code's message ends `"...Recovery: <copyable command or doc link>."`. ~25 sites to update; new error codes from components 1-8 (~20 net new) follow the pattern from day one. 
Test: `tests/test_error_recovery_copy.py` — for every constructor in `errors.py`, assert the message contains the literal `"Recovery:"`. - -**4.2 Observability.** (a) Replace ad-hoc `print` (~30 sites in `server.py`, `recorder.py`, `act.py`) with `logging.getLogger("simdrive.<module>")`, default level `WARNING`. (b) `SIMDRIVE_DEBUG=1` sets `DEBUG` and emits per-tool latency to `~/.simdrive/debug.log`. (c) Every `tool_*` writes `{tool_name, duration_ms, started_at}` into its sidecar JSON next to the primary return shape. - -**4.3 Performance benchmarks** (P50 / P95 targets): - -| Operation | P50 | P95 | -|---|---|---| -| `tool_observe` (sim, 1024×768) | < 600 ms | < 1.2 s | -| `tool_tap` (sim) | < 80 ms | < 150 ms | -| `tool_type_text` (sim, 20 chars) | < 1.5 s | < 2.5 s | -| Journey replay step | < 800 ms | < 1.6 s | -| Journey runner step (incl. Claude call) | < 4 s | < 8 s | - -`tests/perf/test_benchmarks.py` runs against TestKitApp on a CI-mac runner; fails on >25% P95 regression vs `tests/perf/baseline.json`. Per-tool latency comes from sidecar JSON (4.2) for free. 
- -**4.4 Edge cases to harden** — each with a deliberate test: - -| Edge case | Hardening | Error code | -|---|---|---| -| Sim not booted at session_start | Auto-boot when `--auto-boot` (default true) | `sim_not_booted` | -| App crashed mid-journey | Runner polls `tool_crashes`; outcome="crashed" with `.ips` path | existing | -| Screenshot capture failed | 3× retry, 200 ms backoff | `screenshot_failed` | -| OCR returned empty text[] | Warn in observe sidecar; don't fail | `observe_text_empty` (warn) | -| Network down during Claude call | 3× exponential backoff | `claude_call_failed` | -| `simctl` returns unexpected JSON | Schema-validate; raise with version + raw payload | `simctl_schema_drift` | -| Two journeys race same sim | Per-udid filelock at `~/.simdrive/locks/<udid>.lock` (`fcntl.flock`); `--lock-mode wait\|fail` | `sim_busy` | -| WDA process died during journey | 5s heartbeat; auto-restart once; then escalate | `wda_unreachable` | - -**4.5 Documentation.** README v2 leads with `simdrive run --journey ...` (port from `02_brand_marketing.md`); per-tool reference docs auto-generated via `scripts/gen_tool_docs.py` from `server.py:_TOOLS` to `docs/tools/<tool>.md` (CI fails if regen-diff is uncommitted); 5 recipes in `docs/recipes/01_first_journey.md..05_debugging_a_failed_journey.md` covering first-journey, CI integration, real-device WDA, Cloud upload, and reading `agent_trace.jsonl`; `docs/TROUBLESHOOTING.md` auto-generated from `errors.py` with every code's recovery copy + worked example. - ---- - -## §5. Parallel-engineer plan (10-week calendar) - -Two engineers, ten weeks, mid-July 1.0 cut. Tracks: A = runner/CLI/docs, B = device/license/cloud. 
- -| Week | Engineer A | Engineer B | Integration milestone | -|---|---|---|---| -| 1 | (1) Journey schema + validator; (2) Persona schema + validator; CLI scaffold (`simdrive validate`) | (4a) License client crypto + `~/.simdrive/license.json` format; `simdrive trial start` happy path | — | -| 2 | (3a) Journey runner skeleton — `run_journey()` calling `tool_observe` once, returning stub `RunResult` | (5) WDA bootstrap CLI — clone, build, install on Maurice's 17 Pro Max | **Palace check-in #1** — review locked journey/persona schemas before wider use | -| 3 | (3b) Prompt assembly, Claude vision call, decision dispatch | (6) WDA HTTP client + backend dispatch in `act.py` | — | -| 4 | (3c) Success-criteria evaluators; budget enforcement; recording integration | (4b) License server (Railway FastAPI) — `/v1/trials`, `/v1/licenses/activate`, `/v1/licenses/status` | — | -| 5 | (3d) Journey-level integration tests against TestKitApp | (7a) Cloud API skeleton on Railway + R2 bucket, `POST /v1/recordings` | **End-to-end #1: journey runner works against simulator AND device. 
Palace check-in #2 — drive `sign_in_first_page` from journey YAML.** | -| 6 | (8) `simdrive ci` orchestrator; JUnit XML emitter; CHANGELOG ongoing | (7b) Cloud API complete: `GET/DELETE`, quotas, license-bearer auth | — | -| 7 | (9.1) Error UX audit — all 25+ error sites get Recovery: copy | (9.4) Cloud privacy/encryption; lifecycle policy; rate limiting; **Privacy policy drafted (block on legal review)** | **Palace check-in #3 — first design-partner Cloud upload** | -| 8 | (9.5) README v2; per-tool reference auto-gen; 5 recipes | (5b) WDA gated-beta polish — `simdrive doctor` reports WDA, banner copy, 2 pre-recorded GIFs | — | -| 9 | (9.4) Edge-case hardening — sim-not-booted, OCR-empty, screenshot retry, sim filelock | (9.2/9.3) Observability rollout (logging, SIMDRIVE_DEBUG, sidecar latency); perf benchmarks + CI gate | **Palace check-in #4 — full PR-gating use against Palace iOS** | -| 10 | Launch readiness: 1.0 CHANGELOG, version bump in `__init__.py`, TestPyPI publish dress rehearsal, troubleshooting guide auto-gen | Production deploys: license server on Railway prod, Cloud API on Railway prod, R2 bucket prod policy | **Palace check-in #5 — sign-off + 1.0 PyPI publish** | - -**Integration syncs.** 15-min M/W/F standups weeks 3-9. Protocol convention (§6 risk #5) avoids `server.py` merge conflicts. Critical handoffs: week 2→3 B gives A the WDA-bootstrap output spec; week 3→4 A gives B the `RunResult` schema; week 5→6 both freeze journey YAML schema after Palace #2; week 7→8 B hands A the `simdrive cloud upload` client SDK for recipes/04. - -**Palace check-ins** are the critical-path quality gate — written feedback at the end of every odd week, integrated before the next milestone. Same loop that worked v0.2.0a1 → v0.3.0a3 (VFR §B: three dogfood reports across 5 days closed all feedback). - ---- - -## §6. 
Risks and mitigations - -Five highest, build-specific: - -**Risk 1 — WDA provisioning UX eats >5 sessions.** `01_product_engineering.md §2.5 risk #1` + `REAL_DEVICE_FEASIBILITY.md:34` agree the 3-5 session estimate is the *code path* only — signing-identity discovery, dev-team selection, cert-trust prompts, DDI mounting on top. Mitigation: ship WDA real-device as **gated beta** — license flag `realdevice: beta`, banner in `simdrive doctor`, one-pager on known issues, Maurice's iPhone 17 Pro Max as floor. Preserves premium-pricing story without holding launch hostage. Un-flagging is a 1.1 milestone. - -**Risk 2 — Journey YAML schema needs breaking change in 1.1.** Only "stable" user-facing surface in 1.0; getting persona fields or success-criterion types wrong breaks every customer's files in 1.1. Mitigation: `schema_version: 1` reserved through 1.x; lock only after Palace check-in #1 (week 2) AND check-in #2 (week 5); recruit one more design partner (chairman to nominate) so sample size > 1. - -**Risk 3 — Cloud R2 costs balloon at first-customer scale.** 5 customers × 10 MB/journey × 100 journeys/month = 5 GB/month nominal, but a Pro customer running CI on every PR could push 50 GB/month. R2 egress is free; storage scales linearly. Mitigation: per-tier quotas enforced server-side (component 7); lifecycle deletion (90d Solo, 1y Pro/Team); quota-near-cap email at 80%; billing review at first $50/mo R2 invoice; revise quotas if ramp outpaces revenue. - -**Risk 4 — License system gets reverse-engineered.** Public key in client; tampered binary skipping the check is trivial for a motivated pirate (asymmetric crypto prevents forgery, not patching). Mitigation: offline-first signed Ed25519 + weekly online refresh; accept the patched-binary attack vector — buyers we lose to piracy are not the $149/mo buyers. Revisit at 100 paying customers. 
- -**Risk 5 — Two-engineer parallelism creates merge conflicts on `server.py` (1369 lines; centralized `_TOOLS` registry).** Mitigation: **protocol convention** — every new 1.0 MCP tool lands in its own module under `simdrive/src/specterqa_ios/tools/` (e.g., `tools/journey_run.py`); `server.py:_TOOLS` registration is a one-line import per tool. Single-line list-append edits resolve cleanly. Integration syncs M/W/F; PRs require both engineers' review. - ---- - -## §7. The "definition of done" for 1.0 - -Specific, file-path-citable checklist. 1.0 ships when every box is checked: - -- [ ] Components 1-8 shipped + tested per §2 -- [ ] All 91 existing unit + 26 live E2E still green; plus: ≥10 unit (component 1), ≥8 unit (component 2), ≥30 integration (component 3, per `01_product_engineering.md §2.2 item 14`), ≥15 unit + 5 integration (component 4), ≥10 unit + 1 device-smoke (components 5+6), ≥20 unit + 5 integration (component 7), ≥3 integration (component 8) -- [ ] CI perf-regression test (§4.3) and error-recovery-copy test (§4.1) green -- [ ] Every error code in `errors.py` has `"Recovery:"` copy -- [ ] `SIMDRIVE_DEBUG=1` debug mode + structured logging shipped (§4.2) -- [ ] License/trial: `simdrive trial start` issues 14-day key against test-Stripe; `license activate` binds against production-Stripe; expiry fails closed; 7-day offline grace works -- [ ] Cloud: first design-partner upload via `simdrive cloud upload` end-to-end; privacy policy reviewed and live at `simdrive.dev/privacy` before that upload; quotas enforced; tier gating verified -- [ ] WDA real-device input works for Maurice's iPhone 17 Pro Max (`00008150-00142D540A87801C`) — gated-beta floor; `simdrive doctor` reports WDA status; 2 pre-recorded GIFs (USB + wireless) ship in `docs/`; `realdevice: beta` license flag gates correctly -- [ ] Docs: README v2 (per `02_brand_marketing.md`) leads with `simdrive run`; per-tool reference auto-generated from `_TOOLS`; 5 recipes in `docs/recipes/01..05`; 
`docs/TROUBLESHOOTING.md` auto-generated from `errors.py` -- [ ] CI perf benchmarks green at <25% P95 regression vs `tests/perf/baseline.json` -- [ ] CHANGELOG 1.0 entry written in the project's voice (terse, imperative, file-path-cited — matching v0.2.0a1 / v0.3.0a2) -- [ ] `__version__ = "1.0.0"` in `simdrive/src/specterqa_ios/__init__.py` -- [ ] TestPyPI dress rehearsal — clean-venv install, run `simdrive run --journey sign_in_first_page` against Palace iOS, `outcome: passed` -- [ ] Palace sign-off (check-in #5) — written maintainer report confirming production-ready, same shape as `~/Downloads/dogfood.rtf` v0.3.0a2 - -When this checklist is green, `simdrive 1.0.0` ships to PyPI. Anything not green at cut date → 1.0-RC, not 1.0. - ---- - -*This document is the engineering plan for Workstream A. Workstream B (post-1.0 moat features) and Workstream C (test app spec) feed into the same BIS round but are scoped separately.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/06_world_class_moat_features.md b/simdrive/docs/RD_SIMDRIVE_1.0/06_world_class_moat_features.md deleted file mode 100644 index 62e24f6..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/06_world_class_moat_features.md +++ /dev/null @@ -1,232 +0,0 @@ -# 06 — World-Class Moat Features (Post-1.0 Roadmap) - -**Author:** WorldClassMoatAtlas (Workstream B, SimDrive 1.0 BIS expansion round) -**Date:** 2026-04-29 -**For:** SimDrive 1.x → 2.0 product roadmap -**Audience:** Maurice Carrier (Chairman), ProductAtlas, GTMPricingAtlas, Workstream A (engineering 1.0), Workstream C (test app) -**Status:** Draft for synthesis review - -> **Scope discipline.** Workstream A is shipping 1.0 (the 29-tool MCP + record/replay + license/trial + WDA gated beta + Cloud private API). Workstream C is the reference test app. 
**This memo is explicitly the 12-month-after-1.0 roadmap** — what turns SimDrive from "a premium iOS testing tool" into "the iOS testing tool agents reach for first AND that customers can't switch away from after a year." - ---- - -## §1. Moat thesis - -A premium iOS testing tool defends against free Maestro and free XCUITest *not* by feature count but by accumulating switching cost the day a paying customer's first journey enters our Cloud. SimDrive's unique starting position — MCP-native + iOS-deep + 29-tool composable surface + Palace dogfood receipts — lets it own the seam where "agent-driven test authoring" meets "iOS-platform-specific signal that XCUITest can't expose and Anthropic computer-use won't bother to build." The headline 1.x features must compound that switching cost on three axes simultaneously: corpus (replay archive grows daily), signal (perf/a11y/network telemetry only SimDrive captures), and reproducibility (production crashes round-trip into replays no other tool can author). - ---- - -## §2. The world-class feature inventory - -Each row scored on **moat depth** (1–5, durability against fast-followers), **build effort** (S = ≤2 wks, M = 2–6 wks, L = 6–12 wks, XL = 3+ months), **revenue impact** (1–5, ability to anchor a tier or expand ACV), **competitive uniqueness** (1–5, how empty the space is right now). Citations are specific: "Maestro doesn't do X because Y." - -### A. AI test authoring - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| A1 | **Crash-report → journey.** Parse `.ips` (UIKit responder chain + symbolicated VC stack), synthesize a candidate replay walking to the crash site. | **Nobody.** Sentry/Bugsnag display crashes, don't author replays. Maestro has no `.ips` ingest. Anthropic computer-use is generalist. | **5** | L | 4 | **5** | -| A2 | **App-screen crawl → suggested journeys.** BFS-walk reachable screens, emit ranked candidate journeys to seed the first 20 PR-gates. 
| Maestro Studio is record-and-export, not autonomous crawl. Firebase App Crawler is Android-only. | 4 | M | 4 | 4 | -| A3 | **Anomaly detection across replays.** Diff each replay vs rolling N-build baseline: OCR text drift, layout shifts, perf regressions outside SSIM mask. | Cypress Cloud has binary flake-detection. We have richer signal — SSIM masks + perf + OCR per step; Maestro captures none of those three. | 4 | M | 4 | 4 | - -### B. Visual + performance regression - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| B1 | **Self-healing SSIM thresholds** per region from rolling drift. | Nobody. Maestro replay is binary screenshot match. | 3 | M | 3 | 4 | -| B2 | **Perf budgets per journey** (`cpu_max`/`rss_max`/`time_max`). | XCTest MetricKit needs the XCTest scaffolding our customers left. Maestro captures no perf. | 4 | S | 4 | 4 | -| B3 | **Cross-build perf trend dashboards** (Grafana-style, per-journey). | Datadog Synthetics is web-only. BrowserStack does device CPU. Nobody charts iOS-sim perf-per-journey. | 4 | M | 4 | 4 | -| B4 | **App-launch perf benchmarking** (cold/warm/first-render). | XCUI has launch metrics; nobody surfaces them in a journey dashboard. | 3 | M | 3 | 3 | -| B5 | **Memory leak detection** via repeated journey + RSS-trend gating. | XCTest leaks-instrument is real-device + ceremony-heavy. We get it on sim. | 4 | S | 3 | 4 | -| B6 | **FPS / scroll smoothness** via CADisplayLink sampling through `simdrive-input` HID. | XCUI XCTOSSignpost needs scaffolding. Maestro can't measure. | 3 | L | 3 | 4 | - -### C. Cross-app + cross-platform - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| C1 | **Multi-app journey support** (share-to-Safari OAuth, deep-link returns). | Maestro deep-link support is limited. BrowserStack sandboxes per app. | 4 | L | 4 | 4 | -| C2 | **Network mocking + replay** integrated with journey runner. 
| Mockoon/Charles offline; Detox mocks; Maestro `--mock` is limited. Integration with journey runner is the moat. | 4 | L | 4 | 3 | -| C3 | **Time/state simulation** (battery/network/locale/tz/date). | `simctl status_bar` partial; Apple's tooling is awkward. We package it. | 3 | S | 3 | 3 | -| C4 | **Push notification simulation** mid-journey. | `simctl push` exists; small wrapper, high-leverage. | 3 | S | 3 | 3 | - -### D. Accessibility - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| D1 | **Vision-based a11y audit** (contrast on OCR boxes, focus order, screen-reader coherence). | XCTest a11y audit is AX-tree only. Stark/Axe are static design. | 4 | L | 3 | 5 | -| D2 | **VoiceOver journey replay** (run with VO active, validate spoken-text sequence). | Accessibility Inspector is manual. No CI integration in market. | 4 | L | 3 | 5 | - -### E. CI / orchestration - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| E1 | **Parallel journey execution** with sim-aware scheduler. | BrowserStack scales by device count ($$$). Maestro Cloud parallelizes. Differentiator: local-first + Cloud-overflow. | 3 | M | 4 | 2 | -| E2 | **Flaky journey isolation** with concrete remediation suggestions. | Cypress Cloud has flake-detection. Ours pairs detection with stable_id/SSIM remediation. | 4 | M | 4 | 3 | -| E3 | **PR-gate GitHub Action** posting annotated diffs as comments. | Maestro has Action templates. We differentiate via Cloud-comment artifact (corpus + perf trend link). | 3 | S | 3 | 2 | -| E4 | **Test data factories** (login/library/seed states). | Detox has factories; Maestro is YAML-only. We bridge. | 3 | M | 3 | 3 | - -### F. Cross-team collaboration (the real lock-in) - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| F1 | **Replay corpus management** (multi-tenant, RBAC, search). | Cypress Cloud for web. 
**Nobody for iOS.** | **5** | XL | **5** | **5** | -| F2 | **Replay diff** (steps + SSIM-mask + threshold edits). | Nobody. Git diff on YAML is unreadable. | 4 | M | 4 | **5** | -| F3 | **Annotated replays** (per-step comments, PR/bug links). | Cypress Cloud has it for web; we bring to mobile. | 4 | M | 3 | 4 | -| F4 | **Branch/merge for journeys** (git-style with conflict UI). | Nobody. Tests-as-code is norm; tests-as-branchable-artifact is not. | 4 | L | 3 | **5** | - -### G. Reproducibility from production - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| G1 | **Crash corpus → journey** (cluster Sentry/Bugsnag/Crashlytics; "top-10 crashes covered" report). | Nobody bridges crash-display to test-authoring. | **5** | XL | **5** | **5** | -| G2 | **Bug-report NL → journey** ("user reports login fails on iPad in dark mode" → MCP-driven repro). | Maestro can't compose this naturally — not MCP-native. We can. | 4 | M | 4 | **5** | -| G3 | **Production session capture SDK** (opt-in, anonymized, replay locally). | LogRocket/FullStory for web. **Mobile has no equivalent.** Sentry mobile session replay is limited. | **5** | XL | **5** | **5** | - -### H. Compliance + observability - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| H1 | **SOC 2 signed action ledger** (Ed25519 per replay; ForgeOS pattern). | BrowserStack/Sauce have platform SOC 2; nobody offers per-replay signed evidence. | 4 | M | 4 | 4 | -| H2 | **PII scrubbing in screenshots** (emails/SSNs/CCs auto-redacted). | Sentry blurs PII heuristic. LogRocket rules. Mobile-test tooling has nothing. | 4 | M | 3 | 4 | -| H3 | **GDPR export/delete.** | Standard SaaS table-stakes. | 2 | S | 2 | 1 | - -### I. Marketplace - -| # | Feature | Who does it now | Moat | Eff | Rev | Uniq | -|---|---|---|---|---|---|---| -| I1 | **Public journey corpus** (login/OAuth/IAP/SiwA, MIT, fork-able). | Maestro examples repo small. 
Cypress demo app. Nobody has a versioned mobile journey marketplace. | 3 | M | 2 | 3 | -| I2 | **App-specific test packs** (Slack/Notion). **IP risk: derivative-works + ToS.** | Nobody for legal reasons. We shouldn't be first either. | 2 | M | 2 | 2 | - ---- - -## §3. The headline 5 — the actual 1.x → 2.0 roadmap - -From the inventory above, the five features that maximize **moat × revenue × shipping-feasibility within 12 months of 1.0**, in shipping order: - -### 3.1 — **F1 Replay Corpus Cloud** — v1.1 (8–12 weeks post-1.0) -Score: moat 5, effort XL, rev 5, unique 5. Builds on the 1.0 Cloud private-API foundation Workstream A is shipping; F1 turns that into a multi-tenant corpus with RBAC + search. -**Moat:** switching cost measured in *months of replay-corpus migration work* once a customer has 90 days of data. 500 replays cannot move to Maestro Cloud without rebuilding against Maestro's incompatible YAML. Cypress Cloud is the proof: web teams stay locked in for years on corpus alone. -**Hard to replicate:** Maestro can ship an MCP wrapper in 3 weeks (per `04_competitive_risk.md` §2.2); they **cannot** ship multi-tenant replay corpus in 3 weeks — that's a 6+ month build with the same hosting/security/billing/RBAC overhead we faced. The day F1 ships, we hold a 6-month head start. -**Interactions:** F1 is the substrate for F2/F3/F4, A3, B3, H1, H2. The spine of the post-1.0 product. - -### 3.2 — **B2 + B3 Perf Budgets + Trend Dashboards** — v1.2 (12–18 weeks post-1.0) -Score: B2 (S/4/4) + B3 (M/4/4). Both depend on the 1.0 `perf` tool Palace validated (`00a_VALIDATED_FACTS.md` §A row 8) and on F1 for storage. -**Moat:** turns SimDrive from "test runner" into "PR-gate signal source." Once a customer's CI fails a PR because RSS exceeded budget by 12 MB, removing SimDrive means losing that signal. XCTest's MetricKit gives the same numbers — but only if you re-introduce the XCTest scaffolding our customers explicitly left behind. Maestro does not capture perf at all. 
This is a moat against both XCTest *and* Maestro simultaneously. -**Hard to replicate:** the data is easy; the *taste* of which thresholds fail PRs without flake is months of dogfood. Palace iOS is the corpus we need; competitors don't have it. -**Interactions:** feeds A3 anomaly detection; unlocks Team-tier upsell vs Solo. - -### 3.3 — **A1 Crash-Report → Journey** — v1.3 (18–26 weeks post-1.0) -Score: moat 5, effort L, rev 4, unique 5. The single most differentiated feature in the inventory. -**Moat:** nobody ships this — mobile or web. Sentry/Bugsnag/Crashlytics *display* crashes; they don't author replays. Maestro doesn't ingest `.ips`. Anthropic computer-use is generalist. Requires combining `.ips` symbolication + UIKit responder-chain heuristics + journey synthesis on our YAML + MCP composition to drive verification. We're positioned because we own the format, the MCP surface, and the verification telemetry. -**Hard to replicate:** Maestro needs 4–6 months *after* deciding it matters — and they probably won't decide until we make it a marketing centerpiece. By then F1+B2+B3 are entrenching customers. -**Interactions:** every A1 use writes a journey *into* F1 corpus → grows switching cost → grows moat. Compounding. - -### 3.4 — **G3 Production Session Capture (opt-in SDK)** — v1.4 → v2.0 (26–40 weeks post-1.0) -Score: moat 5, effort XL, rev 5, unique 5. The largest bet on the list. -**Moat:** LogRocket and FullStory are billion-dollar companies on this pattern for web. Mobile has no equivalent — Sentry's mobile session replay is limited; nobody round-trips a captured session into a local-repro journey. Category-defining. -**Hard to replicate:** not a 6-week build for anyone. Requires an iOS SDK (signed, light, battery-conscious, App-Store-policy-compliant) + backend ingest + PII redaction + journey-synthesis (leveraging A1) + 6–9 months production hardening. Maestro has no customer-app SDK pattern. Anthropic won't build something this iOS-product-deep. 
A YC clone burns 12 months. -**Interactions:** unlocks Platform tier ($50K/yr+, §6). Closes the loop: production crash → A1 journey → F1 corpus → B2 budget → blocks the regression. SimDrive becomes the iOS production-quality loop, not just a test runner. -**Risk:** could slip to v2.x without invalidating the thesis — but every quarter it slips, Sentry could ship mobile session-replay properly and steal the air. - -### 3.5 — **H1 SOC 2 Signed Action Ledger** — v1.2 (12–18 weeks post-1.0) -Score: moat 4, effort M, rev 4, unique 4. The unsexy gating feature that unlocks regulated-industry buyers. -**Moat:** SOC 2 is a yes/no gate for enterprise procurement. Maestro and Anthropic don't have it for testing-replay. Without H1, SimDrive caps at the agentic-iOS-developer niche (`04_competitive_risk.md` §2.4); with H1, Business + Platform become buyable by orgs that have a CISO. The Ed25519 signing pattern is already battle-tested in our ForgeOS stack — months of compliance paperwork, not novel engineering. -**Hard to replicate:** the *audit certification* takes 6 months + ~$25K Type 1 (Type 2 follows another 6). A competitor not started by month 6 cannot catch up by month 18. -**Interactions:** every F1 replay carries an H1 signature → tampering detectable. Without H1, no fintech buys G3. - -**Shipping order summary:** - -| Order | Feature | Version | Weeks post-1.0 | -|---|---|---|---| -| 1 | F1 Replay Corpus Cloud | 1.1 | 8–12 | -| 2 | B2 + B3 Perf Budgets + Trends | 1.2 | 12–18 | -| 3 | H1 SOC 2 Signed Ledger | 1.2 | 12–18 (parallel build, sequential cert) | -| 4 | A1 Crash-Report → Journey | 1.3 | 18–26 | -| 5 | G3 Production Session Capture SDK | 1.4 → 2.0 | 26–40 | - ---- - -## §4. Anti-moat — what NOT to build - -The skeptic's view. Seven features that *look* like moat but aren't: - -1. **I2 App-Specific Test Packs (Slack / Notion / etc.).** Looks like marketplace network effect; is actually a **derivative-works lawsuit waiting**. 
Slack's ToS forbids automated UI scraping; Notion's is similar. We'd be the legal target while Maestro shrugs. **Cut from inventory entirely.** -2. **"AI-powered test naming."** A Claude wrapper that names tests. Commodity. Anyone with an Anthropic API key ships this in an afternoon. Customers will not pay extra for it. The feature *exists* but it's a freebie, not a tier-anchor. -3. **Free-tier journey corpus hosting.** Tempting (drives adoption!) but **erodes F1's switching cost** — the entire moat is paid corpus. A free tier with corpus storage gives users a way to walk away. Free tier should cap corpus at 5 replays, no Cloud sync. -4. **Native macOS UI app.** Cross-platform desktop UI is months of work for a feature whose buyer (iOS engineer at terminal) doesn't want a desktop app. Cypress shipped a Cypress.app GUI; their power users still drive the CLI. We are CLI + MCP-native; stay there. -5. **Apple Vision Pro test support.** Vision Pro's test framework is XCTest-only and Apple owns the platform. **Apple wins this fight.** We could build it, but we'd lose 4 weeks to a feature ~12 customers care about. -6. **Cross-platform Android driver.** Maestro's home turf. We would be a worse Maestro-on-Android for years. **Sidestep per `04_competitive_risk.md` §7.1.** Customers with Android use Maestro and supplement with us on iOS — that's the right shape. -7. **A "computer-use compatibility shim."** Ship our MCP tools as compatible with Anthropic's computer-use surface so Claude can drive either. Looks clever; **actually invites Anthropic to absorb us before we want to be absorbed.** Stay our own surface, stay listed in MCP registry, lean into iOS-deep specifics that don't generalize. The acquisition path (per `04_competitive_risk.md` §3.2) requires that we be *complementary*, not *swappable*. - ---- - -## §5. 
The 24-month moat thesis - -By **May 2028**, SimDrive has **350–500 paying customers** (median 3–4 seats, median ACV ~$2,400/yr, weighted to Team + Business per `04_competitive_risk.md` §6). Aggregate replay corpus reaches **~12M stored replays** (median customer ~30K replays from 18 months of 1–2 PR-gate runs/day across 8–12 journeys). Daily execution volume ~**80–120K runs/day**. The public marketplace catalogs **~400 vetted reusable flows**. Switching cost for a Business-tier customer is **2–4 calendar months** of corpus migration work — corpus alone, before retraining team workflows. - -| Quarter | Milestone | Moat event | -|---|---|---| -| 2026 Q3 (1.0) | Launch. Palace + 4–6 design partners. ~$5K MRR. | Trademark filed; trial funnel measured. | -| 2026 Q4 (1.1) | F1 Replay Corpus Cloud. First 50 paying customers storing replays. | **The corpus clock starts.** | -| 2027 Q1 (1.2) | B2/B3 perf + H1 SOC 2 ledger. ~$25K MRR. | PR-gate dependency forms; first regulated pilot. | -| 2027 Q2 (1.3) | A1 Crash → Journey. | Differentiation visible to Anthropic BD; acquisition window open. | -| 2027 Q3 | SOC 2 Type 2 cert. ~$60K MRR. First Platform tier. | Regulated TAM unlocked. | -| 2027 Q4 (1.4) | G3 SDK alpha. 3–5 lighthouse integrations. | Category-defining bet placed. | -| 2028 Q1 (1.5–2.0) | G3 GA + F4 branch/merge. Marketplace 200+ flows. | Switching cost crosses 2-month threshold. | -| 2028 Q2 (2.0) | ~$100K+ MRR. | Thesis validated, or acquisition by Anthropic per `04_competitive_risk.md` §3.2. | - -The thesis stands or falls on F1 shipping in Q4 2026. **Every month F1 slips, the corpus clock doesn't start.** Corpus is the only moat that compounds. - ---- - -## §6. Pricing implications - -The features above unlock new tier gates and one new tier above Enterprise. Anchored to the `04_competitive_risk.md` §6 pricing table (Solo $49 / Team $149 / Business $499 / Enterprise sales-led). 
- -### 6.1 Tier gates - -| Feature | Solo $49 | Team $149 | Business $499 | Enterprise | **Platform $50K/yr (NEW)** | -|---|---|---|---|---|---| -| 1.0 surface (29 tools, record/replay, perf, real-device read-only) | ✓ | ✓ | ✓ | ✓ | ✓ | -| F1 Replay corpus | 5-cap | 100/seat | 1,000/seat | unlimited | unlimited | -| B2 Perf budgets | ✓ | ✓ | ✓ | ✓ | ✓ | -| B3 Perf trends | — | 30 builds | 365 builds | unlimited | unlimited | -| F2/F3 Diff + annotate | — | ✓ | ✓ | ✓ | ✓ | -| F4 Branch/merge | — | — | ✓ | ✓ | ✓ | -| A1 Crash → journey | — | — | ✓ | ✓ | ✓ | -| A2 Crawl-suggested journeys | — | ✓ | ✓ | ✓ | ✓ | -| A3 Anomaly detection | — | — | ✓ | ✓ | ✓ | -| H1 SOC 2 signed ledger | — | — | ✓ | ✓ | ✓ | -| H2 PII scrubbing | — | — | ✓ | ✓ | ✓ | -| WDA real-device input | — | — | ✓ | ✓ | ✓ | -| E1 Parallel sims | 1 | 4 | 16 | unlimited | unlimited | -| E2 Flake isolation | — | ✓ | ✓ | ✓ | ✓ | -| E3 PR-gate Action + H3 GDPR | ✓ free | ✓ | ✓ | ✓ | ✓ | -| **G3 Production session SDK** | — | — | — | — | **✓ exclusive at launch** | -| BYO-storage / RBAC+SSO | — | — | basic | ✓ | ✓ | -| On-prem / VPC + dedicated AE | — | — | — | — | ✓ | - -### 6.2 The new tier above Enterprise — **Platform** ($50K/yr+) - -Justified by **G3 production session capture** + on-prem/VPC deploy + dedicated account engineer. Target buyer: a regulated mobile-first org (fintech, health-tech, banking, DoD-adjacent) with 50+ iOS engineers, where the production session-capture SDK is too sensitive to ship through someone else's cloud. **Pricing reference:** Datadog enterprise contracts at $80–250K/yr; LogRocket at $30–150K/yr depending on seats and session volume. $50K/yr is the floor for "we run an iOS SDK in your customer-facing app" trust level. - -### 6.3 Free across all tiers (the freebies that drive adoption) - -- **E3 PR-gate GitHub Action.** Distribution mechanism. Maestro does this; we must too. -- **I1 Public journey corpus** (the marketplace itself, fork-able templates). MIT-licensed. 
Adoption + agentic-discovery (per `04_competitive_risk.md` §7.2 cookbook play). -- **H3 GDPR export/delete.** Table stakes. Charging for this is reputation suicide. - -### 6.4 The pricing thesis check - -The tier gates above hold the `04_competitive_risk.md` §6 pricing while unlocking a 5–10× ACV expansion path (Platform tier). **No tier gates a feature that an early Solo customer would have expected to be free** — Solo customers get the full 1.0 surface plus a 5-replay corpus cap, the GitHub Action, marketplace forks, and basic perf budgets. Tier-up is *additive value*, not *removed value*. - ---- - -## §7. Bottom line - -The five features in §3 are not a wishlist — they are the chain of compounding switching cost. **F1 starts the corpus clock; B2/B3/H1 anchor the PR-gate dependency; A1 differentiates publicly; G3 places the category-defining bet.** Cut F1 and the rest are just better-than-Maestro features that Maestro will copy. Ship F1 on schedule and every month after is moat compounding. - -The single biggest moat-defining bet is **G3 Production Session Capture SDK**. It's the largest build, the highest risk to slip, and the only feature on the list that could turn SimDrive into a $50K/yr Platform-tier company instead of a $499/mo SaaS niche tool. **Either we're the LogRocket of mobile QA by month 24, or we're a profitable but small specialty SaaS — both are honest outcomes of this roadmap, and the choice point is whether G3 ships.** - ---- - -*End of world-class moat features memo. 
Hand-off: ProductAtlas (1.x roadmap sequencing); GTMPricingAtlas (Platform tier pricing validation); Workstream A (1.0 must not block F1 design — Cloud private API needs to anticipate multi-tenant from the start); Workstream C (test app must include crash-emitting flows for A1 dogfood).* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/07_test_app_spec.md b/simdrive/docs/RD_SIMDRIVE_1.0/07_test_app_spec.md deleted file mode 100644 index 7f0be5c..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/07_test_app_spec.md +++ /dev/null @@ -1,196 +0,0 @@ -# SimDrive 1.0 — Test App Specification (LapsApp) - -**Author:** TestAppArchitectureAtlas (Workstream C, BIS expansion round) -**Date:** 2026-04-29 -**Status:** R&D memo -**Companions:** `05_engineering_expansion.md` (Workstream A — 1.0 build), `06_world_class_moat_features.md` (Workstream B — post-1.0 moat) -**Scope:** the dogfood iOS app that SimDrive will be driven against, not SimDrive itself - ---- - -# §1. The premise - -SimDrive needs a canonical, feature-rich iOS app to drive against — one that exercises every capability listed in `00a_VALIDATED_FACTS.md §A` and every roadmap surface from Workstreams A and B against realistic flows. This app is for three audiences at once: (a) the SimDrive engineering loop, where journey replays gate every PR; (b) the prospective customer, who watches Claude drive it through OAuth and a reading flow in a 90-second demo; (c) the open-source iOS engineer, who clones the repo and reproduces our journey corpus on their own laptop in under fifteen minutes. The app exists because today's TestKitApp cannot serve any of those audiences without contortion. 
- -**Positioning vs the existing TestKitApp** (`/Users/atlas/Documents/specterqa-ios/TestKitApp/`): - -- **TestKitApp is a flat unit-test fixture.** Five SwiftUI tabs (Form, List, Nav, Stress, Palace) hand-tuned to expose the specific failure modes simdrive's 91 unit tests probe — soft-keyboard focus, debounced input, layout shifts that rebucket `stable_id`. It is excellent at what it is, which is a deterministic diagnostic harness for a single engineer. It will continue to serve that role and is not deprecated by this spec. -- **The new app is a feature-rich consumer-grade iOS app**, designed to look and behave like something a real customer ships, with an onboarding flow, OAuth, a tab bar with five primary surfaces, push permissions, a reader, search, settings, and a deliberate crash trigger. It exercises every SimDrive capability through the lens of *a real journey through a real app*, not through the lens of *a fixture screen designed to expose one feature at a time*. - -**Chosen name: `LapsApp`** — a fitness/run-tracking app, MIT-licensed, shipped under `github.com/SyncTek-LLC/LapsApp`. Reasoning: (a) "Laps" maps cleanly to iterative observation cycles that SimDrive itself runs (every replay is a lap around the journey), giving a thematic hook for marketing; (b) fitness apps cover the realistic surface we want — auth, GPS permission, lists with pull-to-refresh, charts, settings, share sheets, push, dark mode — without needing fictional content like books or articles that would invite copyright entanglement; (c) "Laps" is shorter than `SimDriveDemo` and carries no ambiguous corporate weight; (d) a fitness motif lets us seed the journey corpus with personas (`first_time_runner`, `marathon_trainer`, `casual_walker`) that read as believable end users. ReadShelf was a strong runner-up — Palace already proves the reader pattern stresses WebView blind spots — but a reader app overlaps Palace's domain too directly to ship as our public demo. 
LapsApp lets Palace stay the customer story and LapsApp be the canonical demo. - ---- - -# §2. The architecture - -LapsApp is a single iOS-native binary, **iOS 17+ deployment target** (one major version below current at any time so SimDrive's iOS-26 HID path keeps a wider install base in the demo), built canonically. No exotic patterns. The whole point is that an engineer who clones it should recognize every file from their day job. - -| Layer | Choice | Reasoning | -|---|---|---| -| Language | Swift 6 (strict concurrency on) | Matches Apple's current direction; surfaces actor-boundary bugs SimDrive should be able to observe through. | -| UI primary | SwiftUI | Default for any new iOS app today; covers ~85 % of our screens. | -| UI secondary | UIKit, surgical | Per Palace's Reader2 lessons in `02_brand_marketing.md` and the SwiftUI gaps Palace hit: the WebView reader, the Dynamic Island modal, and one custom UICollectionView for the workout history list. | -| Persistence | SwiftData (Core Data backing) | Modern, deterministic, easy to wipe via test-reset launch arg. | -| Networking | URLSession + actual HTTP backend | Real round-trips so SimDrive's `observe` exercises real loading states, not animations. | -| Backend | Cloudflare Worker stub | Single Worker at `lapsapp-api.synctek.workers.dev` with deterministic state per `X-Reset-Token` header. Free tier handles our demo + CI traffic. Workers replays are byte-identical, which is what journey reproducibility requires. | -| Auth | Sign in with Apple (real, Apple-mediated) + email/password (against Worker stub) + Google OAuth (real, opens Safari/SFSafariViewController) | Sign in with Apple covers the in-process auth path. Google OAuth covers the **out-of-process Safari sheet**, which is the path Workstream B's `webview` and `oauth` features need to exercise. Email/password covers the deterministic-credentials path for CI. 
| -| Distribution | Open-source, MIT, on `github.com/SyncTek-LLC/LapsApp` plus TestFlight binary for live demos | TestFlight gives prospects a one-tap install on their own iPhone during a sales call; the GitHub repo gives engineers something to clone and grep. | -| Telemetry | None in the open-source build | A demo app must not phone home. Crash logs come from `simctl` and `idevicesyslog` only. | - -The repo layout mirrors a healthy production iOS app: `LapsApp/` (the Xcode project), `LapsAppKit/` (a Swift package for shared models and view-models so unit tests run without booting a sim), `Backend/` (the Cloudflare Worker source, deployed via `wrangler`), `journeys/` (the SimDrive YAML corpus, one file per journey), `docs/` (a README, a screencast, and a `JOURNEYS.md` index). - ---- - -# §3. The 12 feature areas - -Each area is a real screen group in LapsApp and a test surface for one or more SimDrive capabilities. Each gets at least one journey YAML in §4. Every feature lists `accessibilityIdentifier` values explicitly because SimDrive's vision-first observe falls back to a11y-id lookup when OCR misreads, per the dictionary-gating fix in 0.3.0a3. - -**1. Onboarding + first-launch alerts.** Three-screen welcome carousel, then push, location, and contacts permission asks in sequence. Identifiers `welcome_next`, `permission_push_allow`, `permission_location_while_using`. Exercises `dismiss_first_launch_alerts`, `pre_grant_permissions`, the 1-in-4 alert-race re-observe loop. Journey: `onboarding-fresh-install`. - -**2. OAuth login (Sign in with Apple + Google).** Email/password row, Apple button, Google button. Apple stays in-process. Google launches `ASWebAuthenticationSession` which spawns a Safari sheet — out of the app's process, which is the case Workstream B's webview support needs to survive. Identifiers `auth_apple_button`, `auth_google_button`, `auth_email_field`, `auth_password_field`. 
Exercises vision-first observe across processes, focus durability, and the iOS 26 UITextField focus fix. Journeys: `oauth-google-happy`, `oauth-google-cancel`, `oauth-apple-happy`, `email-password-login`. - -**3. WebView content (workout-blog reader, Readium-style).** A blog tab where each post opens in a `WKWebView` with selectable text, scrollable, with a share button. WKWebView is XCTest-blind on iOS, which is the killer surface SimDrive's vision-first model exists to solve. Identifiers `blog_post_<slug>`, `blog_share`. Exercises OCR-only navigation, swipe in WebView, post-1.0 webview tool. Journeys: `blog-read-and-share`, `blog-scroll-bottom`. - -**4. Search + autocomplete + debounced input.** Search tab with 250 ms debounce and server-side autocomplete. Identifiers `search_field`, `search_result_<index>`. Exercises `type_text` against debounce, the wait-for-keyboard fix, the `clear_field` tool, and the soft-keyboard heuristic correction in 0.3.0a3. Journeys: `search-with-debounce`, `search-clear-and-retry`. - -**5. Multi-screen navigation (tab bar + nav stack).** Five tabs: Home, Activities, Search, Blog, Settings. Each pushes detail screens. Identifiers `tab_home`, `tab_activities`, `tab_search`, `tab_blog`, `tab_settings`, plus `activity_row_<id>`. Exercises `stable_id` durability across screens, the `stable_id_loose` fallback, and the mark-cache preservation under `observe(annotate=false)`. Journey: `tab-bar-tour-and-back`. - -**6. Sheets + modals + Dynamic Island modal.** Activity-detail screen presents an "Add Note" sheet. Settings presents an "Edit Profile" sheet. A "Live Activity" feature toggles into Dynamic Island display, surfacing the documented limitation in `LIMITATIONS.md`. Identifiers `note_sheet_text`, `note_sheet_save`, `live_activity_start`. Exercises `dismiss_sheet`, surfaces the Dynamic Island case as a known-limitation regression journey. Journeys: `add-note-sheet`, `dynamic-island-shows-limitation`. - -**7. 
Forms with async validation.** Sign-up form: email, password, password-confirm, age, terms checkbox. Server validates email uniqueness asynchronously. Error states ("email taken", "weak password") render inline. Identifiers `signup_email`, `signup_password`, `signup_submit`, `signup_error_<field>`. Exercises record/replay reliability across async server states, SSIM masking around dynamic error text. Journey: `signup-with-validation`. - -**8. Lists with pull-to-refresh + infinite scroll.** Activities tab is a 50-row initial list with pull-to-refresh and infinite scroll. Identifiers `activities_list`, `activity_row_<index>`. Exercises `swipe`, scroll perf, and the `swipe` home-indicator zone warning. Journey: `pull-refresh-and-scroll`. - -**9. Settings (light/dark, push, accessibility text size).** Toggles for appearance (system/light/dark), push, and accessibility text size (small/medium/large/extra-large). Identifiers `settings_appearance_dark`, `settings_text_size_xl`. Exercises `set_appearance`, accessibility-audit roadmap (Workstream B). Journey: `dark-mode-toggle`. - -**10. Crash trigger (developer menu).** Long-pressing the app icon on the Settings screen opens a developer menu whose "Crash now" item calls `fatalError`. Identifiers `dev_menu_open`, `dev_menu_crash`. Exercises `crashes` retrieval and post-crash app-state diagnostics. Journey: `crash-and-recover`. - -**11. Performance stress (1000-row activity list, animation-heavy detail).** A "Year in Laps" tab loads 1000 activities and renders an animated chart on detail. Identifiers `year_list`, `year_chart`. Exercises `perf_baseline` / `perf_compare`, the cached-RSS fix from 0.3.0a2, and the post-1.0 perf regression dashboard from Workstream B. Journey: `perf-baseline-and-stress`. - -**12. Offline / network conditions.** Settings has a "Simulate Offline" toggle that flips the URLSession to a 30-second-timeout config. List shows a graceful empty state. Identifier `network_offline_toggle`.
Exercises the deferred `network` tool whenever it ships. Journey: `offline-mode-graceful`. - ---- - -# §4. The journey corpus - -Twenty pre-built journeys ship in `journeys/`, indexed by `JOURNEYS.md`. Each is a YAML written against the journey-runner schema in `01_product_engineering.md §1.1`. Each names a persona (`personas/` directory), a target (`simulator` by default; the real-device journeys carry `target: device`), and a goal sequence. - -Three personas seed the corpus and are themselves the user-facing examples for the SimDrive 1.0 product: - -- `first_time_runner` — installs LapsApp fresh, clicks through onboarding, denies push, allows location, signs up with email. -- `returning_user` — already authenticated, opens app, browses activities, shares a blog post. -- `power_user` — toggles dark mode, runs a 5K workout, logs a note, exports data. - -The 20 journeys, grouped by feature area: - -| # | Journey | Persona | Target | Feature area | What it validates | -|---|---|---|---|---|---| -| 1 | `onboarding-fresh-install` | `first_time_runner` | sim | 1 | `dismiss_first_launch_alerts`, alert-race retry | -| 2 | `onboarding-deny-all` | `cautious_user` | sim | 1 | Permission-deny path, app handles gracefully | -| 3 | `oauth-google-happy` | `returning_user` | sim | 2 | Out-of-process Safari sheet observe + tap | -| 4 | `oauth-google-cancel` | `cautious_user` | sim | 2 | Cancel mid-flow, recover, retry | -| 5 | `oauth-apple-happy` | `returning_user` | sim | 2 | In-process Apple sheet, biometric prompt dismissal | -| 6 | `email-password-login` | `power_user` | sim | 2 | UITextField focus + type, iOS-26 HID path | -| 7 | `signup-with-validation` | `first_time_runner` | sim | 7 | Async validation, SSIM masking around error text | -| 8 | `tab-bar-tour-and-back` | `power_user` | sim | 5 | `stable_id` durability across all 5 tabs | -| 9 | `search-with-debounce` | `power_user` | sim | 4 | `type_text` over 250 ms debounce | -| 10 | `search-clear-and-retry` | `power_user` | 
sim | 4 | `clear_field` then re-type, focus durability | -| 11 | `blog-read-and-share` | `power_user` | sim | 3 | WKWebView OCR-only navigation, share sheet | -| 12 | `blog-scroll-bottom` | `power_user` | sim | 3 | `swipe` in WebView, end-of-content detection | -| 13 | `add-note-sheet` | `power_user` | sim | 6 | Sheet present + dismiss + persistence | -| 14 | `dynamic-island-shows-limitation` | `power_user` | sim | 6 | **Regression journey: must fail with documented Dynamic Island limitation** | -| 15 | `pull-refresh-and-scroll` | `returning_user` | sim | 8 | Pull-to-refresh + infinite scroll, perf snapshot | -| 16 | `dark-mode-toggle` | `accessibility_user` | sim | 9 | `set_appearance` + a11y text size | -| 17 | `crash-and-recover` | `bug_finder` | sim | 10 | Crash trigger, `crashes` retrieval, recovery | -| 18 | `perf-baseline-and-stress` | `power_user` | sim | 11 | `perf_baseline` then 1000-row stress, severity high | -| 19 | `offline-mode-graceful` | `power_user` | sim | 12 | Offline toggle, graceful empty state | -| 20 | `device-observe-only` | `power_user` | **device** | 5,3,9 | Real-device read-only smoke (observe + logs + lifecycle) | - -**Two journeys must fail by design.** `#14 dynamic-island-shows-limitation` halts on a known Dynamic Island blind spot; the journey runner must exit non-zero with a `LIMITATIONS.md` cross-reference. `#4 oauth-google-cancel` includes a sub-step that intentionally taps the wrong button to test recovery — its journey-level outcome is "passed-after-retry", not "passed-clean". These are the regression journeys that catch real bugs rather than just recording green runs; without them, a SimDrive bug that causes every journey to silently pass would be invisible. - ---- - -# §5. The state machine - -Deterministic state between journeys is non-negotiable. We follow Palace's `-PalaceTestReset` pattern, named for our app: **`-LapsAppTestReset`**, passed as a launch argument by the journey runner before every journey. 
- -When `-LapsAppTestReset` is present, the app at startup: (a) wipes its SwiftData store; (b) clears the keychain entries under the `io.synctek.lapsapp` access group; (c) clears `URLCache.shared`; (d) sends a `POST /reset` to the Cloudflare Worker stub with the per-journey `X-Reset-Token` header (returning the worker to its seeded fixture state for that journey); (e) flips an `isUITestMode` flag that disables animations longer than 100 ms and skips the welcome confetti view that interferes with OCR. - -We rejected reset-via-UI (slow, brittle: a single Settings UI rename breaks every journey) and reset-via-backend-only (incomplete: doesn't clear local SwiftData or keychain). The launch-arg path is what Palace runs in production, what Apple's own Xcode UI testing uses, and the pattern engineers will recognize. It also keeps the journey runner's reset call fast (sub-100 ms) because there's no UI traversal — the app re-launches into the same screen state every time. - -Per-journey worker state is keyed by the `X-Reset-Token` value in the journey YAML's `setup:` block. The worker holds its fixtures in Durable Objects scoped to the token, so two journeys running in parallel against the same worker do not collide. - ---- - -# §6. The build / dogfood loop - -LapsApp drops into the SimDrive engineering loop as the **gate substrate** for every PR. The loop: - -1. SimDrive PR opens. CI checks out LapsApp at a pinned tag, builds it once into `~/Library/Caches/SimDrive/LapsApp.app`. -2. The journey corpus runs with the new SimDrive build against the cached LapsApp binary. `simdrive run --corpus journeys/` exits zero only when all 20 journeys behave as expected (the two intentional-fail journeys must fail in their documented way). -3. Failures gate PR merge. This is the `--simdrive` PR-gate Palace built and now uses; we adopt it directly. -4. Each new SimDrive feature comes with a new LapsApp journey.
A SimDrive PR adding a webview tool requires a journey under `feature_area: 3` exercising it, or the PR is incomplete. -5. Quarterly we add 2-3 new feature areas to LapsApp to exercise emerging SimDrive capabilities. The first such expansion (Q3-2026) lands biometric-gated workout sharing, exercising `pre_grant_permissions(biometric)` once that ships. - -This is the same dogfood loop Palace runs for SpecterQA-iOS today, reflected back at SimDrive itself. The asymmetry is that LapsApp is ours and we control its evolution — when SimDrive needs a new test surface, we add a screen. - ---- - -# §7. Open-source release plan - -LapsApp ships **MIT** under `github.com/SyncTek-LLC/LapsApp`, with `LICENSE`, `NOTICE`, and a 200-line `README.md` whose first three sections are: **what this is** (a demo iOS app), **how to run it** (`open LapsApp.xcodeproj` then ⌘R), and **how to drive it with SimDrive** (a single `simdrive run --corpus journeys/` command). - -Public discoverability paths: (a) link from the SimDrive README under "Try it"; (b) link from the `synctek.io/products/simdrive/` page hero; (c) a Show HN post pinned to LapsApp's repo, pointing at the 90-second screencast; (d) a tag-based listing on the `iOS-testing` topic on GitHub; (e) a registry entry on `awesome-mcp` once LapsApp is the canonical demo target referenced in the SimDrive MCP listing. - -The README explicitly refuses pull requests adding telemetry or analytics. A `CONTRIBUTING.md` invites new feature areas only when they exercise a SimDrive capability not already covered, with the test journey as part of the PR. We do not accept feature drift unrelated to SimDrive's surface — LapsApp is a demo, not a product, and resisting feature creep is part of keeping the demo crisp. - ---- - -# §8. 
The marketing payoff - -LapsApp doubles as the canonical SimDrive marketing asset: - -- **Show HN demo:** "Watch Claude drive LapsApp through Google OAuth, search, read a post, and toggle dark mode — in 90 seconds, on an iOS 26 simulator." This is the pinned tweet, the hero video on `synctek.io/products/simdrive/`, and the YouTube short. -- **Product-page hero:** the same 90-second video. -- **Reference YAML:** the 20 journeys under `journeys/` are the canonical "what real iOS journeys look like in SimDrive YAML" examples linked from the SimDrive docs site. New customers writing their first journey copy from `oauth-google-happy.yaml` or `search-with-debounce.yaml`. -- **Customer onboarding:** "Fork LapsApp, point it at your bundle ID, follow the three-step README, and you have a working SimDrive setup." Time-to-first-replay drops from "a day of YAML wrangling" to "thirty minutes of fork-and-edit." -- **Conference demos:** the TestFlight binary plus the journey corpus turn into a 5-minute live demo at any iOS conference. Drop the URL on the slide, prospects install on their own iPhone, watch it run, and leave with the GitHub link. - -The compound effect: every SimDrive sales conversation has a working demo, every prospect has a starting template, and every iOS engineer who's curious has a clone-able repo. - ---- - -# §9. Effort estimate - -One full-time iOS engineer, no parallel context-switching. Honest estimate, not optimistic. - -| Phase | Effort | Detail | -|---|---|---| -| App scaffolding + 12 feature areas (basic UI, models, tabs) | 4 weeks | Three weeks for the screens, one for polish and `accessibilityIdentifier` audit. SwiftUI gets you 80 % of the way; the WebView reader and Dynamic Island modal eat real time. | -| Cloudflare Worker stub + `-LapsAppTestReset` plumbing | 2 weeks | One week for the Worker, one for the per-journey reset-token state machine and Durable Objects. 
| -| OAuth integrations (Apple + Google) | 1.5 weeks | Apple is fast; Google's `ASWebAuthenticationSession` integration plus the Worker's redirect handling is where the time goes. | -| 20 pre-built journey YAMLs + personas + corpus runner | 2.5 weeks | Half a week per five journeys plus polish; the two intentional-fail journeys take the most calibration. | -| TestFlight provisioning + first build | 0.5 weeks | App Store Connect, certs, internal testers. | -| Open-source release (README, CONTRIBUTING, Show HN copy, screencast) | 1 week | Screencast eats half. | -| Polish, integration with SimDrive PR gate, first end-to-end run | 1.5 weeks | The "make it actually work as the gate substrate" tax. Always larger than expected. | -| Buffer (test-app overrun tax) | 1 week | Test apps are notorious for overrun. This buffer is non-negotiable; cutting it is how the project slips. | - -**Total: 14 calendar weeks (~3.5 months) with one engineer dedicated.** This is honest, not optimistic. Half-time engineer doubles it to seven months. Two engineers in parallel does not halve it because the WebView/OAuth/journey-corpus work is sequentially dependent — call it nine weeks with two engineers, with diminishing returns past two. - ---- - -# §10. Risks - -**R1: Test-app feature creep.** LapsApp grows past 12 feature areas because every new SimDrive capability "needs" its own surface. Mitigation: the `CONTRIBUTING.md` rule that no new screen lands without a SimDrive capability it uniquely exercises. Quarterly review prunes stale areas. - -**R2: WebView reader is harder than the spec implies.** Readium-style readers sound simple and never are; the Dynamic Island modal is the same trap. Mitigation: timebox each at 1 week of engineering time. If they exceed budget, drop the Readium aspiration and ship a simpler `WKWebView` over a static blog post — the test surface is "WKWebView is XCTest-blind", not "Readium fidelity." 
- -**R3: Cloudflare Worker non-determinism under load.** Durable Objects state can leak between journeys if the per-token reset isn't bulletproof. Mitigation: every journey starts with a `POST /reset` and asserts its expected fixture is loaded before step 1; the runner halts hard on assertion failure rather than running against polluted state. CI runs a journey-isolation regression nightly. - -**R4: TestFlight review delay blocks the launch.** Apple's TestFlight review is usually fast but can stall for unclear reasons. Mitigation: submit two weeks before the public SimDrive 1.0 launch date so any rejection has runway. Internal testers (engineers + prospects who explicitly opted in) cover the gap if external testing slips. - -**R5: The two intentional-fail journeys atrophy into "always-skip" status.** Engineers under deadline pressure tend to mark deliberately-failing tests as flaky and skip them. Mitigation: the corpus runner exits non-zero unless the intentional-fail journeys fail in the expected documented way — silent skipping is itself a failure mode. A dashboard widget flags any journey skipped more than once in a week. - ---- - -*This memo is the test-app half of the SimDrive 1.0 expansion BIS round. It must not drift from `00a_VALIDATED_FACTS.md` — every LapsApp journey traces to a row in §A or §B of that document, or it is roadmap, not 1.0 substrate.* diff --git a/simdrive/docs/RD_SIMDRIVE_1.0/08_BUILD_KICKOFF.md b/simdrive/docs/RD_SIMDRIVE_1.0/08_BUILD_KICKOFF.md deleted file mode 100644 index d3cf9d5..0000000 --- a/simdrive/docs/RD_SIMDRIVE_1.0/08_BUILD_KICKOFF.md +++ /dev/null @@ -1,241 +0,0 @@ -# SimDrive 1.0 — Build Kickoff - -**Status:** Build-launch reference. Read this first when starting any SimDrive 1.0 implementation session. -**Date:** 2026-05-02 -**Predecessors (read in order):** -1. `00a_VALIDATED_FACTS.md` — what exists today vs hypothesis -2. `00_SIMDRIVE_1.0_PLAN.md` — the canonical plan-of-record (v2) -3. 
`05_engineering_expansion.md` — the per-component spec for the 1.0 build -4. `07_test_app_spec.md` — LapsApp dogfood platform spec - ---- - -## 1. What SimDrive 1.0 IS - -**One sentence:** A premium MCP-native iOS testing tool where engineers define personas + journeys in YAML, and an AI agent drives an iOS simulator (or paired physical device) through them — using vision-first observe + real UITouch HID — producing replay-able recordings, perf snapshots, and JUnit-ready CI output. - -### Two interfaces, same engine - -| Path | Who uses it | How | -|---|---|---| -| **MCP tool surface** (29 + 1 new) | An AI agent (Claude Code, Cursor, Cline) calling tools directly | The existing 29 tools, plus a new `run_journey` tool that orchestrates the agent loop | -| **CLI surface** | A human developer or CI runner | `simdrive init` / `run --journey <name>` / `ci` / `replay` / `trial start` / `license activate` | - -### The agent loop (one journey step) - -``` -read journey.yaml → resolve next goal - observe() # screenshot + OCR marks - Claude(persona + goal + obs) → action - dispatch via MCP primitive # tap/swipe/type/press_key - record_step() # pre/post screenshots - check goal success criteria # continue / next goal / fail -``` - -### Persistence - -- `.simdrive/personas/<name>.yaml` — user-defined personas -- `.simdrive/journeys/<name>.yaml` — user-defined journeys -- `.simdrive/replays/<name>/` — recordings + per-step PNGs + sidecar JSON -- `~/.simdrive/sessions/<id>/` — per-session actions.jsonl + observations -- License key — Ed25519-signed, offline-first verification, weekly online refresh - -### 1.0 vs deferred - -| In 1.0 | Deferred to 1.1+ | -|---|---| -| 29 MCP tools (validated) | Replay Corpus Cloud (v1.1, the compounding moat) | -| Journey runner + persona layer | Perf budgets + trend dashboards (v1.2) | -| License/trial system | SOC 2 signed action ledger (v1.2 parallel build) | -| WDA real-device input — **gated beta** | Crash-report → journey (v1.3) | -| Cloud 
private API (replay archive, scoped to first 5 paid customers) | Production Session Capture SDK (v1.4 → 2.0, the category bet) | -| `simdrive ci` orchestrator | | -| Production hardening pass | | - -**Cut entirely:** App-specific journey corpora (Slack/Notion vetted packs) — derivative-works + ToS risk. - ---- - -## 2. Engineering discipline (non-negotiable) - -Per chairman directive — every component ships with this discipline: - -| Rule | What it means here | -|---|---| -| **Tests first** | Every new module ships with unit tests written before the implementation. Every PR has a failing test that the implementation makes pass. | -| **Type hints everywhere** | Full Python `typing` annotations, mypy-clean. The existing 91 unit tests are typed; we extend the standard. | -| **Single-responsibility modules** | One file per concern. `journey/loader.py`, `journey/runner.py`, `journey/schema.py`, `journey/result.py` — not one 1500-line `journey.py`. | -| **No magic, no globals** | Dependencies passed via constructors / function arguments. Easy to test, easy to mock. | -| **Comment the WHY only** | Names carry the WHAT (per existing CHANGELOG voice). Comments explain non-obvious decisions and constraints. | -| **Property-based tests where they fit** | License-key generation, stable_id hashing, YAML validation — Hypothesis. | -| **Coverage gates** | 100% on schema validators (parsing untrusted YAML is risky); ≥90% elsewhere. | -| **Performance benchmarks with regression gates** | Observe latency, tap latency, journey throughput. CI fails on regression > threshold. | -| **Live E2E for killer paths** | Vision-first observe, HID injection, record/replay, journey runner end-to-end against LapsApp. | - ---- - -## 3. Dogfood-to-perfection phase (post-build, pre-launch) - -Five passes, all must succeed before tagging v1.0: - -1. **Self-dogfood week** — every SimDrive engineering agent runs SimDrive against LapsApp's full 20-journey corpus daily. Bugs go straight to backlog. -2. 
**Palace re-validation** — re-run Palace's existing recording corpus against 1.0. Any regression is P0. -3. **Beyond LapsApp + Palace** — drive SimDrive against 2-3 additional real iOS apps. Surface unknown failure modes. -4. **Adversarial testing** — break things on purpose: corrupted YAML, network-down mid-journey, sim killed mid-tap, simctl malformed JSON, OCR hallucinations. Every failure produces a test + a fix. -5. **Performance verification** — establish baselines; gate v1.0 at "no journey takes more than 2× its v0.3.0a3 equivalent." - -Only after all five pass do we tag and launch. - ---- - -## 4. The parallel build cycles - -Per chairman directive: **agentic development system** — three coding agents in parallel, not three human engineers. Each cycle takes one parallel agent run + Atlas integration pass. - -### 4.0 Parallel-build rules (apply to every cycle) - -These rules are non-negotiable across all 5 cycles to keep parallel agent work safely mergeable: - -**Ownership rule** — every cycle assigns each agent a disjoint set of files / directories. An agent NEVER touches a file owned by another agent in the same cycle. Atlas owns the integration files (`server.py`, `pyproject.toml`, `CHANGELOG.md`, `MANIFEST.in`, GitHub workflows) and is the only writer to them. - -**New-files preferred** — agents create new files in their owned directory whenever possible. When an agent must extend an existing file in their territory, they describe the change in their report; Atlas applies it during integration. - -**Test-first inside each agent** — each agent writes failing tests first, then implementation, then runs the test suite for their owned scope. An agent does not return until their owned tests pass. - -**Atomic commit per cycle** — Atlas commits the integrated cycle as one logical commit (or a small chain on the same branch). No mid-cycle commits from agents directly. 
- -**Conflict-resolution protocol** — if two agents' returned diffs touch the same file (which should be rare given the ownership rule), Atlas resolves manually during integration; if the conflict is non-trivial, flag the cycle as needing a re-run with revised ownership boundaries. - -**Worktree posture** — currently NOT using git worktrees per memory `feedback_worktree_auth.md` (worktree agents fail "Not logged in"). Instead, agents run on the shared working tree with the disjoint-ownership rule enforcing safety. - -**Cycle gate before next** — Atlas does not start cycle N+1 until cycle N is committed, pushed, tests are green, and a brief progress summary has been surfaced to the chairman. - -### 4.1 Cycle 1 — Foundation (next session — start here) - -| Agent | Scope (component refs from `05_engineering_expansion.md`) | Files agent OWNS | Files agent NEVER touches | -|---|---|---|---| -| **A — Journey runner stack** | Components 1+2+3+8 — journey YAML schema, persona schema, runner core, `simdrive ci` orchestrator | `simdrive/src/specterqa_ios/journey/` (new dir: loader, schema, runner, result, ci) + `simdrive/tests/test_journey_*.py` | `server.py`, `license/`, `cloud/`, `LapsApp/`, any existing module | -| **B — License + trial + Cloud API scaffold** | Components 4+7 — Ed25519 license, trial state, FastAPI Cloud API skeleton | `simdrive/src/specterqa_ios/license/` (new dir: keypair, signer, validator, trial) + new `cloud/` subdir (FastAPI app, R2 stubs) + `simdrive/tests/test_license_*.py` | `server.py`, `journey/`, `LapsApp/`, any existing module | -| **C — LapsApp scaffold** | Xcode project + SwiftUI shell + 4 of 12 feature areas (Settings, Light/Dark, Crash-Trigger, Search) | New `LapsApp/` directory at repo root — separate Swift project | All Python (zero overlap) | - -**Atlas integration** (per § 5): -- Merge tool registrations into `server.py:_TOOLS` (one new MCP tool from Agent A: `run_journey`) -- Bump version in `pyproject.toml` -- Update `CHANGELOG.md` 
with cycle 1 entry -- Run full test suite + live smoke - -### 4.2 Cycle 2 — Real device + LapsApp expansion - -| Agent | Scope | Files agent OWNS | Files agent NEVER touches | -|---|---|---|---| -| **A — WDA real-device input (gated beta)** | Components 5+6 — `simdrive bootstrap-device` CLI + WDA HTTP client wired to act tools | `simdrive/src/specterqa_ios/wda/` (new dir: bootstrap, http_client, signing) + `simdrive/tests/test_wda_*.py` | All other Python modules; LapsApp | -| **B — Cloud API completion** | Flesh out the FastAPI scaffold from cycle 1: real R2 storage, license-key bearer auth, per-tier quotas, deployment config | `cloud/` (extends own cycle-1 work) + `cloud/tests/` | Python `simdrive/src/`; LapsApp | -| **C — LapsApp feature areas 5-8** | OAuth login (Sign in with Apple + Google), WebView reader, Lists with infinite scroll, Forms with async validation | `LapsApp/Sources/Features/{OAuth,Reader,Lists,Forms}/` + tests | All Python; LapsApp shell from cycle 1 stays untouched except for navigation registration | - -**Note on cycle 2:** WDA bootstrap requires interactive Maurice-side debugging on real hardware (signing identity, dev-team selection, cert-trust prompts). Agent A produces the code; Maurice runs `simdrive bootstrap-device <udid>` against his iPhone 17 Pro Max during the integration pass to surface real-world failures. 
- -**Atlas integration:** -- Wire WDA path into existing `tap`/`swipe`/`type_text`/`press_key` tools when `target=device` (small surgical edit to those tool handlers) -- Update `pyproject.toml`, `CHANGELOG.md` -- Live smoke against Maurice's iPhone 17 Pro Max - -### 4.3 Cycle 3 — Hardening + LapsApp finish - -| Agent | Scope | Files agent OWNS | Files agent NEVER touches | -|---|---|---|---| -| **A — Production hardening** | Component 9 — error UX audit, structured logging, observability (`SIMDRIVE_DEBUG=1`), perf benchmarks with regression gates, edge-case coverage, docs | `simdrive/src/specterqa_ios/observability/` (new dir) + small surgical edits to existing tools (Atlas reviews each) + `simdrive/tests/test_observability_*.py` + `docs/` updates | `journey/`, `license/`, `wda/`, `cloud/`, LapsApp | -| **B — Journey corpus** | Author 10 of the 20 pre-built journey YAMLs in `LapsApp/.simdrive/journeys/` against LapsApp's feature areas | `LapsApp/.simdrive/journeys/` + `LapsApp/.simdrive/personas/` | All code modules | -| **C — LapsApp feature areas 9-12** | Sheets+Modals, Performance stress (1000-row list), Offline mode, Multi-app journey support | `LapsApp/Sources/Features/{Sheets,Perf,Offline,MultiApp}/` + tests | All Python; earlier LapsApp features | - -**Atlas integration:** -- Apply hardening edits to existing tool handlers -- Run perf benchmarks; establish baselines for CI gating -- Run all 10 cycle-3 journeys against LapsApp end-to-end - -### 4.4 Cycle 4 — Dogfood-to-perfection - -This cycle is **not parallel agent build work** — it's the 5-pass dogfood phase from § 3: - -| Pass | Owner | Output | -|---|---|---| -| Self-dogfood week | All 3 agents in tandem (running journeys, filing bugs) | Bug backlog ranked P0/P1/P2 | -| Palace re-validation | Atlas (sends Palace the v1.0 candidate; Palace runs their existing corpus) | Regression report from Maurice | -| Beyond-LapsApp apps | Atlas (drives SimDrive against 2-3 additional iOS apps) | Failure mode catalog | 
-| Adversarial testing | One coding agent (specialized) runs corrupted-input + crash-mid-tap + simctl-malformed scenarios | Test additions for every failure | -| Performance verification | Atlas runs `simdrive ci` end-to-end across the LapsApp corpus, compares to v0.3.0a3 baseline | Perf gate (no journey > 2× baseline) | - -Atlas integrates fixes for any P0/P1 bug between passes. Cycle 4 ends when all 5 passes are green. - -### 4.5 Cycle 5 — Launch - -| Workstream | Owner | -|---|---| -| Tag v1.0 + LapsApp v1.0 same week | Atlas (commit + tag + push; confirm Trusted Publisher entry; verify PyPI publish) | -| Coordinated MCP registry submissions (Anthropic + Smithery + awesome-mcp) | Atlas + chairman (per `03_gtm_pricing.md` § 7) | -| Show HN + Twitter + blog | Chairman (copy already drafted in `02_brand_marketing.md` § 4) | -| Pricing page live (Stripe live products created, payment links, license server endpoint live) | Atlas + chairman (chairman approves Stripe live creation; Atlas wires it) | -| Day 1-7 trial-signup hand-holding | Chairman | - ---- - -## 5. Cycle 1 Atlas integration responsibilities - -After agents A+B+C return, Atlas (the orchestrator session) does: - -1. **Merge tool registration into `server.py`** — Agent A and B each emit a patch / list of tools to register; Atlas merges them into `_TOOLS`. -2. **Bump version + update `pyproject.toml`** — to a new alpha (e.g., `17.0.0a2` or whatever the active publish track is). -3. **Update `CHANGELOG.md`** — new entry describing what cycle 1 shipped. -4. **Run full unit test suite** — confirm 91 existing + new cycle-1 tests all pass. -5. **Run live smoke** — confirm the new journey runner can drive at least one trivial journey against TestKitApp end-to-end. -6. **Commit + push** the integrated cycle. - ---- - -## 6. Git + branch hygiene - -- Branch: `feat/v17-claude-native` is current. Cycles continue on this branch. 
-- Each cycle = one Atlas-orchestrated commit (or a small chain) representing the integrated parallel work. -- No PRs to main yet — branch lives until 1.0 launch readiness. - ---- - -## 7. The kickoff command (what the next session runs) - -When Atlas returns and reads this doc, the next action is: - -``` -1. Read 00_SIMDRIVE_1.0_PLAN.md (10 min) -2. Read 00a_VALIDATED_FACTS.md (3 min) — confirm nothing has drifted -3. Read 05_engineering_expansion.md sections §1-§3 (15 min) -4. Read this kickoff doc § 4-5 (3 min) -5. Dispatch the three Cycle 1 agents per § 4 -6. Wait for all three to return -7. Run § 5 integration steps -8. Commit + push -9. Confirm with chairman before starting Cycle 2 -``` - -Each cycle = one focused Atlas session. After each, surface progress + any decisions back to chairman. - ---- - -## 8. What's NOT for the next session to do - -- **Stripe live setup** — gated on chairman approval of pricing structure ($49/$149/$499); separate from engineering. -- **PyPI publish of `specterqa-ios 17.0.0a1`** — gated on Trusted Publisher entry (chairman's hand). Already tagged + pushed. -- **synctek.io site updates** — ProductPage rewrite for SimDrive premium-positioning is a future task; the marketing copy already drafted in `02_brand_marketing.md` waits. -- **WDA bootstrap implementation** — Cycle 2 work, needs Maurice's hardware. -- **Founder License or any commercial commitment** — paused per current direction. - ---- - -## 9. Honest expectations - -- **Each cycle ≈ 1 focused Atlas session** producing roughly 800-1,500 lines of new code + 200-400 lines of tests across 3 parallel agents. -- **5 cycles to get to v1.0 launch-ready.** -- **Calendar:** if chairman runs ~2 cycles per week, v1.0 launches in ~3 weeks of agent-driven work + 1 week of dogfood-to-perfection. That's much faster than the human-engineer 10-week estimate from `05_engineering_expansion.md`, BUT it depends on agent quality holding through 5 cycles. Realistic floor: 6 weeks. 
Realistic ceiling (if agents stumble): 10 weeks. -- **The cap on agent throughput** is integration testing, not code generation. Each cycle adds integration friction; cycle 4 (dogfood) is the critical-path test. - ---- - -*End of build kickoff. The next Atlas session starts here.* diff --git a/simdrive/docs/REDACTION_SPEC.md b/simdrive/docs/REDACTION_SPEC.md index 8da3bb9..d8a4551 100644 --- a/simdrive/docs/REDACTION_SPEC.md +++ b/simdrive/docs/REDACTION_SPEC.md @@ -1,6 +1,6 @@ # SimDrive Screenshot & Recording Redaction Spec -> **Status:** SPEC ONLY. INIT-2026-549 W1 ships this document. W2 ships the +> **Status:** SPEC ONLY. [internal-tracker] ships this document. W2 ships the > implementation in `simdrive/src/simdrive/redact.py` and wires it into > `recorder.py`, `observe.py`, and the on-disk recording writer. diff --git a/simdrive/docs/gtm/launch_sequence.md b/simdrive/docs/gtm/launch_sequence.md deleted file mode 100644 index 8cc6ec5..0000000 --- a/simdrive/docs/gtm/launch_sequence.md +++ /dev/null @@ -1,73 +0,0 @@ -# SpecterQA — D-7 to D+30 Launch Sequence - -**Owner:** Maurice Carrier, SyncTek LLC -**Target launch date (D0):** 2026-05-08 (the Anthropic MCP registry submission deadline in the productization plan) -**Status:** plan of record. Each row is a discrete action with a testable deliverable. - -The sequence assumes `specterqa-ios 1.0.0a1` is already on PyPI. The launch is a coordinated push across MCP registries, awesome-mcp lists, the Anthropic cookbook, and a single Show HN. No paid channels. No press wire. Distribution is registry placement plus a small set of warm hand-offs to people who already cover MCP and iOS testing. - -The "Owner" column uses the existing department roles. Where a row says "Maurice", the human Chairman is the operator; the agent stack drafts and stages. 
- -| Day | Action | Owner | Deliverable | -|---|---|---|---| -| D-7 | Cut `specterqa-ios 1.0.0a1` to a known-good tag; freeze the version unless a P0 lands | CodeAtlas + DeployAtlas | Tagged commit, PyPI release confirmed via `pip index versions specterqa-ios` | -| D-7 | Re-run the Palace dogfood smoke (catalog → book detail → tab tour → typed search) against 1.0.0a1 to confirm no regression vs `simdrive 0.2.0a1` | TestAtlas | Pass/fail log written to `simdrive/dogfood/2026-05-01-relaunch-smoke.md` | -| D-7 | Auto-generate the README tool table from `_TOOLS`; confirm it reads "29 MCP tools" everywhere | CodeAtlas | PR merged; CI check fails on drift | -| D-6 | Draft the Anthropic MCP registry listing copy (200-word description, install line, tool count, demo GIF reference) | MarketingAtlas | `simdrive/docs/gtm/listings/anthropic-mcp-registry.md` | -| D-6 | Draft the `modelcontextprotocol/servers` README PR text under "Mobile / Testing" | MarketingAtlas | `simdrive/docs/gtm/listings/mcp-servers-pr.md` | -| D-6 | Draft the Smithery.ai catalog metadata YAML | MarketingAtlas | `simdrive/docs/gtm/listings/smithery.yaml` | -| D-6 | Record the 30-second hero GIF: `session_start({})` → `observe()` → `tap_text("Borrow")` against TestKitApp | Maurice | `simdrive/docs/brand/hero-30s.gif` (≤2 MB) | -| D-5 | Draft the Show HN post (title under 80 chars, first paragraph under 60 words, install line, link to README, link to Palace dogfood) | MarketingAtlas | `simdrive/docs/gtm/listings/show-hn.md` | -| D-5 | Draft the launch-day Twitter/X thread (5 tweets max: position, install, demo GIF, Palace receipt, link) | MarketingAtlas | `simdrive/docs/gtm/listings/twitter-thread.md` | -| D-5 | Update PyPI long description to point at SpecterQA wordmark, the 29-tool count, and the Palace testimonial | DeployAtlas | New release-only metadata bump (no code change) | -| D-5 | Reach out to 3 soft-launch users (target: Palace's Maurice + 2 from the dev-advocate list categories "MCP early 
adopters" and "iOS QA leads") with a "kicking the tires?" ask for D-1 | Maurice | 3 replies logged in `simdrive/docs/gtm/soft-launch-replies.md` | -| D-4 | Open a draft GitHub release for the `v1.0.0a1` tag with full release notes (lifted from CHANGELOG, no marketing additions) | DeployAtlas | Draft visible on `gh release list --limit 5` | -| D-4 | Schedule the Anthropic MCP registry submission for D0 09:00 PT via the registry's web form (do not submit yet) | Maurice | Submission staged; screenshot saved | -| D-4 | Schedule the Smithery.ai submission for D0 09:00 PT | Maurice | Submission staged; screenshot saved | -| D-3 | Draft the `anthropics/anthropic-cookbook` PR: a 30-line "Drive an iOS sim with Claude" recipe in `examples/iOS_simulator_with_specterqa.ipynb` style | MarketingAtlas + CodeAtlas | PR branch pushed to a SpecterQA-org fork; PR not yet opened | -| D-3 | Draft Anthropic dev-rel outreach email (template #3 from `outreach_templates.md`) and queue for D+1 | MarketingAtlas | Email draft saved; recipient confirmed | -| D-3 | Add GitHub topics to the public repo: `mcp-server`, `ios-simulator`, `claude`, `anthropic`, `xctest-alternative`, `vision-first-testing` | DeployAtlas | Topics visible on `gh repo view --json repositoryTopics` | -| D-3 | Add the SpecterQA badge to the README hero (PyPI version + MCP-registry-soon + license MIT) | CodeAtlas | Visible on the rendered README | -| D-2 | Send the soft-launch users a fresh `pip install --pre specterqa-ios` and ask them to file 1 issue (positive or negative) | Maurice | 3 issues filed on the public repo | -| D-2 | Triage and close any blocking issues from the soft-launch round | CodeAtlas + TestAtlas | All P0/P1 resolved; release notes updated if any code shipped | -| D-1 | Final dry-run: open the Anthropic registry submission form, the Smithery form, the awesome-mcp PR, the Show HN editor — confirm copy and links render correctly | Maurice | Dry-run checklist signed in 
`simdrive/docs/gtm/launch-day-checklist.md` | -| D-1 | Pre-stage the launch-day Twitter thread in a scheduler (or paste-ready document); pre-stage the LinkedIn announcement on Maurice's personal page only | Maurice | Drafts visible in scheduler / clipboard | -| D-1 | Publish a fresh `simdrive/docs/gtm/launch-receipts.md` skeleton — the file we'll fill with timestamped links as submissions go live | MarketingAtlas | File present, sections empty | -| D0 | 09:00 PT — submit Anthropic MCP registry listing | Maurice | Submission ID logged in launch-receipts.md | -| D0 | 09:05 PT — submit Smithery.ai listing | Maurice | Submission URL logged | -| D0 | 09:15 PT — open the `modelcontextprotocol/servers` PR | Maurice | PR URL logged | -| D0 | 09:30 PT — open Show HN with the title "Show HN: SpecterQA — MCP-native iOS simulator driver" | Maurice | HN URL logged; first comment a reply with the install line | -| D0 | 09:45 PT — post the Twitter/X thread; tag `@AnthropicAI` only on tweet 1 and only because the registry submission is genuinely warm | Maurice | Tweet thread URL logged | -| D0 | 10:00 PT — publish the GitHub release for `v1.0.0a1` (move from draft to published) | DeployAtlas | Release URL logged | -| D0 | 10:00 PT — push the PyPI long-description update | DeployAtlas | New PyPI page rendered with the SpecterQA branding | -| D0 | 10:30 PT — publish the launch blog post on synctek.io (re-uses the press kit "Background story" + a launch-day diff) | MarketingAtlas | Blog URL logged | -| D0 | All-day — Maurice monitors HN, replies to first 10 comments within 15 minutes of arrival; no pile-on, no canned responses | Maurice | Comment-thread screenshot saved at end of day | -| D+1 | Triage HN feedback into 3 buckets: bug (file issue), feature ask (label `gtm-d+1`), positioning gap (queue for D+7 README pass) | CodeAtlas | Issue list with labels; counts in launch-receipts.md | -| D+1 | Open the `anthropics/anthropic-cookbook` PR; reference the registry submission ID | 
Maurice | PR URL logged; Anthropic dev-rel cc'd via the email queued on D-3 | -| D+1 | Send Anthropic dev-rel the cookbook PR notification (outreach template #3) | Maurice | Email sent timestamp logged | -| D+2 | Watch PyPI download counter (`pypistats recent specterqa-ios`); compare D-7 baseline → D+2 | DeployAtlas | Download delta in launch-receipts.md | -| D+2 | Reach out to 5 design-partner candidates (template #5 — Cloud beta, free 60 days for monthly feedback). Names from `dev_advocate_targets.md` "iOS QA leads at target companies" | Maurice | 5 emails sent; replies tracked in `simdrive/docs/gtm/design-partner-funnel.md` | -| D+3 | Cline + Cursor MCP marketplace submissions | Maurice + MarketingAtlas | Both submissions logged; status open/pending | -| D+3 | First post-launch dogfood: pick a new user from HN comments who says "I'll try this" — offer a 30-min pair install, harvest the friction log | Maurice | Dogfood report `simdrive/dogfood/2026-05-D+3-<user>.md` | -| D+5 | Reply to every HN comment older than 24h that hasn't been answered | Maurice | Comment thread closed out | -| D+5 | Cookbook PR review pass (assume Anthropic asks for revisions); ship them | CodeAtlas | PR moved to "approved" or "merged" | -| D+7 | Week-1 metrics review: PyPI installs, GitHub stars, registry approval status, design-partner reply count | Maurice + MarketingAtlas | Numbers in `simdrive/docs/gtm/week1-review.md`, no commentary | -| D+7 | Second-tier outreach: 5 more iOS QA leads (template #2), 2 podcast pitches (template #6) | Maurice | 7 emails sent | -| D+10 | Onboard the first design-partner reply from D+2 onto the Cloud beta waitlist (Cloud isn't built yet — they're signing up to be first when it ships) | Maurice | Waitlist row in CRM / spreadsheet | -| D+10 | Publish the first training-corpus essay: "Why we replaced XCTest with screenshots." 
Cross-link from the README, the Anthropic cookbook PR, and the Show HN comments | MarketingAtlas | Essay live; 3 inbound links confirmed | -| D+14 | Cookbook PR merged or in active review; if stuck, escalate via the Anthropic dev-rel contact from the D+1 email | Maurice | Status logged in launch-receipts.md | -| D+14 | Publish the second training-corpus artifact: a Stack Overflow answer to a real iOS-26 UITextField focus question, linking SpecterQA as the workaround | MarketingAtlas | SO answer URL logged | -| D+17 | Third dogfood pass with a new user (template-driven outreach from D+7 if anyone replied positively) | Maurice + TestAtlas | Dogfood report filed | -| D+21 | Publish the third training-corpus artifact: a GitHub Discussion in `modelcontextprotocol/servers` showing the Palace dogfood data (5-day cutover, 26 live tests, 0.999 SSIM) | MarketingAtlas | Discussion URL logged | -| D+21 | Conference / podcast pitch round: 3 mobile-dev podcasts (template #6). The hook is the SpecterQA origin (XCTest pivot, vision-first thesis) | Maurice | 3 pitches sent; replies tracked | -| D+25 | Plan v1.1 (real-device WDA): scope freeze, kickoff agenda, first design-partner ask: "would you run this against a real device for us?" | Maurice + CodeAtlas | `simdrive/docs/v1.1-plan.md` | -| D+28 | Design-partner status review: how many replied, how many are actively using `specterqa-ios`, how many will sign a Cloud-beta LOI | Maurice | LOI count in week4 review | -| D+30 | D+30 retrospective: what worked, what didn't, what the next 30 days should look like | Maurice + GTMAtlas | `simdrive/docs/gtm/d+30-retro.md` — no marketing summary, just numbers and lessons | - -## Notes - -- **No paid channel rows.** None planned through D+30. If we need to reconsider, re-open the GTM frame separately. -- **Soft-launch users on D-2** are not "beta testers" — they are people who already know the product. The point is to catch a launch-day blocker, not to gather feedback. 
-- **HN response window is 15 minutes** for the first 10 comments. After that, hourly is fine. The launch-day reply discipline is the single biggest swing factor in HN reach. -- **Cookbook PR is the highest-leverage single deliverable in this sequence.** It puts SpecterQA into Anthropic's official examples and seeds the next-Claude training corpus. -- **No LinkedIn growth-hacking.** A single personal-page post on D-1 is the only LinkedIn surface used. diff --git a/simdrive/pyproject.toml b/simdrive/pyproject.toml index 7354715..5e27e3a 100644 --- a/simdrive/pyproject.toml +++ b/simdrive/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "Pillow>=10.0", "boto3>=1.20", "email-validator>=2.0", - "httpx<1.0", # defensive: mcp 1.27.0 has unbounded httpx>=0.27.1; pip --pre picks httpx 1.0.dev3 which breaks httpx-sse. Remove once upstream mcp pins its own upper bound. (INIT-2026-545) + "httpx<1.0", # defensive: mcp 1.27.0 has unbounded httpx>=0.27.1; pip --pre picks httpx 1.0.dev3 which breaks httpx-sse. Remove once upstream mcp pins its own upper bound. "mcp>=1.0", "prometheus-client>=0.19", "pyobjc-framework-Quartz>=10.0", diff --git a/simdrive/src/simdrive/cloud/config.py b/simdrive/src/simdrive/cloud/config.py index ea25932..cf5e354 100644 --- a/simdrive/src/simdrive/cloud/config.py +++ b/simdrive/src/simdrive/cloud/config.py @@ -1,7 +1,7 @@ """Cloud API configuration — loaded from environment variables. WHY environment-based config: no secrets in code; Railway injects env vars -at deploy time. The same pattern used by forgeos-engine. +at deploy time. """ from __future__ import annotations diff --git a/simdrive/src/simdrive/cloud/middleware/quotas.py b/simdrive/src/simdrive/cloud/middleware/quotas.py index 26d5c81..22880b9 100644 --- a/simdrive/src/simdrive/cloud/middleware/quotas.py +++ b/simdrive/src/simdrive/cloud/middleware/quotas.py @@ -16,7 +16,7 @@ Starlette middleware. 
This lets us return structured 429 responses and skip the gate on public endpoints (GET /health, /v1/licenses/status). -INIT-2026-549 W-F: +[internal-tracker] W-F: Also exposes :func:`check_local_quota`, a network-free per-tool check that Wave 2 wires into the MCP tool dispatch inside server.py. The check reads from a locally-cached quota snapshot attached to the diff --git a/simdrive/src/simdrive/cloud/privacy.py b/simdrive/src/simdrive/cloud/privacy.py index 1805d58..ad252e4 100644 --- a/simdrive/src/simdrive/cloud/privacy.py +++ b/simdrive/src/simdrive/cloud/privacy.py @@ -7,8 +7,7 @@ WHY this module: at the cloud edge, an HTTP response body or upstream exception may carry credentials (license keys, bearer tokens, email addresses). When we log them — even at DEBUG — those values end up in -log aggregators, crash reporters, and PR comments. The audit -(INIT-2026-549 W-F) flagged this as the highest-impact privacy issue +log aggregators, crash reporters, and PR comments. The audit flagged this as the highest-impact privacy issue in the cloud module. USAGE diff --git a/simdrive/src/simdrive/journey/ci.py b/simdrive/src/simdrive/journey/ci.py index 1c656be..7a0be51 100644 --- a/simdrive/src/simdrive/journey/ci.py +++ b/simdrive/src/simdrive/journey/ci.py @@ -292,7 +292,7 @@ def run_ci(options: CIRunOptions | None = None) -> CIRunSummary: break continue - # Run the journey — run_journey is async after INIT-2026-544. + # Run the journey — run_journey is async after [internal-tracker]. try: result = asyncio.run(run_journey(journey, persona, session, llm_client)) except Exception as exc: diff --git a/simdrive/src/simdrive/journey/claude_client.py b/simdrive/src/simdrive/journey/claude_client.py index cf14090..13f3774 100644 --- a/simdrive/src/simdrive/journey/claude_client.py +++ b/simdrive/src/simdrive/journey/claude_client.py @@ -9,11 +9,10 @@ at model selection time. 
If usage is unavailable we fall back to the ``_APPROX_COST_PER_CALL_USD`` estimate defined in runner.py. -Model: ``claude-opus-4-7`` (most capable, per BusinessAtlas memory). +Model: ``claude-opus-4-7`` (most capable available at integration time). -INIT-2026-544: call() is now async, wrapping the blocking SDK call in -asyncio.to_thread() so the event loop is not blocked during the Anthropic -API call. +call() is async, wrapping the blocking SDK call in asyncio.to_thread() so +the event loop is not blocked during the Anthropic API call. """ from __future__ import annotations @@ -117,8 +116,8 @@ async def call( vision block before the text prompt. The model is instructed to return a single JSON object. - INIT-2026-544: The blocking Anthropic SDK call is now wrapped in - asyncio.to_thread() so the coroutine does not block the event loop. + The blocking Anthropic SDK call is wrapped in asyncio.to_thread() + so the coroutine does not block the event loop. """ messages: list[dict] = [] diff --git a/simdrive/src/simdrive/journey/mcp_sampling_client.py b/simdrive/src/simdrive/journey/mcp_sampling_client.py index e6c7483..2b3c514 100644 --- a/simdrive/src/simdrive/journey/mcp_sampling_client.py +++ b/simdrive/src/simdrive/journey/mcp_sampling_client.py @@ -9,7 +9,7 @@ mcp SDK types (TextContent, ImageContent, SamplingMessage, ModelPreferences, CreateMessageResult). -INIT-2026-544. +[internal-tracker]. """ from __future__ import annotations diff --git a/simdrive/src/simdrive/journey/runner.py b/simdrive/src/simdrive/journey/runner.py index e05f9c9..a14d8c0 100644 --- a/simdrive/src/simdrive/journey/runner.py +++ b/simdrive/src/simdrive/journey/runner.py @@ -111,7 +111,7 @@ class LLMClient(Protocol): the Anthropic SDK (ClaudeLLMClient) or via MCP sampling (MCPSamplingLLMClient). Tests substitute a fake client that returns scripted StepDecision objects. 
- INIT-2026-544: call() is now an async def so the runner can await both + [internal-tracker]: call() is now an async def so the runner can await both ClaudeLLMClient (wraps blocking SDK in asyncio.to_thread) and MCPSamplingLLMClient (natively async via session.create_message). """ diff --git a/simdrive/src/simdrive/license/errors.py b/simdrive/src/simdrive/license/errors.py index 7bfcde5..3182115 100644 --- a/simdrive/src/simdrive/license/errors.py +++ b/simdrive/src/simdrive/license/errors.py @@ -16,7 +16,7 @@ - trial_rate_limited - cloud_unreachable -UX envelope (INIT-2026-549 W1.5): +UX envelope: When the MCP-tool wrapper serialises a LicenseError to the agent host, the envelope is enriched with: error: "license_required" - umbrella code agents switch on diff --git a/simdrive/src/simdrive/license/gate.py b/simdrive/src/simdrive/license/gate.py index 600dede..4fb4e6f 100644 --- a/simdrive/src/simdrive/license/gate.py +++ b/simdrive/src/simdrive/license/gate.py @@ -1,6 +1,6 @@ """MCP-tool entitlement gate — single entry point for paywall enforcement. -INIT-2026-549 W1.5: every MCP tool handler calls ``gate()`` at its entry. The +[internal-tracker]: every MCP tool handler calls ``gate()`` at its entry. The function delegates to ``simdrive.license.entitlement.check_entitlement`` and re-raises any ``LicenseError`` unchanged so the MCP envelope wrapper sees the structured ``license_required`` / ``license_expired`` payload. diff --git a/simdrive/src/simdrive/license/public_key.py b/simdrive/src/simdrive/license/public_key.py index 0129998..580af94 100644 --- a/simdrive/src/simdrive/license/public_key.py +++ b/simdrive/src/simdrive/license/public_key.py @@ -16,7 +16,7 @@ The validator enforces that dev-key-signed licenses MUST have subject="dev-trial"; the dev key cannot forge enterprise / pro licenses. -KEY ROTATION (INIT-2026-549 W-F): +KEY ROTATION: TRUSTED_PUBLIC_KEYS is a list of (key_id, hex_pubkey) tuples. 
When a license is signed, the issuer SHOULD embed the matching ``key_id`` in the payload so the validator picks the correct key without trying each @@ -39,11 +39,9 @@ # Ed25519 license-signing public key for SimDrive 1.0 paid tiers. # Generated 2026-05-18 (rotated from 2026-05-02 placeholder — no licenses # were ever issued under the prior key, so rotation has no customer impact). -# Private key lives in BusinessAtlas vault: `simdrive/license_signing_private_key` -# (scope: DeployAtlas). Retrieve for Cloudflare Worker deploy via: -# cd /Users/atlas/BusinessAtlas -# .venv/bin/python v2/ba vault get --service simdrive --key license_signing_private_key -# Then: wrangler secret put LICENSE_SIGNING_PRIVATE_KEY (paste hex at prompt). +# Private key is held in the operator secrets store; see the internal ops +# runbook for the Cloudflare Worker deploy procedure (paste hex into +# `wrangler secret put LICENSE_SIGNING_PRIVATE_KEY` at the prompt). # DO NOT regenerate without coordinated key rotation — every issued # license becomes invalid the instant this constant changes. SIMDRIVE_PUBLIC_KEY_HEX: str = ( diff --git a/simdrive/src/simdrive/license/signer.py b/simdrive/src/simdrive/license/signer.py index cdd2f90..2dfd719 100644 --- a/simdrive/src/simdrive/license/signer.py +++ b/simdrive/src/simdrive/license/signer.py @@ -55,7 +55,7 @@ def sign_license( against (must match an entry in TRUSTED_PUBLIC_KEYS on the client). When omitted the payload does not carry a key_id and the client falls back to the first trusted key — this is the behaviour of - every license issued before INIT-2026-549. + every license issued before [internal-tracker]. Returns ------- diff --git a/simdrive/src/simdrive/license/trial_history.py b/simdrive/src/simdrive/license/trial_history.py index 6a09ef6..22cd905 100644 --- a/simdrive/src/simdrive/license/trial_history.py +++ b/simdrive/src/simdrive/license/trial_history.py @@ -1,6 +1,6 @@ """Trial-history bookkeeping — prevents infinite trial extension. 
-INIT-2026-549 W1.5: a brand-new user can self-issue a 14-day trial without +[internal-tracker]: a brand-new user can self-issue a 14-day trial without talking to the cloud. To stop the same user re-running ``simdrive trial start`` forever, we record a one-way hash of (email, machine_fingerprint) in ``~/.simdrive/trial_history.json`` and reject subsequent issuance for the diff --git a/simdrive/src/simdrive/license/validator.py b/simdrive/src/simdrive/license/validator.py index a917468..095e530 100644 --- a/simdrive/src/simdrive/license/validator.py +++ b/simdrive/src/simdrive/license/validator.py @@ -9,7 +9,7 @@ 7-day window past expiry before hard-rejecting. This matches the spec. - Dev key: licenses signed with DEV_SIGNING_KEY are accepted but MUST have subject="dev-trial"; the dev key cannot forge enterprise/pro licenses. -- Multi-key rotation (INIT-2026-549): payloads may include a ``key_id`` +- Multi-key rotation: payloads may include a ``key_id`` field naming which entry in TRUSTED_PUBLIC_KEYS signed them. Payloads without ``key_id`` fall back to the first trusted key (backwards compat with every license issued before this change). diff --git a/simdrive/src/simdrive/observe.py b/simdrive/src/simdrive/observe.py index 3e0c762..d67ec39 100644 --- a/simdrive/src/simdrive/observe.py +++ b/simdrive/src/simdrive/observe.py @@ -55,7 +55,7 @@ class Observation: captured_at: float marks: list[Mark] = field(default_factory=list) recent_logs: str | None = None - # Token-efficiency knobs (PR A, INIT-2026-549). Default off so every existing + # Token-efficiency knobs (PR A). Default off so every existing # caller keeps the legacy behavior — server.py routes new args through here. compact: bool = False capture_observability: bool = False @@ -172,7 +172,7 @@ def observe( `target` selects the backend: "simulator" (default) or "device" (real iPhone/iPad). 
- Token-efficiency knobs (PR A, INIT-2026-549) — all default off / no-op so + Token-efficiency knobs (PR A) — all default off / no-op so existing callers see no behavior change: * `compact`: emit the slim 6-key mark dict (`to_compact_dict`) instead of the full 9-key diagnostic dict. ~5-6x reduction in JSON payload on dense screens. diff --git a/simdrive/src/simdrive/recorder.py b/simdrive/src/simdrive/recorder.py index 9c0ad24..7177718 100644 --- a/simdrive/src/simdrive/recorder.py +++ b/simdrive/src/simdrive/recorder.py @@ -132,7 +132,7 @@ def _check_capture(pre: Optional[Path], post: Optional[Path]) -> Optional[str]: the step; a failed post-state capture (the common case — e.g. a simulator hiccup right after a tap) means the recorder doesn't know what state the action produced. In either case the step is incomplete and replay would - surface confusing errors, so we drop it (INIT-2026-549). + surface confusing errors, so we drop it. """ for label, candidate in (("pre", pre), ("post", post)): if candidate is None: @@ -389,7 +389,7 @@ def add_step(self, action: str, args: dict[str, Any], pre_screenshot: Optional[P Stored in the step so replay can detect marks-count drift (structural UI change) even when SSIM passes. - Integrity guard (INIT-2026-549): if either ``pre_screenshot`` or + Integrity guard: if either ``pre_screenshot`` or ``post_screenshot`` is missing, None, or points to a missing/empty file, the step is **dropped entirely** with a structured warning logged. This prevents partially-captured steps (typically caused by a flaky simulator @@ -1459,9 +1459,7 @@ def _observe_live_marks(session: Session, workdir: Path) -> list: recaptures a fresh screenshot and re-compares; only when *both* samples fall below the threshold do we treat it as real drift. Marks-count drift remains a single-frame check because a structural mark drop is far harder to fluke than -a pixel-level SSIM dip. 
- -(INIT-2026-549) — set to 2; raising this would lengthen recovery time on +a pixel-level SSIM dip. — set to 2; raising this would lengthen recovery time on genuinely drifted screens without meaningfully improving false-positive rate. """ @@ -1554,7 +1552,7 @@ def replay(name: str, session: Session, on_drift: str = "halt", for step in steps: # marks_count: stored at step level (recorder a13 path) or in step.args - # (TestAtlas fixture format). Read from both for compat. + # (test engineering fixture format). Read from both for compat. recorded_marks_count = ( step.get("marks_count") or (step.get("args") or {}).get("marks_count") @@ -1573,7 +1571,7 @@ def replay(name: str, session: Session, on_drift: str = "halt", "sample": 1, }, ) - # Hysteresis (INIT-2026-549): a single noisy sub-threshold frame + # Hysteresis: a single noisy sub-threshold frame # shouldn't halt replay. When the first sample is under threshold we # recapture a fresh screenshot, recompute, and only declare drift when # *both* samples fail. We retain the lower of the two scores as the diff --git a/simdrive/src/simdrive/server.py b/simdrive/src/simdrive/server.py index 3a675dd..7d4b9e3 100644 --- a/simdrive/src/simdrive/server.py +++ b/simdrive/src/simdrive/server.py @@ -80,7 +80,7 @@ from .observability.logger import get_logger -# ── MCP session holder (INIT-2026-544) ────────────────────────────────────── +# ── MCP session holder ────────────────────────────────────── # Populated by _serve_async when the MCP server starts so that async tool # handlers (e.g. tool_run_journey) can retrieve the active ServerSession. _MCP_SERVER: Optional[object] = None @@ -1055,7 +1055,7 @@ def _record_act_step(s, action: str, args: dict, pre_path: Path) -> int | None: s.last_screenshot_h = post_obs.screenshot_h s.last_screenshot_path = post_obs.screenshot_path # marks_count: embed in args AND pass to add_step for replay drift detection (a13). 
- # Stored in both locations so TestAtlas fixtures (args.marks_count) and the + # Stored in both locations so test engineering fixtures (args.marks_count) and the # recorder's step-level field (step.marks_count) are both populated. marks_count = len(s.last_marks) if s.last_marks else None if marks_count is not None: @@ -2900,7 +2900,7 @@ async def call_tool_async(name: str, arguments: dict) -> dict: """Async-aware tool dispatcher — supports both sync and coroutine handlers. Used by the MCP server's _call_tool handler so async tools (like - tool_run_journey after INIT-2026-544) are properly awaited. + tool_run_journey after [internal-tracker]) are properly awaited. """ for t in _TOOLS: if t["name"] == name: diff --git a/simdrive/src/simdrive/wda/bootstrap.py b/simdrive/src/simdrive/wda/bootstrap.py index 142a336..ad18dc9 100644 --- a/simdrive/src/simdrive/wda/bootstrap.py +++ b/simdrive/src/simdrive/wda/bootstrap.py @@ -16,7 +16,7 @@ 9. Smoke GET /status → {value: {ready: true}} 10. Print "WDA ready" summary with any manual Trust prompts -Bug fixes (INIT-2026-547): +Bug fixes: Bug 1 — resolve_signing_identity now filters by team_id before raising ambiguity. Bug 2 — hardware UDID resolved via devicectl; coredevice UUID used only for devicectl cmds. Bug 3 — CODE_SIGN_IDENTITY="Apple Development" + CODE_SIGN_STYLE=Automatic + -allowProvisioningUpdates. diff --git a/simdrive/tests/conftest.py b/simdrive/tests/conftest.py index 3bb7ede..47ef868 100644 --- a/simdrive/tests/conftest.py +++ b/simdrive/tests/conftest.py @@ -1,4 +1,4 @@ -"""Session-wide test bootstrap — INIT-2026-549 W1.5 paywall test fixture. +"""Session-wide test bootstrap — [internal-tracker] paywall test fixture. After PR #115 every MCP tool handler calls ``check_entitlement()`` which raises ``LicenseError [license_not_found]`` when ``~/.simdrive/license.json`` is absent. 
diff --git a/simdrive/tests/test_a11_device_som.py b/simdrive/tests/test_a11_device_som.py index 22df64a..e3d537e 100644 --- a/simdrive/tests/test_a11_device_som.py +++ b/simdrive/tests/test_a11_device_som.py @@ -1,8 +1,8 @@ -"""TestAtlas — simdrive 1.0.0a11 device SoM tests (F-002). +"""test engineering — simdrive 1.0.0a11 device SoM tests (F-002). All 14 tests in this file are expected to FAIL on feat/v17-claude-native HEAD 3a22bd4 (no ``simdrive/wda/som_device.py`` module, no ``WdaClient.source()`` -method). They must all PASS after CodeAtlas lands ``fix/simdrive-a11-device-som``. +method). They must all PASS after engineering lands ``fix/simdrive-a11-device-som``. XML fixtures are built inline with stdlib ``xml.etree.ElementTree`` so no disk-based fixtures are needed — tests are fully hermetic. @@ -302,7 +302,7 @@ def test_disabled_element_included(tmp_path): assert len(marks) == 1, ( f"Expected disabled button to be INCLUDED; got {len(marks)} marks. " - "If CodeAtlas decided to EXCLUDE disabled: change assertion to `== 0` " + "If engineering decided to EXCLUDE disabled: change assertion to `== 0` " "and update this docstring." ) assert "Disabled Button" in marks[0]["text"] diff --git a/simdrive/tests/test_a11_version_dynamic.py b/simdrive/tests/test_a11_version_dynamic.py index bb5a823..4bfb6b0 100644 --- a/simdrive/tests/test_a11_version_dynamic.py +++ b/simdrive/tests/test_a11_version_dynamic.py @@ -26,7 +26,7 @@ def test_version_matches_installed_package_metadata(): Fails on 3a22bd4: __version__ = "1.0.0a9" is a literal; it will differ from the installed package metadata version (which may be "1.0.0a11" or - similar once CodeAtlas bumps pyproject.toml). + similar once engineering bumps pyproject.toml). """ # Get what the installed package reports independently. 
try: diff --git a/simdrive/tests/test_a12_marks_parity.py b/simdrive/tests/test_a12_marks_parity.py index 5876cd8..bc3d53b 100644 --- a/simdrive/tests/test_a12_marks_parity.py +++ b/simdrive/tests/test_a12_marks_parity.py @@ -1,4 +1,4 @@ -"""TestAtlas — simdrive 1.0.0a12 marks parity tests (F-007 + F-008). +"""test engineering — simdrive 1.0.0a12 marks parity tests (F-007 + F-008). F-007: Resolver (_resolve_target_xy) must accept dict marks (as stored in Session.last_marks for target=device) without raising AttributeError. diff --git a/simdrive/tests/test_a12_typetext_device.py b/simdrive/tests/test_a12_typetext_device.py index 00f86fc..61a8556 100644 --- a/simdrive/tests/test_a12_typetext_device.py +++ b/simdrive/tests/test_a12_typetext_device.py @@ -6,7 +6,7 @@ All 6 tests FAIL on feat/v17-claude-native HEAD because F-009 is not yet implemented: the device branch does not exist in server.py on HEAD (the -working assumption is that CodeAtlas will add it in fix/simdrive-a12-typetext-device). +working assumption is that engineering will add it in fix/simdrive-a12-typetext-device). Tests confirm: 1. No simctl call on plain type_text (device). @@ -287,7 +287,7 @@ def test_type_text_sim_still_uses_simctl_or_hid(tmp_path): def test_type_text_device_guards_against_simctl_invocation(tmp_path): """If _simctl has a device guard, calling it with a device session raises. - Skip if no guard was added (CodeAtlas did not add a runtime assertion). + Skip if no guard was added (engineering did not add a runtime assertion). """ from simdrive import sim @@ -312,7 +312,7 @@ def test_type_text_device_guards_against_simctl_invocation(tmp_path): if not has_guard: pytest.skip( - "No simctl device-guard found in _simctl — CodeAtlas did not add one; " + "No simctl device-guard found in _simctl — engineering did not add one; " "skipping guard assertion per contract." 
) diff --git a/simdrive/tests/test_a13_device_recording.py b/simdrive/tests/test_a13_device_recording.py index c9ded73..3ec9ac2 100644 --- a/simdrive/tests/test_a13_device_recording.py +++ b/simdrive/tests/test_a13_device_recording.py @@ -212,7 +212,7 @@ def test_record_writes_requires_block_with_device_state(tmp_path, monkeypatch): req = payload["requires"] - # a13 device requires block shape (from CodeAtlas a13 implementation): + # a13 device requires block shape (from engineering a13 implementation): # requires: # target: "device" # app: diff --git a/simdrive/tests/test_a13_state_contract.py b/simdrive/tests/test_a13_state_contract.py index e3abb1c..3da07ca 100644 --- a/simdrive/tests/test_a13_state_contract.py +++ b/simdrive/tests/test_a13_state_contract.py @@ -20,7 +20,7 @@ foreground: <bool> ... -Verification rules (a13 CodeAtlas implementation): +Verification rules (a13 engineering implementation): - target mismatch → halt (replay_state_contract_failed in reasons) - app.bundle_id mismatch → halt (existing a9 behavior) - device.os_major mismatch → halt (new a13 behavior) @@ -93,7 +93,7 @@ def _write_recording(rec_dir: Path, *, Image.new("RGB", (1170, 2532), (210, 210, 210)).save(pre) Image.new("RGB", (1170, 2532), (200, 200, 200)).save(post) - # a13 nested requires block (matches CodeAtlas DeviceRequires schema) + # a13 nested requires block (matches engineering DeviceRequires schema) payload = { "name": rec_dir.name, "created_at": 0.0, diff --git a/simdrive/tests/test_b5_domain_d_ssim_masking.py b/simdrive/tests/test_b5_domain_d_ssim_masking.py index 2171f35..3e49df1 100644 --- a/simdrive/tests/test_b5_domain_d_ssim_masking.py +++ b/simdrive/tests/test_b5_domain_d_ssim_masking.py @@ -15,7 +15,7 @@ - recorder.start() / Recorder.finalize() write no ssim_masks key. - No DEVICE_STATUS_BAR_MASKS lookup table exists yet. 
-All tests PASS after CodeAtlas implements: +All tests PASS after engineering implements: - simdrive.recorder.DEVICE_STATUS_BAR_MASKS: dict mapping device-class name to (w, h) tuple for the status bar mask region. - simdrive.recorder._default_status_bar_mask(device_name) -> list[dict] | None @@ -108,7 +108,7 @@ class TestF14SSIMStatusBarMask: # mask region, but to create a detectable SSIM drop in the unmasked case # we use a much larger altered region (800 px) — SSIM is structurally # insensitive to small localised changes in a large uniform image. - STATUS_BAR_H_PX = 180 # mask height (what CodeAtlas must produce) + STATUS_BAR_H_PX = 180 # mask height (what engineering must produce) FIXTURE_STRIPE_H = 800 # exaggerated changed region to ensure SSIM < 0.85 def _make_screenshot_pair_unmasked(self, tmp_path: Path): @@ -170,13 +170,13 @@ def test_ssim_with_explicit_mask_above_threshold(self, tmp_path: pytest.TempPath score = _ssim_or_fallback(ref, live, masks=masks) assert score >= 0.85, ( f"With mask covering changed region, SSIM should be >= 0.85 but got {score:.4f}. " - "CodeAtlas: verify _apply_masks_pil blanks the stripe before compare." + "engineering: verify _apply_masks_pil blanks the stripe before compare." ) def test_default_status_bar_mask_lookup_exists(self): """recorder.DEVICE_STATUS_BAR_MASKS must exist and contain iPhone 17 Pro. - CodeAtlas: add DEVICE_STATUS_BAR_MASKS = { + engineering: add DEVICE_STATUS_BAR_MASKS = { 'iPhone 17 Pro': (1206, 180), # (width_px, status_bar_h_px) ... } to simdrive/src/simdrive/recorder.py. @@ -212,7 +212,7 @@ def test_default_status_bar_mask_lookup_contains_iphone_16(self): def test_default_status_bar_mask_helper_returns_list(self): """recorder._default_status_bar_mask(device_name) -> list[dict] with one entry. 
- CodeAtlas: implement + engineering: implement def _default_status_bar_mask(device_name: str) -> list[dict] | None: entry = DEVICE_STATUS_BAR_MASKS.get(device_name) if not entry: @@ -224,7 +224,7 @@ def _default_status_bar_mask(device_name: str) -> list[dict] | None: assert hasattr(recorder, "_default_status_bar_mask"), ( "recorder._default_status_bar_mask() helper does not exist. " - "CodeAtlas: add it to simdrive/src/simdrive/recorder.py." + "engineering: add it to simdrive/src/simdrive/recorder.py." ) result = recorder._default_status_bar_mask("iPhone 17 Pro") assert result is not None, ( @@ -257,7 +257,7 @@ def test_default_status_bar_mask_unknown_device_returns_none(self): def test_ssim_with_auto_mask_via_device_name(self, tmp_path: pytest.TempPathFactory): """_ssim_or_fallback should accept a device_name kwarg and auto-apply the mask. - CodeAtlas: add optional `device_name: str | None = None` param to + engineering: add optional `device_name: str | None = None` param to _ssim_or_fallback. When masks is None and device_name is known, auto-populate masks from DEVICE_STATUS_BAR_MASKS. """ @@ -273,7 +273,7 @@ def test_ssim_with_auto_mask_via_device_name(self, tmp_path: pytest.TempPathFact sig = inspect.signature(recorder._ssim_or_fallback) assert "device_name" in sig.parameters, ( "_ssim_or_fallback must accept a `device_name` keyword argument. " - "CodeAtlas: add `device_name: str | None = None` to its signature and " + "engineering: add `device_name: str | None = None` to its signature and " "auto-resolve the status-bar mask when masks=None and device_name is known." ) @@ -281,7 +281,7 @@ def test_ssim_with_auto_mask_via_device_name(self, tmp_path: pytest.TempPathFact device_name="iPhone 17 Pro") assert score >= 0.85, ( f"With device_name='iPhone 17 Pro', auto-mask should raise SSIM to >= 0.85. " - f"Got {score:.4f}. CodeAtlas: look up DEVICE_STATUS_BAR_MASKS and apply mask." + f"Got {score:.4f}. engineering: look up DEVICE_STATUS_BAR_MASKS and apply mask." 
) @@ -328,7 +328,7 @@ def test_iphone_17_pro_recording_has_ssim_masks(self, tmp_path: pytest.TempPathF ) assert "ssim_masks" in payload, ( "recording.yaml is missing 'ssim_masks' key for iPhone 17 Pro recording. " - "CodeAtlas: Recorder.finalize() must call _default_status_bar_mask(device.name) " + "engineering: Recorder.finalize() must call _default_status_bar_mask(device.name) " "and write the result as payload['ssim_masks']." ) @@ -379,7 +379,7 @@ def test_iphone_17_pro_ssim_masks_dimensions(self, tmp_path: pytest.TempPathFact assert m.get("y") == 0, f"mask.y must be 0, got {m.get('y')}" assert m.get("w") == 1206, ( f"mask.w must equal device width 1206 px, got {m.get('w')}. " - "CodeAtlas: use DEVICE_STATUS_BAR_MASKS['iPhone 17 Pro'] width." + "engineering: use DEVICE_STATUS_BAR_MASKS['iPhone 17 Pro'] width." ) h = m.get("h") assert isinstance(h, int) and h > 0, ( @@ -398,7 +398,7 @@ def test_iphone_16_pro_recording_has_ssim_masks(self, tmp_path: pytest.TempPathF ) assert "ssim_masks" in payload, ( "recording.yaml is missing 'ssim_masks' for iPhone 16 Pro recording. " - "CodeAtlas: DEVICE_STATUS_BAR_MASKS must include 'iPhone 16 Pro'." + "engineering: DEVICE_STATUS_BAR_MASKS must include 'iPhone 16 Pro'." ) def test_iphone_16_pro_ssim_masks_label(self, tmp_path: pytest.TempPathFactory): @@ -429,7 +429,7 @@ def test_unknown_device_omits_ssim_masks(self, tmp_path: pytest.TempPathFactory) masks = payload["ssim_masks"] assert masks is None or masks == [], ( f"Unknown device should produce no ssim_masks or an empty list, got {masks!r}. " - "CodeAtlas: skip writing ssim_masks when _default_status_bar_mask returns None." + "engineering: skip writing ssim_masks when _default_status_bar_mask returns None." 
) # ── Replay picks up auto-masks from YAML ─────────────────────────────── diff --git a/simdrive/tests/test_branding_and_recovery_strings.py b/simdrive/tests/test_branding_and_recovery_strings.py index b0fb904..bbf0d65 100644 --- a/simdrive/tests/test_branding_and_recovery_strings.py +++ b/simdrive/tests/test_branding_and_recovery_strings.py @@ -1,4 +1,4 @@ -"""Regression tests for Bug 4 — stale rename strings (INIT-2026-543). +"""Regression tests for Bug 4 — stale rename strings. Stale strings that must be purged: - ios_observe, ios_start_session, ios_devices, ios_stop_recording, diff --git a/simdrive/tests/test_chaos_integration.py b/simdrive/tests/test_chaos_integration.py index e777770..f79908b 100644 --- a/simdrive/tests/test_chaos_integration.py +++ b/simdrive/tests/test_chaos_integration.py @@ -1,4 +1,4 @@ -"""Chaos integration tests for the SimDrive resilience stack (INIT-2026-549). +"""Chaos integration tests for the SimDrive resilience stack. These tests prove the resilience hardening from Wave 1 + Wave 2 holds up under failures that happen *during* a replay, not just at isolated call diff --git a/simdrive/tests/test_cloud_local_quota.py b/simdrive/tests/test_cloud_local_quota.py index 99dacf5..ede2aee 100644 --- a/simdrive/tests/test_cloud_local_quota.py +++ b/simdrive/tests/test_cloud_local_quota.py @@ -1,4 +1,4 @@ -"""Tests for the network-free per-tool quota check (INIT-2026-549 W-F). +"""Tests for the network-free per-tool quota check. Wave 2 calls ``check_local_quota(tool_name, session)`` from inside the MCP tool dispatch — the check must: diff --git a/simdrive/tests/test_cloud_privacy.py b/simdrive/tests/test_cloud_privacy.py index 4fa1d64..bcf829a 100644 --- a/simdrive/tests/test_cloud_privacy.py +++ b/simdrive/tests/test_cloud_privacy.py @@ -1,4 +1,4 @@ -"""Tests for cloud privacy scrubbing (INIT-2026-549 W-F). +"""Tests for cloud privacy scrubbing. The scrubber must guarantee no sensitive value survives into logs or error bodies. 
Sensitive == any field name containing one of the diff --git a/simdrive/tests/test_cloud_r2_real.py b/simdrive/tests/test_cloud_r2_real.py index 8f6160c..563c34d 100644 --- a/simdrive/tests/test_cloud_r2_real.py +++ b/simdrive/tests/test_cloud_r2_real.py @@ -18,7 +18,7 @@ # moto is an optional dev dependency (simdrive[dev]); skip the whole module # cleanly when it's missing so CI doesn't fail to *collect* this file. -# INIT-2026-549 W1: resolves the "ModuleNotFoundError: No module named 'moto'" +# [internal-tracker]: resolves the "ModuleNotFoundError: No module named 'moto'" # collection error blocking the wider `pytest simdrive/tests -m "not live"` gate. moto = pytest.importorskip("moto") mock_aws = moto.mock_aws diff --git a/simdrive/tests/test_demo_cli.py b/simdrive/tests/test_demo_cli.py index df46c0c..efda3de 100644 --- a/simdrive/tests/test_demo_cli.py +++ b/simdrive/tests/test_demo_cli.py @@ -1,4 +1,4 @@ -"""`simdrive demo` CLI subcommand tests — INIT-2026-549 polish/demo-cli. +"""`simdrive demo` CLI subcommand tests — [internal-tracker] polish/demo-cli. Covers the onboarding entry point introduced for PR D. 
The function under test is :func:`simdrive._demo.run_demo`; we mock the sim/observe layer so diff --git a/simdrive/tests/test_e2e_testkit.py b/simdrive/tests/test_e2e_testkit.py index fc2377a..9545cca 100644 --- a/simdrive/tests/test_e2e_testkit.py +++ b/simdrive/tests/test_e2e_testkit.py @@ -6,7 +6,7 @@ Requirements: - A booted iOS simulator - - TestKitApp installed (build with /Users/atlas/Documents/specterqa-ios/TestKitApp/build.sh) + - TestKitApp installed (build with ./TestKitApp/build.sh from the repo root) - simdrive native binary built (cd native && make) Each test exercises a single tool or a small flow against a known-good diff --git a/simdrive/tests/test_errors_hid.py b/simdrive/tests/test_errors_hid.py index 4aaa216..e6a2fbe 100644 --- a/simdrive/tests/test_errors_hid.py +++ b/simdrive/tests/test_errors_hid.py @@ -1,6 +1,6 @@ """Verify the HID/keyboard/focus/wait error subclasses match the SimdriveError contract. -The new classes (added in INIT-2026-549) are class-form companions to the +The new classes (added in [internal-tracker]) are class-form companions to the existing constructor functions in ``simdrive.errors``. Each must: - Inherit from :class:`simdrive.errors.SimdriveError`. diff --git a/simdrive/tests/test_journey_claude_client.py b/simdrive/tests/test_journey_claude_client.py index c000f8a..911bde4 100644 --- a/simdrive/tests/test_journey_claude_client.py +++ b/simdrive/tests/test_journey_claude_client.py @@ -2,7 +2,7 @@ All tests use a mocked anthropic.Anthropic client — no real API calls are made. -INIT-2026-544: ClaudeLLMClient.call is now async (wraps blocking SDK call in +[internal-tracker]: ClaudeLLMClient.call is now async (wraps blocking SDK call in asyncio.to_thread). All tests that call client.call() directly are updated to use asyncio.run() so they exercise the new async interface. 
""" @@ -80,7 +80,7 @@ def test_parse_decision_invalid_json(): def test_client_call_returns_decision(mock_anthropic_cls): """Happy-path: client calls SDK and returns a parsed StepDecision. - INIT-2026-544: call() is now async — use asyncio.run() to invoke it. + [internal-tracker]: call() is now async — use asyncio.run() to invoke it. """ fake_response = _make_response( '{"tool": "swipe", "args": {"direction": "up"}, "rationale": "scroll", "confidence": 0.8}', @@ -107,7 +107,7 @@ def test_client_call_returns_decision(mock_anthropic_cls): def test_client_cost_accumulates(mock_anthropic_cls): """Cost accumulates across multiple calls. - INIT-2026-544: call() is now async — use asyncio.run() to invoke it. + [internal-tracker]: call() is now async — use asyncio.run() to invoke it. """ fake_response = _make_response( '{"tool": "done", "args": {}, "rationale": "done", "confidence": 1.0}', @@ -134,7 +134,7 @@ def test_client_cost_accumulates(mock_anthropic_cls): def test_client_sdk_exception_propagates(mock_anthropic_cls): """SDK exceptions bubble up so runner can wrap them as claude_call_failed. - INIT-2026-544: call() is now async — use asyncio.run() to invoke it. + [internal-tracker]: call() is now async — use asyncio.run() to invoke it. asyncio.to_thread propagates exceptions from the thread back to the caller. """ mock_client = MagicMock() diff --git a/simdrive/tests/test_journey_runner.py b/simdrive/tests/test_journey_runner.py index 3c810bf..ab75dfc 100644 --- a/simdrive/tests/test_journey_runner.py +++ b/simdrive/tests/test_journey_runner.py @@ -69,7 +69,7 @@ def _make_session(session_id: str = "test-session-001") -> MagicMock: class FakeLLMClient: """Scripted LLM client that returns a predetermined sequence of decisions. - INIT-2026-544: call() converted to async def so it satisfies the new + [internal-tracker]: call() converted to async def so it satisfies the new async LLMClient Protocol after the MCP sampling refactor. 
All assertions and scripted-decision logic are unchanged. """ diff --git a/simdrive/tests/test_journey_runner_async.py b/simdrive/tests/test_journey_runner_async.py index c40433a..43007d5 100644 --- a/simdrive/tests/test_journey_runner_async.py +++ b/simdrive/tests/test_journey_runner_async.py @@ -1,10 +1,10 @@ -"""TDD tests for the async contract of run_journey — INIT-2026-544. +"""TDD tests for the async contract of run_journey — [internal-tracker]. The MCP sampling refactor converts run_journey from a synchronous function to an async coroutine so it can await MCPSamplingLLMClient.call(). These tests pin the new async contract. -ALL tests in this file must FAIL until CodeAtlas: +ALL tests in this file must FAIL until engineering: 1. Makes LLMClient.call an async def 2. Makes run_journey an async def (coroutine function) 3. Updates the runner loop to `await llm_client.call(...)` @@ -126,7 +126,7 @@ def test_run_journey_is_coroutine_function(self): assert inspect.iscoroutinefunction(run_journey), ( "run_journey must be an async def (coroutine function) after the " "MCP sampling refactor. Currently it is a sync function. " - "CodeAtlas: convert `def run_journey(...)` to `async def run_journey(...)`." + "engineering: convert `def run_journey(...)` to `async def run_journey(...)`." ) def test_run_journey_returns_awaitable(self): @@ -176,7 +176,7 @@ def test_llmclient_protocol_call_is_async(self): assert is_async, ( "LLMClient.call must be declared as `async def call(...)` in the Protocol. " "This allows mypy to enforce that all implementations are async. " - "Currently it is a sync def — CodeAtlas: change to `async def call(...)`." + "Currently it is a sync def — engineering: change to `async def call(...)`." 
) diff --git a/simdrive/tests/test_license_cli_trial.py b/simdrive/tests/test_license_cli_trial.py index c224a01..fac41f5 100644 --- a/simdrive/tests/test_license_cli_trial.py +++ b/simdrive/tests/test_license_cli_trial.py @@ -1,4 +1,4 @@ -"""Regression tests for Bug 1 — run_journey license gate (INIT-2026-543). +"""Regression tests for Bug 1 — run_journey license gate. Three failure stacks: (a) serve() dispatcher does not route `trial` subcommand @@ -8,7 +8,7 @@ TDD: written BEFORE the fix. All tests must FAIL on current code. -INIT-2026-544 update: tool_run_journey is now async after the MCP sampling +[internal-tracker] update: tool_run_journey is now async after the MCP sampling refactor. test_run_journey_works_with_offline_dev_license updated to use asyncio.run() when calling tool_run_journey. """ @@ -75,7 +75,7 @@ def test_serve_dispatches_trial_subcommand(self, tmp_path: Path, monkeypatch: py def isolate_trial_history(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: """Redirect ``trial_history`` so tests don't pollute ``~/.simdrive``. - INIT-2026-549 W1.5 added an (email, machine) uniqueness check that reads + [internal-tracker].5 added an (email, machine) uniqueness check that reads ``~/.simdrive/trial_history.json`` on every ``trial start``. Without this fixture the second test in the suite would always see ``trial_already_used``. """ @@ -218,7 +218,7 @@ def test_run_journey_works_with_offline_dev_license( # (b) a plain return dict — PASS # (c) LicenseError — FAIL (this is the bug) # - # INIT-2026-544: tool_run_journey is now async — wrap with asyncio.run(). + # [internal-tracker]: tool_run_journey is now async — wrap with asyncio.run(). 
try: result = asyncio.run(server.tool_run_journey({"session_id": "fake-sess-001"})) # If it returned a dict, license gate passed (other error in result is fine) diff --git a/simdrive/tests/test_license_clock_skew.py b/simdrive/tests/test_license_clock_skew.py index 41c5ea7..9d780db 100644 --- a/simdrive/tests/test_license_clock_skew.py +++ b/simdrive/tests/test_license_clock_skew.py @@ -1,4 +1,4 @@ -"""Tests for the tightened offline-grace clock-skew check (INIT-2026-549 W-F). +"""Tests for the tightened offline-grace clock-skew check. The check refuses offline grace when: - the system clock moved BACKWARDS > 6h relative to last_known_server_time diff --git a/simdrive/tests/test_license_error_ux.py b/simdrive/tests/test_license_error_ux.py index b51c563..6f11bed 100644 --- a/simdrive/tests/test_license_error_ux.py +++ b/simdrive/tests/test_license_error_ux.py @@ -1,4 +1,4 @@ -"""LicenseError UX-envelope tests — INIT-2026-549 W1.5 workstream 4. +"""LicenseError UX-envelope tests — [internal-tracker].5 workstream 4. When ANY gated tool raises LicenseError, the structured envelope returned to the MCP client MUST include: diff --git a/simdrive/tests/test_license_rotation.py b/simdrive/tests/test_license_rotation.py index 0aab4dc..800901d 100644 --- a/simdrive/tests/test_license_rotation.py +++ b/simdrive/tests/test_license_rotation.py @@ -1,4 +1,4 @@ -"""Tests for multi-key license validator support (INIT-2026-549 W-F). +"""Tests for multi-key license validator support. The validator must: - Accept a payload signed under the FIRST trusted key when no key_id diff --git a/simdrive/tests/test_mcp_error_contract.py b/simdrive/tests/test_mcp_error_contract.py index e09fa85..8459025 100644 --- a/simdrive/tests/test_mcp_error_contract.py +++ b/simdrive/tests/test_mcp_error_contract.py @@ -1,5 +1,4 @@ -"""Regression tests for Bug 3 — tool_run_journey MCP error contract divergence -(INIT-2026-543). +"""Regression tests for Bug 3 — tool_run_journey MCP error contract divergence. 
_call_tool in server.py catches errors.SimdriveError but LicenseError lives in license/errors.py as its own class hierarchy — NOT a subclass of SimdriveError. @@ -102,7 +101,7 @@ def tool_that_raises_license_error(): def test_license_error_to_dict_is_superset_of_simdrive_error_schema(self) -> None: """LicenseError.to_dict() must remain compatible with SimdriveError consumers. - INIT-2026-549 W1.5: LicenseError adds UX-affordance fields + [internal-tracker].5: LicenseError adds UX-affordance fields (``error: "license_required"``, ``pricing_url``, command hints) so agent hosts can surface a copy-pasteable upsell. Existing fields (``code``, ``message``, ``details``) are preserved — the envelope is a diff --git a/simdrive/tests/test_mcp_path_no_anthropic.py b/simdrive/tests/test_mcp_path_no_anthropic.py index ff1c942..a6b083b 100644 --- a/simdrive/tests/test_mcp_path_no_anthropic.py +++ b/simdrive/tests/test_mcp_path_no_anthropic.py @@ -1,4 +1,4 @@ -"""Regression test: MCP code path must NOT import anthropic — INIT-2026-544. +"""Regression test: MCP code path must NOT import anthropic — [internal-tracker]. WHY THIS MATTERS ---------------- diff --git a/simdrive/tests/test_mcp_sampling_llm_client.py b/simdrive/tests/test_mcp_sampling_llm_client.py index 85447ec..4f9e42a 100644 --- a/simdrive/tests/test_mcp_sampling_llm_client.py +++ b/simdrive/tests/test_mcp_sampling_llm_client.py @@ -1,9 +1,9 @@ -"""TDD tests for MCPSamplingLLMClient — INIT-2026-544. +"""TDD tests for MCPSamplingLLMClient — [internal-tracker]. These tests pin the contract for the NEW MCPSamplingLLMClient that calls MCP sampling (session.create_message) instead of the Anthropic SDK directly. -ALL tests in this file must FAIL until CodeAtlas creates +ALL tests in this file must FAIL until engineering creates simdrive/journey/mcp_sampling_client.py and wires up the async Protocol. 
@@ -40,7 +40,7 @@ # --------------------------------------------------------------------------- # Import the module under test. -# This WILL fail with ImportError until CodeAtlas creates the file. +# This WILL fail with ImportError until engineering creates the file. # That is the correct TDD signal. # --------------------------------------------------------------------------- from simdrive.journey.mcp_sampling_client import MCPSamplingLLMClient # type: ignore[import] @@ -324,7 +324,7 @@ class TestMCPSamplingClientSessionErrors: Rationale: the runner's generic `except Exception` at the call site already converts any exception to outcome="error" with the message captured in failure_reason. Wrapping in a custom exception type would lose the original - cause without adding debuggability. CodeAtlas MAY choose to wrap in a + cause without adding debuggability. engineering MAY choose to wrap in a specific simdrive.errors.SimdriveError subclass — if so, update this test to assert the wrapper type while still checking .cause. """ @@ -378,7 +378,7 @@ def test_sampling_client_no_anthropic_import(self): """The source of mcp_sampling_client.py must not contain 'import anthropic' or 'from anthropic'. - If this test fails after CodeAtlas creates the file, it means the + If this test fails after engineering creates the file, it means the implementation accidentally pulled in the Anthropic SDK — which would force every MCP user to `pip install anthropic` and set ANTHROPIC_API_KEY. """ @@ -387,7 +387,7 @@ def test_sampling_client_no_anthropic_import(self): assert module_path.exists(), ( f"mcp_sampling_client.py not found at {module_path}. " - "CodeAtlas must create this file." + "engineering must create this file." 
) source_text = module_path.read_text(encoding="utf-8") diff --git a/simdrive/tests/test_observe_compact.py b/simdrive/tests/test_observe_compact.py index 8ddea53..fac6504 100644 --- a/simdrive/tests/test_observe_compact.py +++ b/simdrive/tests/test_observe_compact.py @@ -1,6 +1,6 @@ """Tests for the PR A token-efficiency knobs on ``observe.observe()``. -Covers the four new parameters introduced under INIT-2026-549: +Covers the four new parameters introduced under [internal-tracker]: * ``compact=True`` — slim mark dict via ``Mark.to_compact_dict()`` * ``confidence_floor`` — drop marks below the requested band * ``mark_limit`` — cap the returned list to top-N by (band, area) diff --git a/simdrive/tests/test_packaging_deps.py b/simdrive/tests/test_packaging_deps.py index 0dec7c5..fa4d37f 100644 --- a/simdrive/tests/test_packaging_deps.py +++ b/simdrive/tests/test_packaging_deps.py @@ -229,7 +229,7 @@ def test_declared_deps_contains_requests() -> None: """ Test 1: requests must appear in [project.dependencies]. - This is the specific regression test for the gap DeployAtlas caught: + This is the specific regression test for the gap release pipeline caught: license/cli.py imports `requests` at module top, but it was absent from the declared deps. Clean install → ModuleNotFoundError. """ @@ -337,8 +337,8 @@ def test_httpx_pinned_below_1_0() -> None: `pip install --pre`, the resolver picks `httpx 1.0.dev3` (a real pre-release on PyPI), which breaks `httpx-sse` and the MCP transport layer. Until upstream mcp adds an upper bound, simdrive must defend - its users with a top-level pin. Caught by DeployAtlas pre-publish - smoke for 1.0.0a4 (INIT-2026-544). + its users with a top-level pin. Caught by release pipeline pre-publish + smoke for 1.0.0a4. 
""" deps = _load_project_dependencies() httpx_specs = [d for d in deps if d.startswith("httpx")] diff --git a/simdrive/tests/test_paywall_gates.py b/simdrive/tests/test_paywall_gates.py index c53d991..dc650e9 100644 --- a/simdrive/tests/test_paywall_gates.py +++ b/simdrive/tests/test_paywall_gates.py @@ -1,4 +1,4 @@ -"""Paywall gate tests — INIT-2026-549 W1.5. +"""Paywall gate tests — [internal-tracker].5. Every MCP tool handler must call ``check_entitlement()`` at its entry. When the entitlement check raises a ``LicenseError`` the tool MUST propagate the error diff --git a/simdrive/tests/test_readme_quickstart.py b/simdrive/tests/test_readme_quickstart.py index af932a2..ecfa77f 100644 --- a/simdrive/tests/test_readme_quickstart.py +++ b/simdrive/tests/test_readme_quickstart.py @@ -6,7 +6,7 @@ moved or removed, or a stale string was re-introduced. Fix the README, not the test. -Source: INIT-2026-546. Added in 1.0.0a6 to pin the discoverability polish. +Source: [internal-tracker]. Added in 1.0.0a6 to pin the discoverability polish. """ from __future__ import annotations @@ -34,7 +34,7 @@ def _readme_full() -> str: def test_quickstart_pip_install_in_first_100_lines() -> None: """`pip install simdrive` must appear in the first 100 lines of the README. - INIT-2026-549 W1.5: the package is now a paywalled trial+paid product + [internal-tracker].5: the package is now a paywalled trial+paid product rather than a pre-release alpha, so the install command no longer carries the ``--pre`` flag. The presence of the install command itself is the invariant under test. @@ -50,7 +50,7 @@ def test_quickstart_pip_install_in_first_100_lines() -> None: def test_quickstart_trial_start_in_first_100_lines() -> None: """`simdrive trial start --email` must appear in the first 100 lines. 
- INIT-2026-549 W1.5: trial issuance is now the canonical first step after + [internal-tracker].5: trial issuance is now the canonical first step after install — every gated tool returns ``license_required`` until a trial or paid key is on disk. """ diff --git a/simdrive/tests/test_recorder_integrity.py b/simdrive/tests/test_recorder_integrity.py index 442ef32..f539243 100644 --- a/simdrive/tests/test_recorder_integrity.py +++ b/simdrive/tests/test_recorder_integrity.py @@ -1,4 +1,4 @@ -"""Recorder integrity tests (INIT-2026-549 / WS-E). +"""Recorder integrity tests. Covers three audit hardenings of ``simdrive.recorder``: diff --git a/simdrive/tests/test_server_coverage_85.py b/simdrive/tests/test_server_coverage_85.py index 1090ea0..07121c7 100644 --- a/simdrive/tests/test_server_coverage_85.py +++ b/simdrive/tests/test_server_coverage_85.py @@ -1,4 +1,4 @@ -"""Coverage push: server.py 70% -> 85% (INIT-2026-549 Wave 4 coverage-85). +"""Coverage push: server.py 70% -> 85%. Targets the previously uncovered tool handlers and CLI subcommand entry points by mocking the act/sim/wda/observe boundaries. Every test does diff --git a/simdrive/tests/test_trial_cli.py b/simdrive/tests/test_trial_cli.py index ede7905..aa95edf 100644 --- a/simdrive/tests/test_trial_cli.py +++ b/simdrive/tests/test_trial_cli.py @@ -1,4 +1,4 @@ -"""End-to-end trial CLI tests — INIT-2026-549 W1.5 workstream 2. +"""End-to-end trial CLI tests — [internal-tracker].5 workstream 2. Covers the user-facing flow promised in the README and ``simdrive --help``: diff --git a/simdrive/tests/test_unit.py b/simdrive/tests/test_unit.py index 8b2a159..7a92c03 100644 --- a/simdrive/tests/test_unit.py +++ b/simdrive/tests/test_unit.py @@ -29,7 +29,7 @@ def test_version_present(): def test_tool_count_is_thirty_two(): - """Canonical MCP tool surface = 32 tools (INIT-2026-549). + """Canonical MCP tool surface = 32 tools. Sourced from server._TOOLS. 
The categorized human-readable inventory lives in docs/MCP_TOOL_SURFACE.md; llms.txt mirrors the same list. Any change diff --git a/simdrive/tests/test_version_drift.py b/simdrive/tests/test_version_drift.py index 91a690e..b5d5a94 100644 --- a/simdrive/tests/test_version_drift.py +++ b/simdrive/tests/test_version_drift.py @@ -1,4 +1,4 @@ -"""Regression tests for Bug 2 — _disk_version() reads wrong package name (INIT-2026-543). +"""Regression tests for Bug 2 — _disk_version() reads wrong package name. server.py:_disk_version() calls importlib.metadata.version("specterqa-ios") but the package is now named "simdrive". In the Palace dogfood environment diff --git a/simdrive/tests/test_wda_resilience.py b/simdrive/tests/test_wda_resilience.py index 9d4f30f..a2c271d 100644 --- a/simdrive/tests/test_wda_resilience.py +++ b/simdrive/tests/test_wda_resilience.py @@ -1,4 +1,4 @@ -"""Resilience tests for simdrive.wda.client (INIT-2026-549). +"""Resilience tests for simdrive.wda.client. Covers the five audit items from the hardening sprint: diff --git a/src/specterqa/ios/backends/ax_backend.py b/src/specterqa/ios/backends/ax_backend.py index 5c47b9e..2a1681e 100644 --- a/src/specterqa/ios/backends/ax_backend.py +++ b/src/specterqa/ios/backends/ax_backend.py @@ -12,7 +12,7 @@ - pyobjc-framework-Cocoa and pyobjc-framework-Quartz installed - iOS Simulator running with an app -INIT-2026-525 — SpecterQA iOS AXUIElement backend. +[internal-tracker] — SpecterQA iOS AXUIElement backend. """ from __future__ import annotations diff --git a/src/specterqa/ios/backends/cgevents.py b/src/specterqa/ios/backends/cgevents.py index be36ca9..14ca5c0 100644 --- a/src/specterqa/ios/backends/cgevents.py +++ b/src/specterqa/ios/backends/cgevents.py @@ -9,7 +9,7 @@ - Simulator.app running and visible (CGEvents target the window) - No Accessibility permission required (unlike Accessibility Inspector tools) -INIT-2026-500 — SpecterQA iOS Headless Driver. 
+[internal-tracker] — SpecterQA iOS Headless Driver. """ from __future__ import annotations diff --git a/src/specterqa/ios/backends/indigo_hid.py b/src/specterqa/ios/backends/indigo_hid.py index 69b21d1..810fee0 100644 --- a/src/specterqa/ios/backends/indigo_hid.py +++ b/src/specterqa/ios/backends/indigo_hid.py @@ -52,7 +52,7 @@ [SimDeviceLegacyHIDClient sendWithDevice:data error:&error] -INIT-2026-500 — SpecterQA iOS Headless Driver. +[internal-tracker] — SpecterQA iOS Headless Driver. """ from __future__ import annotations @@ -1001,7 +1001,7 @@ def is_available(cls) -> bool: Previously this method returned ``True`` as soon as frameworks loaded, which caused :class:`BackendSelector` to pick IndigoHID on Xcode 16+ machines and then crash on first use. This fix was added in - INIT-2026-493 (auto-closeout pipeline). + [internal-tracker] (auto-closeout pipeline). Returns: bool: ``True`` only when HID client creation is expected to succeed. diff --git a/src/specterqa/ios/backends/protocol.py b/src/specterqa/ios/backends/protocol.py index f9d3163..affa326 100644 --- a/src/specterqa/ios/backends/protocol.py +++ b/src/specterqa/ios/backends/protocol.py @@ -5,7 +5,7 @@ use ``isinstance(obj, IOSBackend)`` as a sanity check, though the primary value is static-type-checked duck typing. -INIT-2026-525 — SpecterQA iOS Protocol refactor. +[internal-tracker] — SpecterQA iOS Protocol refactor. """ from __future__ import annotations diff --git a/src/specterqa/ios/backends/retry_policy.py b/src/specterqa/ios/backends/retry_policy.py index 17e0be8..2efd42d 100644 --- a/src/specterqa/ios/backends/retry_policy.py +++ b/src/specterqa/ios/backends/retry_policy.py @@ -23,7 +23,7 @@ def tap(self, x, y): # After 3 consecutive failures, _POLICY.is_open() == True # _POLICY.call(...) raises SessionCrashedError immediately -INIT-2026-525 — SpecterQA iOS retry/timeout policy. +[internal-tracker] — SpecterQA iOS retry/timeout policy. 
""" from __future__ import annotations diff --git a/src/specterqa/ios/backends/selector.py b/src/specterqa/ios/backends/selector.py index 5abc2b5..5e963df 100644 --- a/src/specterqa/ios/backends/selector.py +++ b/src/specterqa/ios/backends/selector.py @@ -16,8 +16,8 @@ CGEvents for backward-compat with CLI commands — those backends are not on the MCP product path. -INIT-2026-500 — SpecterQA iOS Headless Driver. -INIT-2026-525 — Consolidate backend selection; define IOSBackend Protocol. +[internal-tracker] — SpecterQA iOS Headless Driver. +[internal-tracker] — Consolidate backend selection; define IOSBackend Protocol. """ from __future__ import annotations diff --git a/src/specterqa/ios/backends/xctest_client.py b/src/specterqa/ios/backends/xctest_client.py index 16d7f6b..8a6be15 100644 --- a/src/specterqa/ios/backends/xctest_client.py +++ b/src/specterqa/ios/backends/xctest_client.py @@ -8,7 +8,7 @@ The caller is responsible for any pixel→point conversion; this client forwards coordinates as-is. -INIT-2026-500 — SpecterQA iOS Headless Driver. +[internal-tracker] — SpecterQA iOS Headless Driver. """ from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/ai_context.py b/src/specterqa/ios/drivers/simulator/ai_context.py index a7ecb5a..61e8fb9 100644 --- a/src/specterqa/ios/drivers/simulator/ai_context.py +++ b/src/specterqa/ios/drivers/simulator/ai_context.py @@ -4,7 +4,7 @@ performance, app state, crashes) into a :class:`DriverContext` dataclass and formats it as markdown text suitable for injection into Claude's context window. -INIT-2026-492 — SpecterQA iOS Simulator Driver, Phase 3. +[internal-tracker] — SpecterQA iOS Simulator Driver, Phase 3. 
""" from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/capture.py b/src/specterqa/ios/drivers/simulator/capture.py index 7866e36..ad6b7b7 100644 --- a/src/specterqa/ios/drivers/simulator/capture.py +++ b/src/specterqa/ios/drivers/simulator/capture.py @@ -4,7 +4,7 @@ resizes them, and provides pixel-diff and polling utilities for visual change detection. -INIT-2026-492. +[internal-tracker]. """ from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/console.py b/src/specterqa/ios/drivers/simulator/console.py index 7700842..745058b 100644 --- a/src/specterqa/ios/drivers/simulator/console.py +++ b/src/specterqa/ios/drivers/simulator/console.py @@ -4,7 +4,7 @@ and continuously parses JSON log lines into :class:`LogEntry` objects stored in a thread-safe ring buffer. -INIT-2026-492 — SpecterQA iOS Simulator Driver, Phase 2. +[internal-tracker] — SpecterQA iOS Simulator Driver, Phase 2. """ from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/driver.py b/src/specterqa/ios/drivers/simulator/driver.py index 1ce1d94..61ac647 100644 --- a/src/specterqa/ios/drivers/simulator/driver.py +++ b/src/specterqa/ios/drivers/simulator/driver.py @@ -5,7 +5,7 @@ network, perf, state, crash, ai_context) and exposes a clean ActionExecutor protocol plus lifecycle and context-aggregation methods. -Backend selection (INIT-2026-500): +Backend selection: On start(), a BackendSelector probes available touch backends in priority order (XCTest → IndigoHID → CGEvents) and stores the winner in ``self._backend``. All gesture methods (click, scroll, fill, keyboard) @@ -13,7 +13,7 @@ InteractionLayer is used as a fallback so that tests that mock InteractionLayer continue to pass unchanged. -INIT-2026-492 / INIT-2026-500 — SpecterQA iOS Simulator Driver. +[internal-tracker] / [internal-tracker] — SpecterQA iOS Simulator Driver. 
""" from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/interaction.py b/src/specterqa/ios/drivers/simulator/interaction.py index 9943aac..b41005a 100644 --- a/src/specterqa/ios/drivers/simulator/interaction.py +++ b/src/specterqa/ios/drivers/simulator/interaction.py @@ -21,7 +21,7 @@ that this module is always importable. Tests mock ``Quartz.*`` at call-time and rely on the stub being present at the ``Quartz`` global name. -INIT-2026-492 / INIT-2026-493. +[internal-tracker] / [internal-tracker]. """ from __future__ import annotations diff --git a/src/specterqa/ios/drivers/simulator/network.py b/src/specterqa/ios/drivers/simulator/network.py index 19315db..af18687 100644 --- a/src/specterqa/ios/drivers/simulator/network.py +++ b/src/specterqa/ios/drivers/simulator/network.py @@ -14,7 +14,7 @@ Both sources feed into a shared :class:`NetworkSnapshot` that the MCP ``ios_network`` tool exposes to Claude. -INIT-2026-492 — SpecterQA iOS Simulator Driver, Phase 2. +[internal-tracker] — SpecterQA iOS Simulator Driver, Phase 2. """ from __future__ import annotations diff --git a/src/specterqa/ios/mcp/__init__.py b/src/specterqa/ios/mcp/__init__.py index f835c80..9e31b28 100644 --- a/src/specterqa/ios/mcp/__init__.py +++ b/src/specterqa/ios/mcp/__init__.py @@ -1 +1 @@ -# specterqa.ios.mcp — iOS-specific MCP Server package (M17a, INIT-2026-492) +# specterqa.ios.mcp — iOS-specific MCP Server package (M17a) diff --git a/src/specterqa/ios/mcp/server.py b/src/specterqa/ios/mcp/server.py index 3c20497..7c7499d 100644 --- a/src/specterqa/ios/mcp/server.py +++ b/src/specterqa/ios/mcp/server.py @@ -37,7 +37,7 @@ ios_wait_for_element, ios_wait_idle, ios_capture_state, ios_action_with_logs) — replaced by ios_observe + ios_act. -INIT-2026-500 — SpecterQA iOS Headless Driver. +[internal-tracker] — SpecterQA iOS Headless Driver. 
""" from __future__ import annotations @@ -50,7 +50,7 @@ import os import subprocess -# INIT-2026-525: Tier-based access control for MCP tools. +# [internal-tracker]: Tier-based access control for MCP tools. # Import is deferred-safe — tier_gate only imports from the standard library # and lazily imports LicenseValidator on first tool call. from specterqa.ios.mcp.tier_gate import require_tier # noqa: E402 diff --git a/src/specterqa/ios/mcp/tier_gate.py b/src/specterqa/ios/mcp/tier_gate.py index 87a6506..9515351 100644 --- a/src/specterqa/ios/mcp/tier_gate.py +++ b/src/specterqa/ios/mcp/tier_gate.py @@ -1,6 +1,6 @@ """Tier-based access control for the SpecterQA MCP tool surface. -INIT-2026-525 — First revenue play; enforces license tier gating so trial/indie +[internal-tracker] — First revenue play; enforces license tier gating so trial/indie users cannot access premium diagnostic and parallel-session tools. Tier hierarchy (ascending privilege): diff --git a/src/specterqa/ios/project_injector.py b/src/specterqa/ios/project_injector.py index 21687bf..5d28c2a 100644 --- a/src/specterqa/ios/project_injector.py +++ b/src/specterqa/ios/project_injector.py @@ -6,7 +6,7 @@ 3. Generates a .xctestrun plist pairing their app with our test bundle 4. Stores the result in ~/.specterqa/runner-build/<bundle_id>/ -INIT-2026-506 — SpecterQA iOS v3 project-injection runner build. +[internal-tracker] — SpecterQA iOS v3 project-injection runner build. """ from __future__ import annotations diff --git a/src/specterqa/ios/runner_process.py b/src/specterqa/ios/runner_process.py index 0e349e3..6b900e8 100644 --- a/src/specterqa/ios/runner_process.py +++ b/src/specterqa/ios/runner_process.py @@ -3,7 +3,7 @@ Every path that needs a runner asks RunnerProcess for one. No path bypasses it. -INIT-2026-525 — SpecterQA iOS v14.0.0a1. +[internal-tracker] — SpecterQA iOS v14.0.0a1. 
""" from __future__ import annotations diff --git a/src/specterqa/ios/session_manager.py b/src/specterqa/ios/session_manager.py index 1fe2626..c59c4f1 100644 --- a/src/specterqa/ios/session_manager.py +++ b/src/specterqa/ios/session_manager.py @@ -7,7 +7,7 @@ For physical devices: skips all simctl operations, deploys the runner via xcodebuild, and connects to the device over USB/WiFi. -INIT-2026-506 — SpecterQA iOS v3 session manager. +[internal-tracker] — SpecterQA iOS v3 session manager. """ from __future__ import annotations diff --git a/src/specterqa/ios/som_annotator.py b/src/specterqa/ios/som_annotator.py index 136f020..bcb1f34 100644 --- a/src/specterqa/ios/som_annotator.py +++ b/src/specterqa/ios/som_annotator.py @@ -10,11 +10,11 @@ Research shows SoM prompting improves UI agent accuracy from ~50% to ~90%+ by eliminating coordinate prediction entirely. -INIT-2026-493 — SpecterQA SoM annotator. -INIT-2026-506 — XCTest runner /source integration. -INIT-2026-508 — Remove WDA fallback from SoM pipeline. -INIT-2026-509 — Restore WDA as optional fallback with timeout guard. -INIT-2026-R&D — Eliminate JSON→XML→parse roundtrip; direct JSON parsing. +[internal-tracker] — SpecterQA SoM annotator. +[internal-tracker] — XCTest runner /source integration. +[internal-tracker] — Remove WDA fallback from SoM pipeline. +[internal-tracker] — Restore WDA as optional fallback with timeout guard. +[internal-tracker] — Eliminate JSON→XML→parse roundtrip; direct JSON parsing. """ from __future__ import annotations diff --git a/src/specterqa/ios/som_runner.py b/src/specterqa/ios/som_runner.py index ffd4780..c14b6cf 100644 --- a/src/specterqa/ios/som_runner.py +++ b/src/specterqa/ios/som_runner.py @@ -16,9 +16,9 @@ Research: SoM prompting improves UI agent accuracy from ~50% to ~90%+ by eliminating coordinate prediction entirely. -INIT-2026-493 — SpecterQA SoM test runner. -INIT-2026-506 — XCTest runner integration, non-blocking mode. 
-INIT-2026-508 — Remove WDA fallback from SoM pipeline. +[internal-tracker] — SpecterQA SoM test runner. +[internal-tracker] — XCTest runner integration, non-blocking mode. +[internal-tracker] — Remove WDA fallback from SoM pipeline. """ from __future__ import annotations diff --git a/src/specterqa/ios/wda_driver.py b/src/specterqa/ios/wda_driver.py index 43c64ea..932c106 100644 --- a/src/specterqa/ios/wda_driver.py +++ b/src/specterqa/ios/wda_driver.py @@ -23,7 +23,7 @@ b64, w, h = driver.screenshot() driver.tap(196, 400) # screenshot-pixel coords -INIT-2026-493 — SpecterQA WDA touch backend. +[internal-tracker] — SpecterQA WDA touch backend. """ from __future__ import annotations diff --git a/tests/conftest.py b/tests/conftest.py index 73f2a46..85b1556 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ # --------------------------------------------------------------------------- -# INIT-2026-525: Tier-gate bypass for existing tests +# [internal-tracker]: Tier-gate bypass for existing tests # --------------------------------------------------------------------------- # # Tier enforcement (tier_gate.py) gates MCP tool functions behind license checks. diff --git a/tests/packaging/test_publish_gates.py b/tests/packaging/test_publish_gates.py index bb108ac..de9f490 100644 --- a/tests/packaging/test_publish_gates.py +++ b/tests/packaging/test_publish_gates.py @@ -1,4 +1,4 @@ -"""TDD for INIT-2026-549 W1 publish-gate logic. +"""TDD for [internal-tracker] publish-gate logic. The publish workflow (.github/workflows/specterqa-ios-publish.yml renamed to trigger on `simdrive-v*` tags) enforces three pre-publish gates: diff --git a/tests/regression/test_p0_fixes.py b/tests/regression/test_p0_fixes.py index 8fc64c4..228a620 100644 --- a/tests/regression/test_p0_fixes.py +++ b/tests/regression/test_p0_fixes.py @@ -1,6 +1,6 @@ """Tests verifying the P0 Xcode 16 / injector fixes are correctly applied. 
-INIT-2026-511 +[internal-tracker] """ from __future__ import annotations diff --git a/tests/regression/test_protocol_conformance.py b/tests/regression/test_protocol_conformance.py index 93acbff..13494fd 100644 --- a/tests/regression/test_protocol_conformance.py +++ b/tests/regression/test_protocol_conformance.py @@ -4,7 +4,7 @@ signatures. No network connections are made; these tests exercise the class surface only. -INIT-2026-525 — SpecterQA iOS Protocol refactor. +[internal-tracker] — SpecterQA iOS Protocol refactor. """ from __future__ import annotations diff --git a/tests/regression/test_retry_policy.py b/tests/regression/test_retry_policy.py index af47504..9ffc1a2 100644 --- a/tests/regression/test_retry_policy.py +++ b/tests/regression/test_retry_policy.py @@ -6,7 +6,7 @@ - Circuit breaker trips after 3 consecutive ConnectionError failures - Circuit breaker resets after a successful call -INIT-2026-525 — SpecterQA iOS retry/timeout policy. +[internal-tracker] — SpecterQA iOS retry/timeout policy. """ from __future__ import annotations diff --git a/tests/test_b5_domain_a_mcp_session_lifecycle.py b/tests/test_b5_domain_a_mcp_session_lifecycle.py index 58a0f55..645e1df 100644 --- a/tests/test_b5_domain_a_mcp_session_lifecycle.py +++ b/tests/test_b5_domain_a_mcp_session_lifecycle.py @@ -8,7 +8,7 @@ All tests MUST fail red until production code implements the features. No live simulator required — run via: pytest -m "not live" tests/test_b5_domain_a_mcp_session_lifecycle.py -INIT-2026-549 (SimDrive Launch Sprint W1) / b5 domain-A test sprint. +[internal-tracker] (SimDrive Launch Sprint W1) / b5 domain-A test sprint. """ from __future__ import annotations diff --git a/tests/test_jwt_decode.py b/tests/test_jwt_decode.py index 8bd5a4b..4344f35 100644 --- a/tests/test_jwt_decode.py +++ b/tests/test_jwt_decode.py @@ -6,7 +6,7 @@ 3. _decode_jwt() handles base64 payloads with missing padding. 4. _check_offline_grace() honours offline_exp / iat from the decoded payload. 
-Initiative: INIT-2026-525 +Initiative: [internal-tracker] Finding: SEC-HIGH-005 """ diff --git a/tests/test_mcp_tool_registration.py b/tests/test_mcp_tool_registration.py index 4ac91fb..5d0d4de 100644 --- a/tests/test_mcp_tool_registration.py +++ b/tests/test_mcp_tool_registration.py @@ -9,7 +9,7 @@ These are pure unit tests — no live simulator, no network, no MCP transport. The FastMCP instance is constructed in process and list_tools() is queried directly. -INIT-2026-525 — SpecterQA iOS v14.0.0a1 Phase 1 audit. +[internal-tracker] — SpecterQA iOS v14.0.0a1 Phase 1 audit. """ from __future__ import annotations diff --git a/tests/test_runner_process.py b/tests/test_runner_process.py index 03c2fa3..2eceb22 100644 --- a/tests/test_runner_process.py +++ b/tests/test_runner_process.py @@ -657,7 +657,7 @@ def test_stop_process_timeout_falls_back_to_kill(self): class TestRegistryFailedReacquisition: - """OQ-1 Chairman decision: get_or_create on FAILED returns the SAME failed instance + """OQ-1 maintainer decision: get_or_create on FAILED returns the SAME failed instance (callers must explicitly stop() to recycle). Validate this semantic.""" def setup_method(self): diff --git a/tests/test_tier_enforcement.py b/tests/test_tier_enforcement.py index c4fae61..18b23e4 100644 --- a/tests/test_tier_enforcement.py +++ b/tests/test_tier_enforcement.py @@ -1,6 +1,6 @@ """Tier enforcement tests for MCP tool surface. -TDD tests for INIT-2026-525: verify that MCP tools enforce license tier gating. +TDD tests for [internal-tracker]: verify that MCP tools enforce license tier gating. These tests are pure unit tests — no live simulator, no network required. The LicenseValidator is mocked so tests run hermetically.