diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eda9f6f..2cf45df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,8 +77,15 @@ jobs: --base-sha "${{ github.event.pull_request.base.sha }}" \ --head-sha "${{ github.event.pull_request.head.sha }}" - - name: Run runtime and installer unit tests - run: python3 -m unittest discover tests -v + - name: Run hard gate tests (contract + smoke + distribution) + run: | + pip install --quiet pytest + python3 -m pytest tests -m "not implementation_mirror" -v + + - name: Run implementation-mirror tests (advisory) + if: always() + continue-on-error: true + run: python3 -m pytest tests -m "implementation_mirror" -v - name: Run runtime smoke check run: bash scripts/check-runtime-smoke.sh diff --git a/.sopify-skills/blueprint/tasks.md b/.sopify-skills/blueprint/tasks.md index f50af69..bf28a45 100644 --- a/.sopify-skills/blueprint/tasks.md +++ b/.sopify-skills/blueprint/tasks.md @@ -21,8 +21,9 @@ | P3a | contract_aligned_cleanup | P2 | 已完成。以 protocol/validator 已稳定为前提,清理 runtime 旧 contract 面 | | P3b | perimeter_cleanup | P3a | 已完成。外围面清理:release gate 修复、CHANGELOG 去文件列表化、tests 分类、旧概念清理 | | P4a | external_surface_freeze | P3b | 已完成。薄切片:冻结不可删外部消费面 keep-list | -| P4b | runtime_surface_consolidation | P4a | Runtime 结构性减重(26K→<20K),先删后并 | -| P4c | host_consumption_governance | P4a | 宿主只消费 contract,不定义 truth | +| P4b | runtime_surface_consolidation | P4a | 已完成。prove-kept-or-delete 证明 <20K 不可达,实删 15 LOC | +| P4b.5 | runtime_optionality_audit | P4b | 设计/审计型:宿主接入层级矩阵,定义 runtime 可选边界 | +| P4c | host_consumption_governance | P4b.5 | 宿主只消费 contract,不定义 truth | ### P0: Blueprint Rebaseline(已完成) @@ -52,23 +53,21 @@ ✅ 已完成。Frozen External Surface keep-list(15 条)+ Output Rendering Audit(20 条字段分类 + 5 个已知热点)。纯文档变更,不写运行代码。归档:`history/2026-05/20260509_p4a_external_surface_freeze/` -### P4b: Runtime Surface Consolidation +### P4b: Runtime Surface Consolidation(已完成) -P4a keep-list 确认后执行。先删后并,不先设计新结构。 +✅ 
已完成。prove-kept-or-delete 全量扫描证明 runtime 在当前 contract 约束下已接近最小可行体积(24,334 LOC)。<20K 目标在不改 distribution/installer contract 的约束下不可达。交付物:Phase 0 test re-audit(653 hard / 31 soft gate)、Phase 1 CI/preflight 真实降载、Phase 2 全量死代码扫描(15 LOC 删除)。归档:`history/2026-05/20260509_p4b_runtime_surface_consolidation/` -- 目标:runtime/*.py LOC 26K → <20K -- 红线:ActionProposal → Validator → Handoff/Receipt/Archive 主链完整;keep-list 内保留,keep-list 外默认删除 -- 执行顺序(硬约束,不可并行跳跃): - 1. release gate 范围收口 — 发布门禁从全量测试缩为 contract + smoke + distribution + eval gate(runner 切换在 P3b 完成) - 2. runtime 旧面删除 — 砍 compat / bridge / fallback / 旧分支;此时 implementation-mirror tests 仍在,作为管道完整性验证 - 3. implementation-mirror tests 收口 — runtime 瘦身稳定后,删除保护对象已不存在的镜像测试 -- 不允许在 release gate 未降载前同步大规模删除 runtime 与 mirror tests -- 约束:不改 machine contract、不改 protocol 语义、不扩 canonical budget -- 不先承诺合并方案 — 删完再评估是否需要并文件 +### P4b.5: Runtime Optionality & Host Onboarding Audit(待开) + +设计/审计型,不大改代码。P4b 证明 runtime 不能靠内部删代码大幅瘦身,根因是大量 runtime 代码实际承载 distribution/installer contract。下一步需定义宿主接入层级,明确"runtime 可选"的规则边界。 + +- 产出:宿主接入层级矩阵(convention_only / payload_capable / deep_verified),每层定义必须消费、可选消费、禁止依赖的面 +- 不改代码:只做策略和 blast radius 审计,为 P4c 定边界 +- 位置:P4b-close 后、P4c 前执行 ### P4c: Host Consumption Governance -宿主只消费稳定 contract,不再定义 machine truth。独立于减重,P4a 之后可与 P4b 并行或顺序执行。 +宿主只消费稳定 contract,不再定义 machine truth。P4b.5 宿主接入层级矩阵就绪后执行。 - prompt 不定义机器契约、不维护路由表 - doctor/status 输出只渲染 machine truth,不作为 truth source @@ -82,6 +81,14 @@ P4a keep-list 确认后执行。先删后并,不先设计新结构。 ## 未完成长期项 +### P4b 后续路线(P4c 后视评估) + +- [ ] P4d New Host Pilot:选 1 个非 deep 宿主做试点(convention_only 或 payload_capable),不接完整 runtime。验证 P4b.5/P4c 的分层是否真正降低接入成本。可与 P4c 后期并行启动。 +- [ ] P5 Contract Surface Shrinkage:在 P4d 验证后,按 evidence 逐项删除或降级 deep runtime 专属的 contract surface(bridge capability / manifest entry / installer bundle 项)。此时已知哪些 contract 是新宿主需要 vs 历史包袱。 +- [ ] P6 Runtime Sunset / Reference Runtime:将 runtime 明确降级为 reference implementation 或 deep host hardening layer。新宿主默认走 Protocol/Convention 
模式,runtime 不再承载新增产品能力。可能与 P5 合并。 + +### 其他长期项 + - [ ] 补宿主级 first-hop ingress proof / diagnostics - [ ] `~compare` shortlist facade 收敛进默认主链路 - [-] `workflow-learning` 独立 helper 与更稳定 replay retrieval → P3b replay 能力下线后,未来如需重设计另行评估 @@ -90,7 +97,7 @@ P4a keep-list 确认后执行。先删后并,不先设计新结构。 - [ ] CrossReview Phase 4a:advisory skill 接入 develop 后审查 - [ ] Plan intake checklist(在 intake 模板/脚本落地前,后续新 plan 开包时手工回答以下问题): - 1. 主命中哪个蓝图里程碑(P3b / P4a / P4b / P4c)?若不命中主线,须显式标记为"长期项"或"延后项",不强行归类 + 1. 主命中哪个蓝图里程碑(P4b.5 / P4c / P4d / P5 / P6)?若不命中主线,须显式标记为"长期项"或"延后项",不强行归类 2. 这次改动定义的是 contract acceptance boundary,还是 execution strategy / implementation wave?(前者进 blueprint,后者留方案包) 3. 是否新增、删除、替代 action / route / state / checkpoint / receipt 中的任一 machine truth?若是,对照 `design.md` 削减预算表 4. 若涉及 legacy surface,替代 contract 是否已在 `design.md` sunset 表中对应里程碑稳定? diff --git a/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/background.md b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/background.md new file mode 100644 index 0000000..86e3acb --- /dev/null +++ b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/background.md @@ -0,0 +1,43 @@ +# 变更提案: P4b Runtime Surface Consolidation + +## 需求背景 + +P4a 已冻结外部消费面 keep-list(15 条),Host Capability Governance bridge 已落地。现在有明确的红线边界:keep-list 内保留,keep-list 外默认可删。 + +当前 `runtime/*.py` 共 25,534 LOC,55 个 .py 文件。蓝图目标 <20,000 LOC,需削减 ~5,500+ LOC。 + +### 削减预算实况 + +两轮代码审计结论: + +| 削减来源 | 估算 LOC | 备注 | +|----------|---------|------| +| engine.py 旧路由/bridge/checkpoint 胶水 | 1,200–1,800 | 最大单点;需逐段验证 | +| failure_recovery.py legacy 恢复路径 | 250–400 | 明确的 legacy 快照处理 | +| decision_bridge.py 全文或大部分 | 180–220 | CLI fallback/text renderer bridge | +| workspace_preflight.py fallback/legacy | 220–320 | 最强 fallback 文件 | +| clarification_bridge.py 全文或大部分 | 140–180 | host-side bridge helper | +| plan_orchestrator.py bridge 胶水 | 120–180 | CLI/bridge wrapper | +| context_snapshot.py compat 字段 | 50–80 | legacy global 
review state | +| router.py 旧分支 | 40–80 | old-branch classification | +| gate.py legacy wrapper/fallback | 15–30 | action_proposal_retry 主路径在 keep-list(blueprint design.md:354),不可删;仅删周边 legacy 分支 | +| message_templates.py 模板精简 | 20–60 | 渲染模板胶水 | +| action_intent.py fallback | 20–40 | decision fallback router | +| 其他散布 compat | 100–200 | archive_lifecycle, context_v1_scope 等 | +| **合计** | **2,355–3,590** | **实际:15 LOC** | + +**P4b-close 结论**:prove-kept-or-delete 全量扫描证明,原估计基于错误假设(fallback/bridge/compat 被视为"可删旧面",实际多已变为 machine contract / distribution contract / hard gate 保护面)。实际死代码仅 15 LOC。最终 baseline:24,334 LOC。详见 design.md Phase 2 执行结论。 + +## 与蓝图里程碑的关系 + +- **定位**:P4b Runtime Surface Consolidation(tasks.md P4b 节) +- **前提**:P4a freeze 已完成(keep-list 是红线) +- **下游**:P4c Host Consumption Governance(P4b 减完旧面后 P4c 治理范围更小) + +## Plan Intake Checklist + +1. **主命中里程碑**:P4b +2. **改动性质**:runtime code reduction — 删除 compat/bridge/fallback/dead code +3. **Machine truth 变更**:无。不改 machine contract、不改 protocol 语义、不扩 canonical budget +4. **Legacy surface**:大量 legacy surface 将被删除 +5. **Core promotion rule / hard max 影响**:无(削减预算表的 target/hard max 不变) diff --git a/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/design.md b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/design.md new file mode 100644 index 0000000..657de15 --- /dev/null +++ b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/design.md @@ -0,0 +1,202 @@ +# 技术设计: P4b Runtime Surface Consolidation + +## 方案概述 + +分 4 个阶段顺序执行(硬约束,不可并行跳跃): +0. Test inventory re-audit + hard/soft gate matrix +1. CI / release-preflight 真实降载 +2. Runtime 旧面删除(prove-kept-or-delete) +3. 
Implementation-mirror tests 收口 + +原则:"先删后并,不先设计新结构"。 + +## Scope 边界 + +### 在 scope 内 + +- 删除 keep-list 外的 compat / bridge / fallback / dead code +- Test inventory re-audit:纠正 contract / implementation-mirror 误分类 +- CI + release-preflight 真实降载:hard gate 仅含 contract + smoke + distribution + eval,implementation-mirror 降为 advisory +- 删除保护对象已不存在的 implementation-mirror tests +- 删完后评估是否有结构性合并价值(非 LOC 驱动,不预先承诺) + +### 不在 scope 内 + +- 不改 machine contract / protocol 语义 +- 不扩 canonical budget(削减预算表的 target/hard max 不变) +- 不做 output.py 改造(属 P4c) +- 不改 host adapter / installer +- 不预先设计新模块结构 + +## 阶段设计 + +### Phase 0: Test Inventory Re-audit + Hard/Soft Gate Matrix + +当前测试分类存在**语义失真**:23 个测试文件标为 contract,但其中多个实际测试 runtime 内部实现(如 `test_runtime_failure_recovery.py`、`test_runtime_router.py`),只有 1 个标为 implementation-mirror(`test_runtime_knowledge_layout.py`)。如果不先纠正分类,Phase 2 删代码会触发标为 contract 的测试红灯,卡死 hard gate。 + +注意:混合文件(如 engine.py)的测试可能部分是 contract、部分是 mirror——engine.py 本身在主链上,但也包含大量 compat/bridge/旧 route 胶水。对此类文件需要**用例级**而非文件级判定。 + +**混合文件分类承载机制**:在测试文件头部保留 `# Test classification: contract`(表示文件主体分类),对其中属于 implementation-mirror 的用例方法加 `@pytest.mark.implementation_mirror` marker。CI/preflight 按 marker 选择性跳过:hard gate 用 `-m "not implementation_mirror"` 运行,soft gate 用 `-m implementation_mirror` 运行。不拆文件,避免增加文件数。 + +**产出物**: +1. 每个 `tests/test_*.py` 的 contract / implementation-mirror / smoke / distribution 重标(混合文件的测试按用例粒度判定) +2. Hard/soft gate matrix:contract + smoke + distribution + eval = hard gate;implementation-mirror = soft gate(advisory) +3. 
判定标准(锚点优先级):测试用例的保护对象在 **keep-list** 或 **canonical main chain** 或 **distribution/install user-facing contract** 中 → contract;否则 → implementation-mirror。CI hard gate 是这些锚点的投影结果,不反过来作为分类的原始事实 + +### Phase 1: CI / Release-Preflight 真实降载 + +当前 `.github/workflows/ci.yml:80` 直接跑 `python3 -m unittest discover tests -v`(全量单测硬阻断 CI),`scripts/release-preflight.sh:67` 跑 `python3 -m pytest "$ROOT_DIR/tests" -v`(全量单测硬阻断发布)。两者的 runner 还不一致(unittest vs pytest)。如果只改 preflight 文案分层而不改 CI test selection,Phase 2 删代码仍会卡死。 + +**当前 gate 项(release-preflight.sh)**: +1. sync skills +2. verify sync +3. version consistency +4. builtin catalog drift +5. fail-close contract +6. context checkpoints +7. runtime unit tests(`pytest tests`) +8. install/payload bootstrap smoke +9. prompt runtime gate smoke +10. bundle runtime smoke +11. optional skill eval quality gate + +**收口策略**:不删除任何 gate 项。将 gate 分为两层: +- **hard gate**(blocking):contract checks (1-6) + (7) 中未标记 `implementation_mirror` 的测试(contract / smoke / distribution)+ smoke (8-10) + eval (11) +- **soft gate**(advisory,不阻断发布/CI):(7) 中标记为 `implementation_mirror` 的测试 + +落实到两个入口: +- `scripts/release-preflight.sh:67`:将 `pytest tests` 替换为仅跑 hard gate 分类的测试;implementation-mirror 测试降为 advisory(失败不阻断) +- `.github/workflows/ci.yml:80`:将 `unittest discover tests` 替换为仅跑 hard gate 分类的测试;implementation-mirror 测试降为 advisory step + +hard/soft 边界由 Phase 0 产出的测试分类决定,不由测试框架(unittest vs pytest)决定。 + +### Phase 2: Runtime 旧面删除(prove-kept-or-delete) + +方法论:不按文件修枝,而是按 P4a frozen surface 反推。对每个 runtime 文件/函数,验证三个条件: +1. 在 keep-list 15 条中? +2. 在 ActionProposal → Validator → Handoff/Receipt/Archive 主链调用图上? +3. 在 distribution/install user-facing contract 中? 
+ +三个都不命中 → 默认整段删除,不做精细缝合。三个命中任一 → 保留,仅删其内部 legacy wrapper/fallback 分支。CI hard gate 是上述三个锚点的投影,不反过来作为保留依据。 + +以下 Tier 分层是 prove-kept-or-delete 验证的预期结果排序,不是穷举清单——验证中发现的额外可删面直接归入对应 Tier。按削减信心和影响面分 3 个 tier 执行: + +**Tier 1: 高信心删除(~700–1,000 LOC)** + +| 文件 | 动作 | 估算 LOC | 理由 | +|------|------|---------|------| +| decision_bridge.py (864) | 大幅裁剪或整体删除 | 180–220 | CLI fallback/text renderer bridge;如 plan_orchestrator 不再消费则可整体删 | +| clarification_bridge.py (401) | 大幅裁剪或整体删除 | 140–180 | host-side bridge helper;同上 | +| workspace_preflight.py (925) | 裁剪 legacy/fallback 段 | 220–320 | vendored fallback、legacy workspace entry、LEGACY_* 分支 | +| plan_orchestrator.py (272) | 裁剪 bridge 胶水 | 120–180 | CLI/bridge wrapper;如 bridge 文件删除则大部分可删 | + +**Tier 2: 中信心删除(~600–1,000 LOC)** + +| 文件 | 动作 | 估算 LOC | 理由 | +|------|------|---------|------| +| failure_recovery.py (719) | 裁剪 legacy 恢复路径 | 250–400 | legacy snapshot handling + 通用 evaluator | +| context_snapshot.py (973) | 裁剪 compat 字段 | 50–80 | current_plan_proposal compat、legacy global review state | +| router.py (795) | 裁剪旧分支分类 | 40–80 | old-branch / fallback classification | +| gate.py (941) | 仅裁剪 legacy wrapper/fallback | 15–30 | action_proposal_retry 主路径在 keep-list(blueprint design.md:354),不可删;仅删周边 legacy 分支(如 _fallback_state_contract 等) | +| message_templates.py (265) | 精简模板 | 20–60 | 渲染模板胶水 | +| action_intent.py (884) | 裁剪 fallback | 20–40 | DECISION_FALLBACK_ROUTER、allow_current_plan_fallback | + +**Tier 3: engine.py 专项(~1,200–1,800 LOC)** + +engine.py (2,737 LOC, 68 functions) 是最大单点。需要逐段审查: +- 旧 route 处理函数(已不在 canonical route family 内的) +- checkpoint 编排中支持被裁剪 checkpoint type 的段落 +- compat/bridge 胶水(调用 decision_bridge / clarification_bridge 的段) +- 已被 Tier 1 删除的文件的调用残留 + +engine.py 不预先承诺削减量——Tier 1 + Tier 2 删完后,根据调用图残留再定。 + +### Phase 3: Implementation-mirror Tests 收口 + +Phase 0 已完成测试分类重标,Phase 2 删完后,找出保护对象已不存在的 mirror tests: +- 对应 runtime 模块已删除的测试文件(Phase 0 已重标为 implementation-mirror) +- 测试 compat/bridge 行为的用例 +- 测试已删除 
route/checkpoint type 的用例 + +当前 `tests/` 共 19,158 LOC。预计可清理与 Phase 2 删除面对应的测试代码。 + +注意:Phase 0 的 re-audit 是本阶段可执行的前提。如果跳过 Phase 0,大量实际测试内部实现的文件仍标为 contract,Phase 2 删代码后 hard gate 会红,本阶段会退化为"边删代码边争论测试标签"。 + +## Phase 2 执行结论 + +### 全量 prove-kept-or-delete 扫描结果 + +对全部 runtime/*.py(24,354 LOC、20+ 文件)执行 prove-kept-or-delete 方法论,结论如下: + +**Tier 1 结果(全部偏离预期)**: + +| 文件 | 原估 LOC | 实际可删 | 原因 | +|------|---------|---------|------| +| decision_bridge.py | 180–220 | **0** | 不在 engine/gate 主链上,但绑定 distribution anchor(manifest.py、entry_guard.py、installer/validate.py、installer/runtime_bundle.py、check-runtime-smoke.sh)。删除需同步改 distribution 面,超出 P4b scope("不改 host adapter / installer")| +| clarification_bridge.py | 140–180 | **0** | 同上,distribution anchor 绑定 | +| workspace_preflight.py | 220–320 | **0** | vendored fallback (228 LOC) 是 bundle 部署下的生产路径(installer 包不可用时必须走),11 个 hard gate 测试验证。LEGACY_FALLBACK_SELECTED 分支被 5 个 hard gate 测试文件覆盖,是活跃 contract | +| plan_orchestrator.py | 120–180 | **0** | bridge 文件保留后,桥接胶水不可删 | + +**Tier 2 结果(极小量)**: + +| 文件 | 原估 LOC | 实际可删 | 原因 | +|------|---------|---------|------| +| failure_recovery.py | 250–400 | **0** | 全部在 distribution anchor 上 | +| context_snapshot.py | 50–80 | **0** | `_should_ignore_legacy_global_review_state` 被内部调用,改变行为有风险 | +| router.py | 40–80 | **9** | `_contains_intent` (3 LOC) + `_runtime_skill` (6 LOC) 确认死代码 | +| gate.py | 15–30 | **0** | `_action_proposal_from_command_alias` 在主路径上(gate.py:173),非 legacy | +| message_templates.py | 20–60 | **0** | 全部被 scripts 消费 | +| action_intent.py | 20–40 | **0** | `resolve_action_proposal` 被 gate.py:84 主动调用 | + +**Tier 3(engine.py 补充扫描)**: + +| 函数 | 可删 LOC | +|------|---------| +| `_phase_for_route` | **6** | + +**全量死代码总计:15 LOC**(router.py 9 + engine.py 6)。已删除并通过 hard gate 验证。 + +### 根因分析 + +P4b 原假设"fallback/bridge/compat 是可删旧面"不成立。实际复核发现: + +1. **vendored fallback 是生产路径**:workspace_preflight.py 的 except ModuleNotFoundError 块在 bundle 部署中(无 installer 包)是唯一可用路径 +2. 
**bridge 文件绑定 distribution anchor**:虽不在 engine/gate 主链上,但 manifest、installer、smoke 均依赖其存在 +3. **legacy 分支有活跃 contract 覆盖**:LEGACY_FALLBACK_SELECTED、legacy helper argv 等被 hard gate 测试保护 +4. **runtime 代码密度高**:全量死函数扫描仅发现 3 个(15 LOC),其余均在 3 个锚点(keep-list / 主链 / distribution)至少命中一个 + +### 结论 + +**runtime 在当前 contract 约束下已接近最小可行体积。** <20K LOC 目标在"不改 host adapter / installer、不删 contract surface"的约束下不可达。 + +P4b 的实际交付物不是 LOC 削减,而是: +1. Phase 0:test re-audit 建立了 653 hard / 31 soft gate 的分类基线 +2. Phase 1:CI + preflight 真实降载,解锁未来删代码时的 gate 隔离能力 +3. Phase 2:prove-kept-or-delete 全量扫描,用 evidence 证明了 runtime 的真实状态 + +### 后续方向建议(不在 P4b scope 内执行) + +进一步缩减 runtime 需要改变约束条件,有三个独立方向: + +**(A) Contract Surface Shrinkage**:显式删除部分 contract surface(如 bridge capability、legacy fallback contract),允许 manifest/installer/smoke/tests 跟着同步收缩。这不是"删实现"而是"少承诺能力"。需要独立的 blast radius 分析。 + +**(B) Canonical Registry Consolidation**:对暂时保留的 contract,将分散在 runtime/manifest/installer/tests 的重复投影收拢为单一事实源。前提是定量验证"真重复" LOC 足够大(建议 >500 LOC 才值得投入)。注意过度设计风险。 + +**(C) Runtime Sunset Roadmap**:战略层面,将 runtime 从"必须依赖"降级为"可选适配层",新宿主默认走 Protocol/Convention 模式。这是蓝图层面的方向决策(background.md:7, protocol.md:31),不适合作为战术执行计划。 + +推荐顺序:P4b-close → (A) 设计审计(不改代码)→ P4c。 + +## 红线约束 + +- ActionProposal → Validator → Handoff/Receipt/Archive 主链完整 +- P4a keep-list 内保留,keep-list 外默认删除 +- 不允许在 release gate 未降载前同步大规模删除 runtime 与 mirror tests +- 削减预算表的 target/hard max 不变 +- 不先承诺合并方案——删完再评估是否有结构性合并价值(非 LOC 驱动) + +## 风险 + +- **削减不足风险**:纯删估计到 22–23K,距 <20K 有 2–3K 缺口。缓解:prove-kept-or-delete 方法论比按文件修枝更激进,可能发现更多可删面;Tier 3 engine.py 专项可能补足;仅在有结构性价值时合并,不做 LOC 化妆 +- **主链回归风险**:删错导致 canonical flow 断裂。缓解:每个 tier 完成后跑 pytest,Phase 1 已保留 hard gate +- **engine.py 风险**:2.7K LOC 的巨型文件改动面大。缓解:不预先承诺削减量,逐段审查 +- **测试分类失真风险**:23 个 contract 中有多个实际测试内部实现。缓解:Phase 0 先做 re-audit,不跳过 diff --git a/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/tasks.md b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/tasks.md new 
file mode 100644 index 0000000..4132624 --- /dev/null +++ b/.sopify-skills/history/2026-05/20260509_p4b_runtime_surface_consolidation/tasks.md @@ -0,0 +1,82 @@ +--- +plan_id: 20260509_p4b_runtime_surface_consolidation +feature_key: p4b_runtime_surface_consolidation +level: standard +lifecycle_state: completed +knowledge_sync: + project: skip + background: skip + design: update + tasks: update +archive_ready: true +plan_status: completed +--- + +# 任务清单: P4b Runtime Surface Consolidation + +## 当前阶段目标 + +runtime/*.py 从 25,534 LOC 削减到 <20,000 LOC。先删后并,不先设计新结构。 + +**P4b-close 结论**:prove-kept-or-delete 全量扫描后,runtime 在当前 contract 约束下已接近最小可行体积。实际可删死代码仅 15 LOC(已删除)。最终 baseline:24,334 LOC。<20K 目标在不改 distribution/installer contract 的约束下不可达。 + +## Phase 0: Test Inventory Re-audit + Hard/Soft Gate Matrix + +- [x] 0.1 盘点所有 tests/test_*.py 的当前分类标记(contract / implementation-mirror / smoke / distribution) +- [x] 0.2 对每个标为 contract 的测试,验证其保护对象是否在 keep-list / canonical main chain / distribution·install user-facing contract 中;不命中者重标为 implementation-mirror +- [x] 0.3 产出 hard/soft gate matrix:contract + smoke + distribution + eval = hard gate;implementation-mirror = soft gate(advisory) +- [x] 0.4 提交 re-audit 结果,作为 Phase 1 降载的分类依据 + +## Phase 1: CI / Release-Preflight 真实降载 + +- [x] 1.1 release-preflight.sh 分层:hard gate (contract + smoke + distribution + eval) vs soft gate (implementation-mirror advisory) +- [x] 1.2 ci.yml test step 降载:将 `python3 -m unittest discover tests -v` 替换为仅跑 hard gate 分类的测试;implementation-mirror 测试降为 advisory step(失败不阻断 CI) +- [x] 1.3 验证 hard gate 通过(contract + smoke + distribution + eval 在当前代码上绿) +- [x] 1.4 验证 implementation-mirror advisory 在当前代码上也绿(基线确认) + +## Phase 2: Runtime 旧面删除(prove-kept-or-delete) + +方法论:对每个 runtime 文件/函数,验证是否在 keep-list / 主链调用图 / distribution·install user-facing contract 中。三个都不命中 → 默认整段删除。 + +### Tier 1: 高信心删除 → 全部偏离预期,不可删 + +- [x] 2.1 decision_bridge.py → **保留**:绑定 distribution anchor(manifest/installer/smoke),超出 P4b scope +- [x] 2.2 
clarification_bridge.py → **保留**:同上 +- [x] 2.3 workspace_preflight.py → **保留**:vendored fallback 是 bundle 部署生产路径,LEGACY_FALLBACK_SELECTED 有 hard gate 覆盖 +- [x] 2.4 plan_orchestrator.py → **保留**:bridge 保留后胶水不可删 +- [x] 2.5 Tier 1 完成后跑 pytest,确认主链完整 ✅ + +### Tier 2: 中信心删除 → 仅 9 LOC 死代码 + +- [x] 2.6 failure_recovery.py → **0 LOC**:全部在 distribution anchor 上 +- [x] 2.7 context_snapshot.py → **0 LOC**:内部调用,改行为有风险 +- [x] 2.8 router.py → **9 LOC 已删**:`_contains_intent` (3) + `_runtime_skill` (6) 确认死代码 +- [x] 2.9 gate.py → **0 LOC**:`_action_proposal_from_command_alias` 在主路径上 +- [x] 2.10 message_templates.py → **0 LOC**:被 scripts 消费 +- [x] 2.11 action_intent.py → **0 LOC**:`resolve_action_proposal` 被 gate.py 调用 +- [x] 2.12 其他散布 compat 清理 → **0 LOC**:全量扫描无更多死代码 +- [x] 2.13 Tier 2 完成后跑 pytest,确认主链完整 ✅ 653 passed + +### Tier 3: engine.py 专项 → 仅 6 LOC 死代码 + +- [x] 2.14 engine.py 全量死函数扫描 → `_phase_for_route` (6 LOC) 确认死代码,**已删** +- [x] 2.15-2.18 engine.py 全量验证完成,无更多可删面 +- [x] 2.19 结构性合并评估 → **不适用**:死代码仅 15 LOC,无合并目标 + +## Phase 3: Implementation-mirror Tests 收口 + +Phase 2 未删除任何 contract surface,mirror tests 保护对象均仍存在,无需收口。 + +- [x] 3.1 找出保护对象已不存在的 implementation-mirror tests → **无**(Phase 2 未删面) +- [x] 3.2-3.3 不需要执行 +- [x] 3.4 最终 LOC 盘点:runtime/*.py = 24,334 LOC;tests 未变 + +## 完成标准(修订后) + +- [x] prove-kept-or-delete 全量扫描完成,所有 runtime 文件/函数已验证锚点命中 +- [x] 确认可删死代码已删(15 LOC) +- [x] pytest hard gate 全绿(653 passed) +- [x] P4a keep-list 内面全部保留 +- [x] ActionProposal → Validator → Handoff/Receipt/Archive 主链完整 +- [x] 结论文档已写入 design.md +- [ ] ~~runtime/*.py LOC < 20,000~~ → 不可达,原因见 design.md Phase 2 执行结论 diff --git a/.sopify-skills/history/index.md b/.sopify-skills/history/index.md index 92c1134..46392ff 100644 --- a/.sopify-skills/history/index.md +++ b/.sopify-skills/history/index.md @@ -4,6 +4,7 @@ ## 索引 +- `2026-05-09` [`20260509_p4b_runtime_surface_consolidation`](2026-05/20260509_p4b_runtime_surface_consolidation/) - standard - P4b Runtime Surface Consolidation: prove-kept-or-delete 
全量扫描证明 runtime 已近最小体积(24,334 LOC),实删 15 LOC,<20K 目标在现有 contract 约束下不可达 - `2026-05-09` [`20260509_host_capability_governance`](2026-05/20260509_host_capability_governance/) - standard - Host Capability Governance(P4a→P4c bridge):3 级 canonical 梯度定义 + 接入判定 Checklist + Convention Quickstart 最小交付面 + Prompt 镜像治理原则 - `2026-05-09` [`20260509_p4a_external_surface_freeze`](2026-05/20260509_p4a_external_surface_freeze/) - standard - P4a External Surface Freeze: Frozen External Surface keep-list(15 条)+ Output Rendering Audit(20 条字段分类) - `2026-05-08` [`20260508_p3b_perimeter_cleanup`](2026-05/20260508_p3b_perimeter_cleanup/) - standard - 任务清单: P3b Perimeter Cleanup diff --git a/CHANGELOG.md b/CHANGELOG.md index b3cdc87..2694254 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,51 @@ Format: Summary → Changed → Plan Packages. File-level details live in `git l ## [Unreleased] +## [2026-05-09.175537] - 2026-05-09 + +### Summary + +- Archived 1 plan package(s); Changes across: Docs, Runtime, Skills. + +### Changed + +- **Docs**: Refined public documentation (2 files) +- **Runtime**: Updated runtime internals (2 files) +- **Skills**: Synced prompt-layer skills (4 files) + +### Plan Packages + +- `20260509_p4b_runtime_surface_consolidation` (archived) + +## [2026-05-09.170825] - 2026-05-09 + +### Summary + +- Archived 1 plan package(s); Changes across: Runtime. + +### Changed + +- **Runtime**: Updated runtime internals (2 files) + +### Plan Packages + +- `20260509_p4b_runtime_surface_consolidation` (archived) + +## [2026-05-09.152019] - 2026-05-09 + +### Summary + +- Updated 1 active plan package(s); Changes across: Scripts, Changed. 
+ +### Changed + +- **Scripts**: Adjusted maintenance scripts (1 files) +- **Changed**: Updated project files (1 files) + +### Plan Packages + +- `20260509_p4b_runtime_surface_consolidation` (active) + ## [2026-05-08.191000] - 2026-05-08 ### Summary diff --git a/Claude/Skills/CN/CLAUDE.md b/Claude/Skills/CN/CLAUDE.md index e01181c..159f0a0 100644 --- a/Claude/Skills/CN/CLAUDE.md +++ b/Claude/Skills/CN/CLAUDE.md @@ -1,5 +1,5 @@ - + # Sopify - 自适应 AI 编程助手 diff --git a/Claude/Skills/EN/CLAUDE.md b/Claude/Skills/EN/CLAUDE.md index 2402f6b..828cac8 100644 --- a/Claude/Skills/EN/CLAUDE.md +++ b/Claude/Skills/EN/CLAUDE.md @@ -1,5 +1,5 @@ - + # Sopify - Adaptive AI Programming Assistant diff --git a/Codex/Skills/CN/AGENTS.md b/Codex/Skills/CN/AGENTS.md index 3174fe7..ca68604 100644 --- a/Codex/Skills/CN/AGENTS.md +++ b/Codex/Skills/CN/AGENTS.md @@ -1,5 +1,5 @@ - + # Sopify - 自适应 AI 编程助手 diff --git a/Codex/Skills/EN/AGENTS.md b/Codex/Skills/EN/AGENTS.md index e095964..8903c40 100644 --- a/Codex/Skills/EN/AGENTS.md +++ b/Codex/Skills/EN/AGENTS.md @@ -1,5 +1,5 @@ - + # Sopify - Adaptive AI Programming Assistant diff --git a/README.md b/README.md index eb766d0..669c42f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) [![Docs](https://img.shields.io/badge/docs-CC%20BY%204.0-green.svg)](./LICENSE-docs) -[![Version](https://img.shields.io/badge/version-2026--05--08.191000-orange.svg)](#version-history) +[![Version](https://img.shields.io/badge/version-2026--05--09.175537-orange.svg)](#version-history) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](./CONTRIBUTING.md) English · [简体中文](./README.zh-CN.md) · [Quick Start](#quick-start) · [Contributors](./CONTRIBUTORS.md) diff --git a/README.zh-CN.md b/README.zh-CN.md index 99f1fda..9ae4b78 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -8,7 +8,7 @@ 
[![许可证](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) [![文档](https://img.shields.io/badge/docs-CC%20BY%204.0-green.svg)](./LICENSE-docs) -[![版本](https://img.shields.io/badge/version-2026--05--08.191000-orange.svg)](#版本历史) +[![版本](https://img.shields.io/badge/version-2026--05--09.175537-orange.svg)](#版本历史) [![欢迎PR](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](./CONTRIBUTING_CN.md) [English](./README.md) · 简体中文 · [快速开始](#快速开始) · [贡献者](./CONTRIBUTORS.md) diff --git a/runtime/engine.py b/runtime/engine.py index 5c7b247..de71ef8 100644 --- a/runtime/engine.py +++ b/runtime/engine.py @@ -1592,14 +1592,6 @@ def _find_skill(skills: tuple[SkillMeta, ...], skill_id: str) -> SkillMeta | Non return None -def _phase_for_route(decision: RouteDecision) -> str: - if decision.route_name in {"plan_only", "workflow", "light_iterate", "clarification_pending", "clarification_resume", "decision_pending", "decision_resume"}: - return "design" - if decision.route_name in {"resume_active", "exec_plan", "quick_fix"}: - return "develop" - return "analysis" - - def _build_skill_activation( *, decision: RouteDecision, diff --git a/runtime/router.py b/runtime/router.py index 5a0e9b2..50eea5e 100644 --- a/runtime/router.py +++ b/runtime/router.py @@ -767,11 +767,6 @@ def _split_active_plan_review_fragments(text: str) -> tuple[str, ...]: -def _contains_intent(text: str, keywords: Iterable[str]) -> bool: - lowered = text.lower() - return any(keyword.lower() in lowered for keyword in keywords) - - def _normalize(text: str) -> str: return " ".join(text.strip().lower().split()) @@ -784,12 +779,5 @@ def _candidate_skills(route_name: str, skills: Iterable[SkillMeta], *preferred: ) -def _runtime_skill(route_name: str, skills: Iterable[SkillMeta], skill_id: str) -> str | None: - return resolve_runtime_skill_id( - route_name, - skills, - fallback_preferred=skill_id, - ) - diff --git a/scripts/release-preflight.sh b/scripts/release-preflight.sh index 7a0fe4d..8b68a66 
100755 --- a/scripts/release-preflight.sh +++ b/scripts/release-preflight.sh @@ -64,7 +64,14 @@ run_step "Check version consistency" bash "$ROOT_DIR/scripts/check-version-consi run_step "Check builtin catalog drift" check_builtin_catalog_drift run_step "Check fail-close contract" python3 "$ROOT_DIR/scripts/check-fail-close-contract.py" run_step "Check context checkpoints" python3 "$ROOT_DIR/scripts/check-context-checkpoints.py" repo --root "$ROOT_DIR" -run_step "Run runtime unit tests" python3 -m pytest "$ROOT_DIR/tests" -v +run_step "Run hard gate tests (contract + smoke + distribution)" python3 -m pytest "$ROOT_DIR/tests" -m "not implementation_mirror" -v + +echo "[release-preflight] Running implementation-mirror tests (advisory, non-blocking)..." +if python3 -m pytest "$ROOT_DIR/tests" -m "implementation_mirror" -v; then + echo "[release-preflight] Implementation-mirror tests passed." +else + echo "[release-preflight] WARNING: Implementation-mirror tests failed (advisory, not blocking release)." 
+fi run_step "Run install/payload bootstrap smoke" python3 "$ROOT_DIR/scripts/check-install-payload-bundle-smoke.py" run_step "Run prompt runtime gate smoke" python3 "$ROOT_DIR/scripts/check-prompt-runtime-gate-smoke.py" run_step "Run bundle runtime smoke check" bash "$ROOT_DIR/scripts/check-runtime-smoke.sh" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0daabad --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +"""Shared pytest configuration and marker registration.""" +import pytest + + +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line( + "markers", + "implementation_mirror: marks tests as implementation-mirror (not part of hard gate)", + ) diff --git a/tests/pytest_entries/fail_close_contract_entry.py b/tests/pytest_entries/fail_close_contract_entry.py index 79c12a0..e6dcc31 100644 --- a/tests/pytest_entries/fail_close_contract_entry.py +++ b/tests/pytest_entries/fail_close_contract_entry.py @@ -1,3 +1,4 @@ +# Test classification: implementation-mirror from __future__ import annotations from functools import lru_cache diff --git a/tests/test_runtime_config.py b/tests/test_runtime_config.py index 954c1df..9dcca6f 100644 --- a/tests/test_runtime_config.py +++ b/tests/test_runtime_config.py @@ -1,4 +1,4 @@ -# Test classification: contract +# Test classification: implementation-mirror from __future__ import annotations from tests.runtime_test_support import * diff --git a/tests/test_runtime_engine.py b/tests/test_runtime_engine.py index d0fa47a..ae8cfd9 100644 --- a/tests/test_runtime_engine.py +++ b/tests/test_runtime_engine.py @@ -1,6 +1,8 @@ # Test classification: contract from __future__ import annotations +import pytest + from dataclasses import replace from tests.runtime_test_support import * @@ -1200,6 +1202,8 @@ def test_develop_decision_resume_returns_continue_host_develop(self) -> None: self.assertEqual(resumed.recovered_context.current_run.stage, "executing") 
self.assertFalse((workspace / ".sopify-skills" / "state" / "current_decision.json").exists()) + @pytest.mark.implementation_mirror + def test_develop_decision_resume_can_fallback_to_plan_review(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -1270,6 +1274,8 @@ def test_develop_clarification_resume_returns_continue_host_develop(self) -> Non self.assertEqual(resumed.recovered_context.current_run.stage, "executing") self.assertFalse((workspace / ".sopify-skills" / "state" / "current_clarification.json").exists()) + @pytest.mark.implementation_mirror + def test_develop_pending_decision_does_not_bypass_checkpoint_when_resume_bridge_is_missing(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -1630,6 +1636,8 @@ def test_archive_handoff_does_not_synthesize_status_without_engine_payload(self) self.assertNotIn("archived_plan_path", handoff.artifacts) self.assertNotIn("state_cleared", handoff.artifacts) + @pytest.mark.implementation_mirror + def test_archive_normalizes_legacy_archive_front_matter_projection(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -1751,6 +1759,8 @@ def test_archive_allows_review_and_blocks_required_by_knowledge_sync(self) -> No self.assertIn("required_missing", blocked_sync) self.assertGreater(len(blocked_sync["required_missing"]), 0) + @pytest.mark.implementation_mirror + def test_archive_blocks_legacy_plan_without_auto_doctor(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -1804,6 +1814,8 @@ def test_archive_blocks_legacy_plan_without_auto_doctor(self) -> None: self.assertEqual(result.handoff.artifacts["archive_lifecycle"]["archive_changed_files"], []) self.assertTrue((workspace / ".sopify-skills" / "state" / "current_plan.json").exists()) + @pytest.mark.implementation_mirror + def test_archive_keeps_legacy_plan_blocked_after_session_interruption(self) -> None: with 
tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -2270,6 +2282,8 @@ def test_run_plan_loop_fail_closes_repeated_checkpoint_signatures(self) -> None: self.assertEqual(orchestrated.stopped_reason, "repeated_checkpoint") self.assertEqual(orchestrated.loop_count, 3) + @pytest.mark.implementation_mirror + def test_run_plan_loop_fail_closes_when_bridge_cannot_complete_roundtrip(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -2862,6 +2876,8 @@ def test_synced_runtime_bundle_supports_develop_callback_helper(self) -> None: self.assertEqual(submit_payload["required_host_action"], "confirm_decision") self.assertTrue((workspace / ".sopify-skills" / "state" / "current_decision.json").exists()) + @pytest.mark.implementation_mirror + def test_synced_runtime_bundle_supports_cli_decision_bridge_prompt(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -3001,6 +3017,8 @@ def test_synced_runtime_bundle_supports_clarification_checkpoint(self) -> None: self.assertFalse((workspace / ".sopify-skills" / "state" / "current_clarification.json").exists()) self.assertTrue((workspace / ".sopify-skills" / "state" / "current_plan.json").exists()) + @pytest.mark.implementation_mirror + def test_repo_local_runtime_entry_blocks_runtime_first_requests_without_override(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -3048,6 +3066,8 @@ def test_repo_local_runtime_entry_blocks_runtime_first_requests_without_override self.assertEqual(allowed.returncode, 0, msg=allowed.stderr) self.assertTrue((workspace / ".sopify-skills" / "state" / "current_handoff.json").exists()) + @pytest.mark.implementation_mirror + def test_repo_local_runtime_entry_blocks_finalize_alias_without_override(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) @@ -3620,6 +3640,8 @@ def test_propose_plan_produces_plan_artifact(self) -> None: # -- 
B8: bare text request fallback -- + @pytest.mark.implementation_mirror + def test_bare_text_request_uses_router_classify(self) -> None: """No ActionProposal → Router.classify fallback.""" with tempfile.TemporaryDirectory() as td: @@ -3628,6 +3650,8 @@ def test_bare_text_request_uses_router_classify(self) -> None: result = run_runtime("解释 router 的工作原理", workspace_root=workspace, user_home=workspace / "home") self.assertEqual(result.route.route_name, "consult") + @pytest.mark.implementation_mirror + def test_bare_text_modify_uses_router_classify(self) -> None: """No ActionProposal, modify request → Router.classify determines route.""" with tempfile.TemporaryDirectory() as td: diff --git a/tests/test_runtime_failure_recovery.py b/tests/test_runtime_failure_recovery.py index 07cf316..9bc0e84 100644 --- a/tests/test_runtime_failure_recovery.py +++ b/tests/test_runtime_failure_recovery.py @@ -1,6 +1,8 @@ # Test classification: contract from __future__ import annotations +import pytest + from tests.runtime_test_support import * from runtime.decision_tables import DEFAULT_DECISION_TABLES_PATH, load_default_decision_tables @@ -156,18 +158,24 @@ def test_default_failure_recovery_table_loads(self) -> None: self.assertEqual(Path(table["decision_tables_source_path"]), DEFAULT_DECISION_TABLES_PATH.resolve()) self.assertEqual(len(table["rows"]), 8) + @pytest.mark.implementation_mirror + def test_legacy_standalone_recovery_asset_can_still_load_explicitly(self) -> None: table = load_failure_recovery_table(LEGACY_FAILURE_RECOVERY_TABLE_PATH) self.assertEqual(Path(table["source_path"]), LEGACY_FAILURE_RECOVERY_TABLE_PATH.resolve()) self.assertEqual(Path(table["decision_tables_source_path"]), DEFAULT_DECISION_TABLES_PATH.resolve()) self.assertEqual(table["schema_version"], "failure_recovery.v1") + @pytest.mark.implementation_mirror + def test_default_and_legacy_recovery_tables_stay_in_sync(self) -> None: embedded = load_default_failure_recovery_table() legacy = 
load_failure_recovery_table(LEGACY_FAILURE_RECOVERY_TABLE_PATH) assert_failure_recovery_tables_consistent(embedded, legacy) self.assertEqual(embedded["rows"], legacy["rows"]) + @pytest.mark.implementation_mirror + def test_embedded_decision_table_errors_are_normalized(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: asset_path = Path(temp_dir) / "decision_tables.yaml" @@ -181,6 +189,8 @@ def test_embedded_decision_table_errors_are_normalized(self) -> None: ): load_failure_recovery_table(asset_path) + @pytest.mark.implementation_mirror + def test_unified_asset_detection_does_not_depend_on_v1_schema_literal(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: asset_path = Path(temp_dir) / "decision_tables.yaml" @@ -196,6 +206,8 @@ def test_unified_asset_detection_does_not_depend_on_v1_schema_literal(self) -> N ): load_failure_recovery_table(asset_path) + @pytest.mark.implementation_mirror + def test_decision_table_context_errors_are_normalized_for_legacy_load(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: decision_asset_path = Path(temp_dir) / "decision_tables.yaml" diff --git a/tests/test_runtime_gate.py b/tests/test_runtime_gate.py index 27c2a24..cdba394 100644 --- a/tests/test_runtime_gate.py +++ b/tests/test_runtime_gate.py @@ -1,6 +1,8 @@ # Test classification: contract from __future__ import annotations +import pytest + import importlib.util import json import os @@ -311,6 +313,7 @@ def _write_gate_receipt_fixture( class RuntimeGateTests(unittest.TestCase): + @pytest.mark.implementation_mirror def test_workspace_preflight_fallback_keeps_outcome_contract_in_sync(self) -> None: standalone_module = _load_module_without_repo_installer( REPO_ROOT / "runtime" / "workspace_preflight.py", @@ -349,6 +352,8 @@ def test_workspace_preflight_fallback_keeps_outcome_contract_in_sync(self) -> No self.assertEqual(actual.get("action_level"), expected.get("action_level")) self.assertEqual(actual.get("message_hint"), 
expected.get("message_hint")) + @pytest.mark.implementation_mirror + def test_gate_output_fallback_keeps_outcome_summary_rendering_in_sync(self) -> None: standalone_module = _load_module_without_repo_installer( REPO_ROOT / "runtime" / "gate_output.py", @@ -368,6 +373,8 @@ def test_gate_output_fallback_keeps_outcome_summary_rendering_in_sync(self) -> N render_outcome_summary(payload), ) + @pytest.mark.implementation_mirror + def test_gate_preflight_falls_back_to_legacy_helper_argv_contract(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -756,6 +763,8 @@ def test_preflight_returns_selected_pinned_bundle_contract_instead_of_payload_ac self.assertEqual(result["runtime_gate_entry"], "scripts/runtime_gate_pinned.py") self.assertEqual(result["preferences_preload_entry"], "scripts/preferences_preload_pinned.py") + @pytest.mark.implementation_mirror + def test_preflight_exposes_legacy_workspace_entries_when_global_bundle_falls_back(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -881,6 +890,8 @@ def test_gate_non_blocking_config_error_still_surfaces_normally(self) -> None: self.assertEqual(result["status"], "error") self.assertEqual(result["error_code"], "config_error") + @pytest.mark.implementation_mirror + def test_gate_preflight_block_uses_pre_config_fallback_paths_even_with_custom_plan_directory(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -1457,6 +1468,8 @@ def test_gate_preflight_short_circuits_ingress_violations_at_host_id_before_payl }, ) + @pytest.mark.implementation_mirror + def test_gate_preflight_falls_back_when_helper_rejects_host_id_only(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -1533,6 +1546,8 @@ def test_gate_preflight_falls_back_when_helper_rejects_host_id_only(self) -> Non self.assertEqual(result["status"], "ready") 
self.assertEqual(result["preflight"]["helper_argv_mode"], "legacy_request_preserved") + @pytest.mark.implementation_mirror + def test_gate_preflight_preserves_request_when_helper_only_rejects_host_id(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -1554,6 +1569,8 @@ def test_gate_preflight_preserves_request_when_helper_only_rejects_host_id(self) self.assertEqual(result["preflight"]["helper_argv_mode"], "legacy_request_preserved") self.assertFalse((workspace / ".sopify-runtime" / "manifest.json").exists()) + @pytest.mark.implementation_mirror + def test_gate_preflight_fail_closes_when_legacy_helper_cannot_honor_non_interactive_mode(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: temp_root = Path(temp_dir) @@ -1576,6 +1593,8 @@ def test_gate_preflight_fail_closes_when_legacy_helper_cannot_honor_non_interact self.assertIn("Refresh the local Sopify install", result["message"]) self.assertFalse((workspace / ".sopify-runtime" / "manifest.json").exists()) + @pytest.mark.implementation_mirror + def test_drop_cli_arg_pairs_preserves_request_value_that_matches_removed_flag_name(self) -> None: command = [ sys.executable, @@ -2371,6 +2390,8 @@ def test_runtime_gate_cli_prints_compact_json_contract(self) -> None: self.assertEqual(payload["allowed_response_mode"], NORMAL_RUNTIME_FOLLOWUP) self.assertIn("handoff", payload) + @pytest.mark.implementation_mirror + def test_runtime_gate_cli_text_renders_field_level_ingress_details(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: workspace = Path(temp_dir) / "workspace" diff --git a/tests/test_runtime_message_templates.py b/tests/test_runtime_message_templates.py index 63d8b6e..524770f 100644 --- a/tests/test_runtime_message_templates.py +++ b/tests/test_runtime_message_templates.py @@ -1,6 +1,8 @@ # Test classification: contract from __future__ import annotations +import pytest + import copy from tests.runtime_test_support import * @@ -13,6 +15,7 @@ class 
MessageTemplatesTests(unittest.TestCase): + @pytest.mark.implementation_mirror def test_reason_code_family_prefix_template_renders(self) -> None: templates = load_default_host_message_templates() result = render_host_message( @@ -29,6 +32,8 @@ def test_reason_code_family_prefix_template_renders(self) -> None: self.assertIn("决策确认", result["text"]) self.assertIn("修复契约", result["text"]) + @pytest.mark.implementation_mirror + def test_prompt_mode_fallback_is_used_when_no_template_matches(self) -> None: templates = load_default_host_message_templates() result = render_host_message( @@ -40,6 +45,8 @@ def test_prompt_mode_fallback_is_used_when_no_template_matches(self) -> None: self.assertEqual(result["render_events"], []) self.assertIn("当前输入还不足以继续", result["text"]) + @pytest.mark.implementation_mirror + def test_missing_template_variable_falls_back_without_crashing(self) -> None: templates = load_default_host_message_templates() result = render_host_message( @@ -52,6 +59,8 @@ def test_missing_template_variable_falls_back_without_crashing(self) -> None: self.assertEqual(result["render_events"], [MESSAGE_TEMPLATE_RENDER_FAILED]) self.assertIn("暂时不能安全执行", result["text"]) + @pytest.mark.implementation_mirror + def test_broken_fallback_uses_safe_fallback_message(self) -> None: templates = load_default_host_message_templates() broken = copy.deepcopy(templates) diff --git a/tests/test_runtime_router.py b/tests/test_runtime_router.py index bcd8a79..bcf80f3 100644 --- a/tests/test_runtime_router.py +++ b/tests/test_runtime_router.py @@ -1,6 +1,8 @@ # Test classification: contract from __future__ import annotations +import pytest + from tests.runtime_test_support import * @@ -100,6 +102,8 @@ def test_short_architecture_action_request_still_routes_to_workflow(self) -> Non self.assertEqual(route.route_name, "workflow") + @pytest.mark.implementation_mirror + def test_quick_fix_and_consult_output_hide_repo_local_runtime_wording(self) -> None: with tempfile.TemporaryDirectory() as 
temp_dir: workspace = Path(temp_dir)