From 62f230fe8d4252954d9ffd935ff01ec6bc6ddbbf Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 10:25:08 +0900 Subject: [PATCH 1/7] feat(savepoint): simplify UX while preserving resume safety --- README.ko.md | 106 ++++-- README.md | 163 +++----- docs/reference/savepoint-contract.md | 13 +- evals/README.md | 5 +- evals/output-contract.json | 87 +++++ examples/SAVEPOINT.filled.example.md | 2 +- examples/file-architecture/SAVEPOINT.md | 2 +- scripts/check-install-helper.py | 3 +- scripts/check-savepoint-renderer.py | 355 +++++++++++++++++- scripts/savepoint.py | 18 + scripts/validate-repo.py | 67 +++- skills/savepoint/SKILL.md | 89 +++-- skills/savepoint/agents/openai.yaml | 7 +- skills/savepoint/references/contract.md | 61 +++ skills/savepoint/references/safety.md | 23 ++ skills/savepoint/references/template.md | 77 ++++ skills/savepoint/scripts/render_savepoint.py | 143 ++++++- skills/savepoint/scripts/savepoint.py | 132 +++++++ .../savepoint/scripts/validate_savepoint.py | 46 ++- 19 files changed, 1201 insertions(+), 198 deletions(-) create mode 100644 evals/output-contract.json create mode 100644 scripts/savepoint.py create mode 100644 skills/savepoint/references/contract.md create mode 100644 skills/savepoint/references/safety.md create mode 100644 skills/savepoint/references/template.md create mode 100644 skills/savepoint/scripts/savepoint.py diff --git a/README.ko.md b/README.ko.md index 5132b27..08f4799 100644 --- a/README.ko.md +++ b/README.ko.md @@ -1,51 +1,103 @@ # Savepoint -코딩 에이전트를 위한 continue/load 시스템입니다. +Savepoint는 이전 대화 context에 의존하지 않고 새 코딩 에이전트가 현재 repo/Git 상태에서 이어갈 수 있게 `.savepoint/SAVEPOINT.md`를 생성하거나 검증하는 skill입니다. -Savepoint는 새 에이전트 세션이 이전 채팅 context에 의존하지 않고 현재 코딩 작업을 이어서 불러오도록 돕습니다. +## 30초 사용법 -## Prompts - -| Prompt | 의미 | -|---|---| -| `/savepoint save` | `.savepoint/SAVEPOINT.md` 생성/갱신 | -| `/savepoint load` | 기존 Savepoint 검증/로드. 
요청됐고 안전할 때만 이어서 작업 | -| `/savepoint text` | 파일 없이 복붙용 텍스트 인계 생성 | +```text +/savepoint .savepoint/SAVEPOINT.md를 생성하거나 갱신합니다. +/savepoint save 기본 동작과 같습니다. +/savepoint load 기존 savepoint를 검증하고 이어가기 안전 여부를 보고합니다. +/savepoint text 복붙용 텍스트만 출력합니다. 파일 복구 보장은 없습니다. +``` -네이티브 slash-command 지원 여부는 클라이언트마다 다를 수 있습니다. 클라이언트가 custom slash prompt를 모델에 전달하지 않으면 `$savepoint로 저장해줘`, `$savepoint로 로드해줘`, `$savepoint 복붙용 텍스트로 만들어줘`처럼 자연어로 사용합니다. +클라이언트가 custom slash prompt를 모델에 전달하지 않으면 `$savepoint로 저장해줘`, `$savepoint로 로드해줘`, `$savepoint 복붙용 텍스트로 만들어줘`처럼 자연어로 요청하세요. [English README](README.md) -코딩 세션 상태, repo/Git 상태, 검증, redaction, 안전한 resume이 중요하면 기본적으로 파일 기반 **Savepoint**를 사용합니다. 기존 `.savepoint/SAVEPOINT.md`에서 이어갈 때는 `/savepoint load`를 사용합니다. +## 언제 쓰나 -`복붙용`, `텍스트`, `파일 없이`처럼 파일 없는 전달을 명시한 경우에만 `/savepoint text`를 사용합니다. +- context window가 차거나 자동 compaction이 예상될 때 +- 코딩 에이전트 세션을 reset하거나 다른 세션으로 넘기기 전 +- multi-file refactor 중 검증 가능한 재개 지점이 필요할 때 +- Codex, Claude, Gemini, 외부 orchestrator 사이에 repo 상태를 넘길 때 -## 사용 사례 +## 쓰지 않을 때 -- context window가 가득 찬 코딩 에이전트 세션을 새 세션에서 이어가기 -- 자동 context compaction 이후 또는 의도적인 session reset 전에 복구 가능한 상태 남기기 -- Codex 또는 Claude 세션 사이에서 repo/Git 상태 전달하기 -- 단발성 작업을 위한 `/savepoint text` 복붙용 인계 만들기 +- 짧은 일반 요약이면 충분할 때 +- SQL `SAVEPOINT` 설명 요청일 때 +- `/status`, `/new`, compaction 정책, PTY 제어, session rotation 요청일 때 +- application code 수정이나 savepoint라는 이름의 기능 구현 요청일 때 +- Git commit, stash, branch history가 맞는 도구일 때 -짧은 단순 요약만 필요하면 일반 요약이 더 저렴할 수 있습니다. 구조화된 코딩 작업 전달이나 복구가 중요할 때 savepoint를 사용하세요. +## 보장하는 것 -## Savepoint Artifact +- file mode는 `.savepoint/SAVEPOINT.md`를 씁니다. +- artifact는 repo/Git snapshot, `## Resume Prompt`, 마지막 `SAVEPOINT_V1` marker block을 포함합니다. +- `REDACTION_CHECKED: yes` 전에 생성된 artifact의 secret-like 값을 스캔합니다. +- bundled validator가 marker shape와 safe-resume 필드를 검사합니다. +- load 시 현재 disk state가 savepoint text보다 우선합니다. -Savepoint는 아래 파일을 씁니다. 
+## 보장하지 않는 것 -```text -.savepoint/SAVEPOINT.md +- 테스트 통과 +- 코드 정답성 +- 작업 완료 +- 미래 충돌 없음 +- text mode의 repo 복구 + +## Runtime command + +public entrypoint는 다음입니다. + +```bash +python3 scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate +python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md +python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json +python3 scripts/savepoint.py text --input .savepoint/input.json ``` -`SAVEPOINT.md`는 `## Resume Prompt`와 마지막 `SAVEPOINT_V1` marker block을 포함합니다. field schema는 `skills/savepoint/schemas/savepoint-v1.schema.json`, marker semantics는 `docs/reference/savepoint-contract.md`에 있습니다. +portable skill entrypoint는 `skills/savepoint/scripts/savepoint.py`입니다. 기존 `scripts/render_savepoint.py`, `scripts/validate_savepoint.py` wrapper는 호환성을 위해 유지합니다. + +## 설치 + +추천 명령: + +```bash +# Claude user install +python3 scripts/install.py --target claude --scope user --apply + +# Codex repo install +python3 scripts/install.py --target codex --scope repo --apply --add-gitignore +``` + +helper는 기본 dry-run입니다. 실제로 쓰려면 `--apply`가 필요합니다. repo-scope install에서 `--add-gitignore`를 주면 `.savepoint/`를 추가합니다. + +## Runtime boundary + +일반 create/load에서는 다음만 사용합니다. + +- `skills/savepoint/SKILL.md` +- `skills/savepoint/scripts/savepoint.py` +- 고급 edge case가 있을 때만 `skills/savepoint/references/*.md` +- marker schema를 debug할 때만 `skills/savepoint/schemas/savepoint-v1.schema.json` + +examples, evals, maintainer docs, repository validation scripts는 일반 agent context가 아닙니다. + +## Examples + +- `examples/file-bugfix/`: 작은 file savepoint +- `examples/file-architecture/`: `details/*.md` spillover가 있는 savepoint +- `examples/text-note/`: response-only `/savepoint text` 예시 +- `examples/unsafe-savepoint/`: 의도적으로 unsafe한 `RESUME_READY: no` artifact -## Maintainer Validation +## Maintainer validation -생성된 Savepoint artifact에는 `scripts/validate_savepoint.py`를 사용합니다. 
이 도구는 `SAVEPOINT.md` 파일을 검증하는 portable runtime check입니다. +생성된 artifact는 `scripts/savepoint.py validate .savepoint/SAVEPOINT.md`로 검증합니다. -`scripts/validate-repo.py`는 이 저장소를 유지보수할 때만 사용합니다. packaging, examples, trigger evals, marker/schema contract를 확인합니다. +`scripts/validate-repo.py`는 이 저장소를 유지보수할 때만 사용합니다. packaging, examples, trigger evals, marker/schema contracts를 검사합니다. -저장소 변경을 커밋하기 전에는 아래 검증을 실행합니다. +커밋 전에는 다음을 실행합니다. ```bash python3 scripts/check-frontmatter.py diff --git a/README.md b/README.md index bf6d391..f276bdb 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,77 @@ # Savepoint -A continue/load system for coding agents. +Savepoint creates or verifies a recoverable coding-session checkpoint so a fresh agent can continue from current repo/Git state without prior chat context. -Savepoint helps a fresh agent session load the current coding run without relying on prior chat context. +## 30-second usage -## Prompts +```text +/savepoint Create or refresh .savepoint/SAVEPOINT.md. +/savepoint save Same as default. +/savepoint load Verify an existing savepoint and report whether continuation is safe. +/savepoint text Print a copy-paste handoff only; no file recovery guarantee. +``` -| Prompt | Meaning | -|---|---| -| `/savepoint save` | Create or refresh `.savepoint/SAVEPOINT.md`. | -| `/savepoint load` | Load and verify an existing Savepoint. Continue only if requested and safe. | -| `/savepoint text` | Produce a response-only copy-paste handoff. No file, no recovery guarantee. | +If a client does not pass custom slash prompts through, use the natural-language equivalent: `Use $savepoint to save`, `Use $savepoint to load`, or `Use $savepoint to create a text handoff`. -Native slash-command support depends on the client. If a client does not pass custom slash prompts through to the model, use the natural-language equivalent: `Use $savepoint to save`, `Use $savepoint to load`, or `Use $savepoint to create a text handoff`. 
+[Korean README](README.ko.md) -[한국어 README](README.ko.md) +## When to use -The file-backed Savepoint path is the default for preserving coding-session state, repo/Git state, validation, redaction, or safe resume. Use `/savepoint load` when continuing from an existing `.savepoint/SAVEPOINT.md`. +- The context window is full or likely to compact. +- You are about to reset or transfer a coding-agent session. +- A multi-file refactor needs a verifiable resume point. +- Codex, Claude, Gemini, or an external orchestrator must hand off repo state. -Use `/savepoint text` only for explicit copy-paste, text, or no-file requests that do not need file recovery guarantees. +## When not to use -## Use Cases +- A short ordinary summary is enough. +- The user asks about SQL `SAVEPOINT`. +- The request is only `/status`, `/new`, compaction policy, PTY control, or session rotation. +- The user asks for application code changes or a feature named savepoint. +- Git commit, stash, or branch history is the right tool. -- Resume a coding-agent session after the context window is full. -- Recover coding state after automatic context compaction or before an intentional session reset. -- Transfer repo/Git state from one Codex or Claude session to another. -- Create a `/savepoint text` copy-paste handoff for a quick one-off transfer. +## What it guarantees -For short one-off summaries, a plain summary may be cheaper; use savepoint when structured coding transfer or recovery matters. +- File mode writes `.savepoint/SAVEPOINT.md`. +- The artifact includes a repo/Git snapshot, `## Resume Prompt`, and one final `SAVEPOINT_V1` marker block. +- Generated artifacts are scanned for secret-like values before `REDACTION_CHECKED: yes`. +- The bundled validator checks marker shape and safe-resume fields. +- On load, current disk state wins over savepoint text. 
-## Why Savepoint +## What it does not guarantee -Savepoint turns open-ended discovery, inference, and retry work from free-form transfer notes into a short, structured check of Git/disk state and savepoint consistency. +- Tests pass. +- The code is correct. +- The task is complete. +- Future conflicts are impossible. +- Text mode can recover repo state. -## Savepoint Artifact +## Runtime command -Savepoints write: +The public entrypoint is: -```text -.savepoint/SAVEPOINT.md +```bash +python3 scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate +python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md +python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json +python3 scripts/savepoint.py text --input .savepoint/input.json ``` -`SAVEPOINT.md` embeds `## Resume Prompt` and ends with a `SAVEPOINT_V1` marker block. The field schema lives in `skills/savepoint/schemas/savepoint-v1.schema.json`; marker semantics live in `docs/reference/savepoint-contract.md`. - -## Runtime Boundary - -Normal create/load uses: +The portable skill entrypoint is `skills/savepoint/scripts/savepoint.py`. Legacy wrappers `scripts/render_savepoint.py` and `scripts/validate_savepoint.py` remain for compatibility. -- Skill router: `skills/savepoint/SKILL.md` -- Renderer/finalizer: `skills/savepoint/scripts/render_savepoint.py` -- Portable validator: `skills/savepoint/scripts/validate_savepoint.py` -- Shared marker/snapshot helpers: `skills/savepoint/scripts/savepoint_contract.py` -- Marker schema: `skills/savepoint/schemas/savepoint-v1.schema.json` +## Install -Reference docs, templates, examples, evals, orchestrators, and `scripts/validate-repo.py` are maintainer/debug assets, not normal agent context. The root `scripts/validate_savepoint.py` and `scripts/render_savepoint.py` forward to the portable runtime tools. 
+Recommended commands: -## Repository Layout +```bash +# Claude user install +python3 scripts/install.py --target claude --scope user --apply -```text -. -├── README.md -├── README.ko.md -├── SECURITY.md -├── AGENTS.md -├── skills/ -│ └── savepoint/ -│ ├── SKILL.md -│ ├── LICENSE.txt -│ ├── agents/openai.yaml -│ ├── schemas/savepoint-v1.schema.json -│ └── scripts/ -│ ├── render_savepoint.py -│ ├── savepoint_contract.py -│ └── validate_savepoint.py -├── docs/ -│ └── reference/ -│ ├── context-packaging.md -│ ├── savepoint-contract.md -│ └── savepoint-template.md -├── examples/ -├── evals/ -├── orchestrators/ -└── scripts/ +# Codex repo install +python3 scripts/install.py --target codex --scope repo --apply --add-gitignore ``` -## Installation +The helper defaults to dry-run. It writes files only with `--apply`. With repo-scope install, `--add-gitignore` appends `.savepoint/`. Typical skill locations: @@ -93,49 +80,27 @@ Typical skill locations: - Claude user skill: `$HOME/.claude/skills/savepoint/` - Claude project skill: `/.claude/skills/savepoint/` -Repo symlink example: +## Runtime boundary -```bash -mkdir -p .agents/skills .claude/skills -ln -s ../../skills/savepoint .agents/skills/savepoint -ln -s ../../skills/savepoint .claude/skills/savepoint -``` +Normal create/load should use only: -Safe install helper: +- `skills/savepoint/SKILL.md` +- `skills/savepoint/scripts/savepoint.py` +- `skills/savepoint/references/*.md` only for advanced edge cases +- `skills/savepoint/schemas/savepoint-v1.schema.json` only when debugging marker schema -```bash -python3 scripts/install.py --target claude --scope user -python3 scripts/install.py --target codex --scope repo --apply --add-gitignore -``` - -The helper defaults to dry-run. It writes files only with `--apply`; `--add-gitignore` is repo-scope only and appends `.savepoint/`. +Examples, evals, maintainer docs, and repository validation scripts are not normal agent context. 
## Examples -- `examples/file-bugfix/`: small Savepoint. -- `examples/file-architecture/`: Savepoint with focused `details/*.md` spillover. +- `examples/file-bugfix/`: small file savepoint. +- `examples/file-architecture/`: savepoint with focused `details/*.md` spillover. - `examples/text-note/`: response-only `/savepoint text` note. -- `examples/unsafe-savepoint/`: intentionally unsafe Savepoint with `RESUME_READY: no`. - -## Maintainer Evals - -`evals/trigger-queries.json` records should-trigger and should-not-trigger prompts, including SQL/database `SAVEPOINT` near misses. - -Core expectations: +- `examples/unsafe-savepoint/`: intentionally unsafe `RESUME_READY: no` artifact. -- `/savepoint text` output is short and does not claim repo recovery. -- Savepoint output writes `.savepoint/SAVEPOINT.md`. -- Savepoint output embeds `## Resume Prompt`. -- Large Savepoints use focused detail artifacts instead of bloating `SAVEPOINT.md`. -- Load/resume verifies disk state before continuation or implementation. -- Disk state wins over savepoint text. -- Secrets are redacted. -- `SAVEPOINT_V1` marker block is present and honest. -- Unsafe state never emits `RESUME_READY: yes`. +## Maintainer validation -## Maintainer Validation - -Use `scripts/validate_savepoint.py` for generated Savepoint artifacts. It validates a `SAVEPOINT.md` file and is the portable runtime check. +Use `scripts/savepoint.py validate .savepoint/SAVEPOINT.md` for generated artifacts. Use `scripts/validate-repo.py` only for maintaining this repository. It checks packaging, examples, trigger evals, and marker/schema contracts. @@ -153,12 +118,6 @@ python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.f git diff --check ``` -To validate a generated Savepoint: - -```bash -python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md -``` - ## Orchestrators External PTY controllers may parse the final `SAVEPOINT_V1` block and decide whether to rotate sessions. 
This skill only prepares file artifacts or text notes; orchestration remains outside the skill. diff --git a/docs/reference/savepoint-contract.md b/docs/reference/savepoint-contract.md index de7504c..8d8c71c 100644 --- a/docs/reference/savepoint-contract.md +++ b/docs/reference/savepoint-contract.md @@ -193,7 +193,7 @@ Field meanings: - `DETAILS_READY`: `yes` for file detail spillover artifacts, `not-needed` when there are no generated details, otherwise `no`. - `PROMPT_READY`: `yes` when file `SAVEPOINT.md` contains an embedded `## Resume Prompt`, or a text response provides a transfer note with a usable next-step prompt. - `DISK_RECORDED`: `yes` only when the required repo snapshot was recorded. -- `VALIDATION_RECORDED`: `yes` when validation status is recorded, including passed, failed, or intentionally skipped validation with reason and next command. +- `VALIDATION_RECORDED`: `yes` when savepoint artifact validation and project validation posture are recorded, including passed, expected failed, or intentionally skipped project validation with reason and next command. - `REDACTION_CHECKED`: `yes` only after checking generated artifacts or text output for secrets. - `RESUME_READY`: `yes` only when the safe resume checklist passes. - `BLOCKERS`: `none` or a short reason preventing safe continuation. @@ -213,7 +213,8 @@ Set `RESUME_READY: yes` only when all are true: - every referenced detail artifact exists, or details are `not-needed`. - an embedded resume prompt exists; `PROMPT_READY: yes`. - disk-state conflict handling is stated. -- validation command and result are recorded, or skipped validation has a reason and next command. +- savepoint artifact validation ran and passed. +- project validation posture is recorded. - secret redaction was checked. - no unresolved user question blocks continuation. - the next step is singular, executable, and narrow. 
@@ -223,6 +224,14 @@ When file artifacts are written, attempt the bundled savepoint validator (`valid `RESUME_READY: yes` means a fresh session can reconstruct state and continue. It does not mean tests pass, code is correct, or the task is complete. +Project validation posture uses these statuses: + +- `passed`: project validation passed; `RESUME_READY: yes` is allowed. +- `failed-expected`: project validation failed in a known, documented way; `RESUME_READY: yes` is allowed only with reason and next validation command. +- `not-run-justified`: project validation was not run for a stated reason; `RESUME_READY: yes` is allowed only with reason and next validation command. +- `failed-blocking`: project validation failed in a blocking or unexplained way; `RESUME_READY: no`. +- `not-run-unknown`: project validation was not run without enough reason or next command; `RESUME_READY: no`. + ## Staleness Rules Report the savepoint as stale before editing when branch, HEAD, dirty files, required paths, detail artifacts, or validation assumptions differ from the recorded snapshot without an explicit expected-drift note. diff --git a/evals/README.md b/evals/README.md index 3b1d3f7..80df425 100644 --- a/evals/README.md +++ b/evals/README.md @@ -6,6 +6,8 @@ Use them when changing `SKILL.md`, savepoint templates, markers, examples, or or `trigger-queries.json` records realistic should-trigger and should-not-trigger prompts for checking the skill description boundary. It is validated by `python3 scripts/validate-repo.py --check trigger-evals`. +`output-contract.json` records artifact-contract, redaction, token-budget, permission, and resume-ready semantics expectations that should stay out of runtime skill context. + ## Review Method For each case: @@ -33,4 +35,5 @@ For each case: - Secrets are redacted. - `SKILL.md` frontmatter parses as valid YAML. - File artifacts have a final marker block that is present and honest; text notes omit it by default. 
-- `VALIDATION_RECORDED: yes` means validation status is recorded, including passed, failed, or intentionally skipped validation with an explicit low-risk reason and next command. +- `VALIDATION_RECORDED: yes` means savepoint artifact validation and project validation posture are recorded. +- `RESUME_READY: yes` can coexist with `not-run-justified` or `failed-expected` project validation when reason and next validation command are recorded. diff --git a/evals/output-contract.json b/evals/output-contract.json new file mode 100644 index 0000000..6be7bbe --- /dev/null +++ b/evals/output-contract.json @@ -0,0 +1,87 @@ +{ + "skill_name": "savepoint", + "version": 1, + "cases": [ + { + "id": "artifact-file-mode-01", + "category": "artifact-contract", + "scenario": "File savepoint creation succeeds.", + "must": [ + ".savepoint/SAVEPOINT.md exists", + "## Resume Prompt exists", + "exactly one final SAVEPOINT_V1 marker exists", + "full diffs and long logs are not embedded" + ] + }, + { + "id": "text-mode-no-recovery-01", + "category": "artifact-contract", + "scenario": "User asks for copy-paste or no-file handoff.", + "must": [ + "no .savepoint/SAVEPOINT.md creation is claimed", + "repo recovery guarantee is not claimed", + "RESUME_READY: yes is not emitted" + ] + }, + { + "id": "redaction-secret-01", + "category": "security-redaction", + "scenario": "Input mentions API keys, tokens, cookies, private keys, .env values, or PII.", + "must": [ + "secret values are redacted as ", + "generated artifacts are scanned", + "redaction failure makes RESUME_READY no" + ] + }, + { + "id": "resume-ready-not-run-justified-01", + "category": "resume-ready-semantics", + "scenario": "Project validation was not run, with a recorded reason and next validation command.", + "must": [ + "Project validation is recorded as not-run-justified", + "next validation command is recorded", + "RESUME_READY yes is allowed when other hard blockers are absent" + ] + }, + { + "id": 
"resume-ready-failed-blocking-01", + "category": "resume-ready-semantics", + "scenario": "Project validation failed in an unexplained or blocking way.", + "must": [ + "Project validation is recorded as failed-blocking", + "RESUME_READY remains no", + "blocker names the validation failure" + ] + }, + { + "id": "simple-token-budget-01", + "category": "token-budget", + "scenario": "Simple dirty working tree savepoint.", + "must": [ + "top-level SAVEPOINT.md stays compact", + "detail artifacts are not created unless needed", + "repeated planning sections are avoided" + ] + }, + { + "id": "generated-file-scope-01", + "category": "no-unwanted-files", + "scenario": "Default file-mode savepoint.", + "must": [ + "writes only the selected savepoint artifact and needed details", + "does not edit application code", + "does not create commits, stashes, or branches" + ] + }, + { + "id": "least-permission-01", + "category": "least-permission", + "scenario": "Skill frontmatter and runtime instructions are loaded.", + "must": [ + "default SKILL.md does not grant broad allowed-tools", + "disable-model-invocation is not set for normal natural-language routing", + "session rotation and PTY control stay outside the skill" + ] + } + ] +} diff --git a/examples/SAVEPOINT.filled.example.md b/examples/SAVEPOINT.filled.example.md index df717c1..e2b089e 100644 --- a/examples/SAVEPOINT.filled.example.md +++ b/examples/SAVEPOINT.filled.example.md @@ -71,7 +71,7 @@ Read in this order: ## Validation Manifest - Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. -- Project validation: `npm test -- ReportTable.test.tsx` failed; no matching CSV export assertions yet. +- Project validation: failed-expected: `npm test -- ReportTable.test.tsx` failed; no matching CSV export assertions yet. - Skipped checks / next validation: full suite not run because focused tests are missing; next `npm test -- ReportTable.test.tsx`, then `npm run lint` if available. 
- Secret redaction check: manual artifact scan - Observable completion criteria: CSV tests pass and export contains only filtered rows. diff --git a/examples/file-architecture/SAVEPOINT.md b/examples/file-architecture/SAVEPOINT.md index 3043e5f..df5811e 100644 --- a/examples/file-architecture/SAVEPOINT.md +++ b/examples/file-architecture/SAVEPOINT.md @@ -70,7 +70,7 @@ ## Validation Manifest - Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. -- Project validation: `npm test -- tests/billing/invoice.integration.test.ts` failed on timestamp formatting; see `details/validation.md`. +- Project validation: failed-expected: `npm test -- tests/billing/invoice.integration.test.ts` failed on timestamp formatting; see `details/validation.md`. - Skipped checks / next validation: full suite not run while focused integration test is red; next focused integration test. - Secret redaction check: manual artifact scan - Observable completion criteria: focused integration test and billing unit tests pass. 
diff --git a/scripts/check-install-helper.py b/scripts/check-install-helper.py index 2b5beb4..e97594a 100644 --- a/scripts/check-install-helper.py +++ b/scripts/check-install-helper.py @@ -114,8 +114,9 @@ def test_apply_copies_skill() -> None: destination = repo / ".agents" / "skills" / "savepoint" require(result.returncode == 0, result.stderr or result.stdout) require((destination / "SKILL.md").exists(), "SKILL.md was not copied") - require(not (destination / "references").exists(), "maintainer references should not be copied into runtime skill") + require((destination / "references" / "contract.md").exists(), "runtime references were not copied") require((destination / "scripts" / "render_savepoint.py").exists(), "renderer was not copied") + require((destination / "scripts" / "savepoint.py").exists(), "unified CLI was not copied") require((destination / "scripts" / "savepoint_contract.py").exists(), "contract helper was not copied") require((destination / "scripts" / "validate_savepoint.py").exists(), "scripts were not copied") require((destination / "schemas" / "savepoint-v1.schema.json").exists(), "schemas were not copied") diff --git a/scripts/check-savepoint-renderer.py b/scripts/check-savepoint-renderer.py index 1192fa5..f3ee918 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -6,6 +6,7 @@ import contextlib import importlib.util import io +import json import os import subprocess import sys @@ -15,7 +16,9 @@ ROOT = Path(__file__).resolve().parents[1] RENDER_HELPER = ROOT / "skills" / "savepoint" / "scripts" / "render_savepoint.py" +SAVEPOINT_CLI = ROOT / "skills" / "savepoint" / "scripts" / "savepoint.py" ROOT_RENDERER = ROOT / "scripts" / "render_savepoint.py" +ROOT_SAVEPOINT_CLI = ROOT / "scripts" / "savepoint.py" VALIDATOR = ROOT / "skills" / "savepoint" / "scripts" / "validate_savepoint.py" HELPER_SCRIPT_DIR = RENDER_HELPER.parent if str(HELPER_SCRIPT_DIR) not in sys.path: @@ -144,6 +147,72 @@ def 
minimal_semantic_input( return path +def lite_validation_semantic_input( + repo: Path, + *, + status: str, + reason: str = "", + next_validation: str = "", +) -> Path: + path = repo / "lite-savepoint-input.json" + data: dict[str, object] = { + "goal": "finish minimal deterministic rendering", + "current_state": "project validation posture should not be confused with resume safety", + "next_action": "run the focused minimal renderer check", + "validation": { + "project": { + "status": status, + "reason": reason, + "commands": [], + "next_validation": next_validation, + } + }, + } + if status == "passed": + data["validation"] = { + "project": { + "status": status, + "reason": reason, + "commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "focused renderer check passed", + } + ], + "next_validation": next_validation, + } + } + elif status.startswith("failed"): + data["validation"] = { + "project": { + "status": status, + "reason": reason, + "commands": [ + { + "command": "python -m pytest tests/auth", + "result": "failed", + "summary": reason or "project validation failed", + } + ], + "next_validation": next_validation, + } + } + path.write_text(json.dumps(data, ensure_ascii=True, indent=2) + "\n", encoding="utf-8") + return path + + +def resume_ready_literal_semantic_input(repo: Path) -> Path: + path = repo / "resume-ready-literal-input.json" + data = { + "goal": "prove renderer exit status comes from the marker", + "current_state": "body text can mention RESUME_READY: yes without making the artifact ready", + "next_action": "report the blocker instead of treating body prose as readiness", + } + path.write_text(json.dumps(data, ensure_ascii=True, indent=2) + "\n", encoding="utf-8") + return path + + def test_truncates_large_git_snapshot() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo(Path(tmp)) @@ -465,7 +534,7 @@ def test_renderer_minimal_json_without_project_validation_stays_unsafe() 
-> None require(result.returncode == 2, "missing project validation should keep minimal input unsafe") text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") require("RESUME_READY: no" in text, "missing project validation must block resume-ready") - require("project-validation-not-recorded" in text, "project validation blocker missing") + require("validation-not-run-unknown" in text, "unknown project validation blocker missing") require("missing-done-when" not in text, "removed optional done_when should not block readiness") require("missing-out-of-scope" not in text, "removed optional out_of_scope should not block readiness") require("missing-smallest-next-step" not in text, "removed optional smallest_next_step should not block readiness") @@ -473,6 +542,28 @@ def test_renderer_minimal_json_without_project_validation_stays_unsafe() -> None require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_renderer_exit_code_uses_marker_not_body_resume_ready_text() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = resume_ready_literal_semantic_input(repo) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("Current state: body text can mention RESUME_READY: yes" in text, "test fixture body literal missing") + require("RESUME_READY: no" in text, "marker should remain unsafe") + require(result.returncode == 2, "renderer exit code must follow marker RESUME_READY, not body prose") + + def test_renderer_failed_project_validation_stays_unsafe() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -492,11 +583,125 @@ def test_renderer_failed_project_validation_stays_unsafe() -> None: 
require(result.returncode == 2, "failed project validation should keep output unsafe") text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") require("RESUME_READY: no" in text, "failed project validation must block resume-ready") - require("project-validation-not-passing" in text, "failed project validation blocker missing") + require("validation-failed-blocking" in text, "failed project validation blocker missing") + validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def test_renderer_not_run_justified_project_validation_can_resume_ready() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="not-run-justified", + reason="handoff requested before tests could run", + next_validation="python scripts/check-savepoint-renderer.py", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 0, result.stderr or result.stdout) + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("Project validation: not-run-justified" in text, "justified not-run status missing") + require("Skipped checks / next validation: python scripts/check-savepoint-renderer.py" in text, "next validation missing") + require("RESUME_READY: yes" in text, "justified not-run project validation should allow resume-ready") + require("VALIDATION_RECORDED: yes" in text, "project validation posture should count as recorded") + validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def 
test_renderer_failed_expected_project_validation_can_resume_ready() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="failed-expected", + reason="known failing auth edge case is the next task", + next_validation="python -m pytest tests/auth", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 0, result.stderr or result.stdout) + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("Project validation: failed-expected" in text, "expected failure status missing") + require("RESUME_READY: yes" in text, "expected project validation failure should allow resume-ready") + require("validation-failed-blocking" not in text, "expected failure must not be marked blocking") validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_renderer_not_run_justified_without_next_validation_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="not-run-justified", + reason="handoff requested before tests could run", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "justified not-run without next validation should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-next-command-missing" in text, "missing next validation blocker missing") + require("RESUME_READY: no" in text, 
"missing next validation must block resume-ready") + + +def test_renderer_failed_blocking_project_validation_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="failed-blocking", + reason="test failure cause is unknown", + next_validation="python -m pytest tests/auth", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "blocking project validation failure should keep output unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-blocking" in text, "blocking failure blocker missing") + require("RESUME_READY: no" in text, "blocking failure must block resume-ready") + + def test_renderer_missing_next_action_stays_unsafe() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -657,6 +862,76 @@ def test_root_renderer_forwards_to_portable_renderer() -> None: require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_savepoint_cli_save_validate_and_inspect() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = semantic_input(repo) + output = repo / ".savepoint" / "SAVEPOINT.md" + saved = run( + [ + sys.executable, + str(SAVEPOINT_CLI), + "save", + "--input", + str(input_path), + "--output", + str(output), + "--assert-no-active-commands", + "--scan-redaction", + "--validate", + ], + repo, + ) + require(saved.returncode == 0, saved.stderr or saved.stdout) + require(output.exists(), "savepoint CLI did not write SAVEPOINT.md") + + validated = run([sys.executable, str(SAVEPOINT_CLI), "validate", str(output)], repo) + require(validated.returncode == 0, validated.stderr or 
validated.stdout) + + inspected = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(output), "--json"], repo) + require(inspected.returncode == 0, inspected.stderr or inspected.stdout) + parsed = json.loads(inspected.stdout) + require(parsed["RESUME_READY"] == "yes", "inspect JSON should report resume-ready") + require(parsed["SAVEPOINT_MODE"] == "file", "inspect JSON should report file mode") + + +def test_root_savepoint_cli_forwards_to_portable_cli() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = semantic_input(repo) + output = repo / ".savepoint" / "SAVEPOINT.md" + result = run( + [ + sys.executable, + str(ROOT_SAVEPOINT_CLI), + "save", + "--input", + str(input_path), + "--output", + str(output), + "--assert-no-active-commands", + "--scan-redaction", + "--validate", + ], + repo, + ) + require(result.returncode == 0, result.stderr or result.stdout) + require(output.exists(), "root savepoint CLI did not write SAVEPOINT.md") + + +def test_savepoint_cli_text_mode_does_not_write_recovery_artifact() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = semantic_input(repo) + result = run([sys.executable, str(SAVEPOINT_CLI), "text", "--input", str(input_path)], repo) + require(result.returncode == 0, result.stderr or result.stdout) + require("No file was written." in result.stdout, "text mode should say no file was written") + require("Repo recovery is not guaranteed." 
in result.stdout, "text mode should avoid recovery guarantees") + require("SAVEPOINT_V1" not in result.stdout, "text mode should not emit machine marker by default") + require("RESUME_READY: yes" not in result.stdout, "text mode must not claim resume-ready") + require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), "text mode wrote a recovery artifact") + + def compact_resume_ready_text( repo: Path, output: Path, @@ -775,6 +1050,69 @@ def test_validator_accepts_compact_resume_ready_file_without_repetitive_sections require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_compact_validator_accepts_not_run_justified_project_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="not-run-justified: handoff requested before tests could run", + skipped_checks="python scripts/check-savepoint-renderer.py", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def test_compact_validator_accepts_failed_expected_project_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected: known failing auth edge case is the next task", + skipped_checks="python -m pytest tests/auth", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def test_compact_validator_rejects_not_run_unknown_project_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="not-run-unknown: no reason or next validation recorded", + skipped_checks="none", + ) + validation = run([sys.executable, 
str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted unknown not-run project validation") + require("not-run-unknown" in validation.stderr, "unknown not-run validation error not reported") + + +def test_compact_validator_rejects_failed_blocking_project_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-blocking: test failure cause is unknown", + skipped_checks="python -m pytest tests/auth", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted blocking project validation failure") + require("failed-blocking" in validation.stderr, "blocking project validation error not reported") + + +def test_compact_validator_rejects_expected_failure_without_next_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected: known failing auth edge case is the next task", + skipped_checks="none", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted expected failure without next validation") + require("next validation" in validation.stderr, "missing next validation error not reported") + + def test_compact_validator_still_requires_disk_snapshot_fields() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo(Path(tmp)) @@ -965,14 +1303,27 @@ def main() -> int: test_renderer_records_recovery_uncertainty_inputs, test_renderer_records_not_run_when_savepoint_validation_is_omitted, test_renderer_minimal_json_without_project_validation_stays_unsafe, + test_renderer_exit_code_uses_marker_not_body_resume_ready_text, test_renderer_failed_project_validation_stays_unsafe, + 
test_renderer_not_run_justified_project_validation_can_resume_ready, + test_renderer_failed_expected_project_validation_can_resume_ready, + test_renderer_not_run_justified_without_next_validation_stays_unsafe, + test_renderer_failed_blocking_project_validation_stays_unsafe, test_renderer_missing_next_action_stays_unsafe, test_renderer_unresolved_blocker_stays_unsafe, test_renderer_keeps_savepoint_unsafe_without_active_command_assertion, test_renderer_secret_scan_blocks_resume_ready, test_renderer_redacts_secret_even_when_scan_flag_is_omitted, test_root_renderer_forwards_to_portable_renderer, + test_savepoint_cli_save_validate_and_inspect, + test_root_savepoint_cli_forwards_to_portable_cli, + test_savepoint_cli_text_mode_does_not_write_recovery_artifact, test_validator_accepts_compact_resume_ready_file_without_repetitive_sections, + test_compact_validator_accepts_not_run_justified_project_validation, + test_compact_validator_accepts_failed_expected_project_validation, + test_compact_validator_rejects_not_run_unknown_project_validation, + test_compact_validator_rejects_failed_blocking_project_validation, + test_compact_validator_rejects_expected_failure_without_next_validation, test_compact_validator_still_requires_disk_snapshot_fields, test_compact_validator_rejects_skipped_none_without_project_pass, test_compact_validator_requires_redaction_evidence, diff --git a/scripts/savepoint.py b/scripts/savepoint.py new file mode 100644 index 0000000..7195829 --- /dev/null +++ b/scripts/savepoint.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Forward to the portable savepoint CLI.""" + +from __future__ import annotations + +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SKILL_SCRIPTS = ROOT / "skills" / "savepoint" / "scripts" +sys.path.insert(0, str(SKILL_SCRIPTS)) + +from savepoint import main # noqa: E402 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/validate-repo.py b/scripts/validate-repo.py index 
903233c..60512b2 100644 --- a/scripts/validate-repo.py +++ b/scripts/validate-repo.py @@ -20,6 +20,7 @@ ROOT = Path(__file__).resolve().parents[1] SKILL_DIR = ROOT / "skills" / "savepoint" SKILL_SCRIPTS = SKILL_DIR / "scripts" +SKILL_REFERENCE_DIR = SKILL_DIR / "references" REFERENCE_DIR = ROOT / "docs" / "reference" sys.path.insert(0, str(SKILL_SCRIPTS)) @@ -59,6 +60,11 @@ "savepoint-contract.md", "savepoint-template.md", ] +CANONICAL_SKILL_REFERENCES = [ + "contract.md", + "safety.md", + "template.md", +] MARKER_ENUMS = { "SAVEPOINT_MODE": {"text", "file"}, "DETAILS_READY": {"yes", "no", "not-needed"}, @@ -228,8 +234,8 @@ def validate_frontmatter(self) -> None: name = data.get("name", "") description = data.get("description", "") - if set(data) != {"name", "description"}: - self.fail("SKILL.md frontmatter must contain only name and description") + if set(data) != {"name", "description", "argument-hint"}: + self.fail("SKILL.md frontmatter must contain only name, description, and argument-hint") if name != SKILL_DIR.name: self.fail(f"frontmatter name must match skill directory: {name!r}") if not re.fullmatch(r"[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?", name): @@ -239,7 +245,9 @@ def validate_frontmatter(self) -> None: if len(description) > 1024: self.fail("frontmatter description exceeds 1024 characters") lower_description = description.lower() - for term in ["explicit", "sql", "ordinary summaries", "/new", "pty"]: + if data.get("argument-hint") != "[save|load|text] [next-session focus]": + self.fail("SKILL.md argument-hint must describe save/load/text and optional focus") + for term in ["context reset", "session transfer", "sql", "ordinary summaries", "/new", "/status"]: if term not in lower_description: self.fail(f"frontmatter description must include boundary term: {term}") for phrase in KOREAN_INVOCATION_PHRASES: @@ -253,27 +261,37 @@ def validate_references(self) -> None: skill_text = self.read(SKILL_DIR / "SKILL.md") for name in CANONICAL_REFERENCES: 
self.require_exists(REFERENCE_DIR / name) + for name in CANONICAL_SKILL_REFERENCES: + self.require_exists(SKILL_REFERENCE_DIR / name) self.require_exists(SKILL_DIR / "schemas" / "savepoint-v1.schema.json") + self.require_exists(SKILL_DIR / "scripts" / "savepoint.py") self.require_exists(SKILL_DIR / "scripts" / "render_savepoint.py") self.require_exists(SKILL_DIR / "scripts" / "savepoint_contract.py") self.require_exists(SKILL_DIR / "scripts" / "validate_savepoint.py") + self.require_exists(ROOT / "scripts" / "savepoint.py") self.require_exists(ROOT / "scripts" / "render_savepoint.py") required_skill_phrases = [ - "## Prompts", + "Default behavior", + "/savepoint -> create or refresh `.savepoint/SAVEPOINT.md`", "/savepoint save", "/savepoint load", "/savepoint text", - "default recoverable file checkpoint", ".savepoint/SAVEPOINT.md", "SAVEPOINT_V1", "RESUME_READY: yes", - "Normal use: do not read references, `scripts/*.py`, or `evals/*.json`", + "Do not read references, `scripts/*.py`, or `evals/*.json` during normal use.", + "python3 /scripts/savepoint.py save", + "append `--force` only when", + "generated, untracked, valid default artifact", + "`validation.project.status`", + "`not-run-justified`", + "`failed-expected`", "`no-file`, `no files`, `in-response`, or `in the response`", "## Load / Resume", "For inspect-only requests, do not clean up by default.", "Continue only when the user requested continuation and `RESUME_READY` is `yes`", - "For adopted generated default savepoints", + "Read `references/contract.md` only when", ] for phrase in required_skill_phrases: if phrase not in skill_text: @@ -342,9 +360,12 @@ def validate_readme_format(self) -> None: "/savepoint save", "/savepoint load", "/savepoint text", + "30-second usage", + "What it guarantees", + "What it does not guarantee", "Savepoint", ".savepoint/SAVEPOINT.md", - "validate_savepoint.py", + "scripts/savepoint.py", "scripts/validate-repo.py", ]: if phrase not in readme_text: @@ -353,9 +374,12 @@ def 
validate_readme_format(self) -> None: "/savepoint save", "/savepoint load", "/savepoint text", + "30초 사용법", + "보장하는 것", + "보장하지 않는 것", "Savepoint", ".savepoint/SAVEPOINT.md", - "validate_savepoint.py", + "scripts/savepoint.py", "scripts/validate-repo.py", ]: if phrase not in readme_ko_text: @@ -368,6 +392,7 @@ def validate_agent_metadata(self) -> None: 'display_name: "Savepoint"', "short_description:", "default_prompt:", + "allow_implicit_invocation: true", ]: if phrase not in text: self.fail(f"agents/openai.yaml missing phrase: {phrase}") @@ -383,9 +408,6 @@ def validate_agent_metadata(self) -> None: "verify", "text", "copy-paste", - "/savepoint save", - "/savepoint load", - "/savepoint text", ".savepoint/SAVEPOINT.md", ]: if phrase not in prompt: @@ -568,6 +590,7 @@ def validate_manual_eval_cases(self) -> None: "session reset", "working tree", "Unrelated dirty files", + "resume-ready semantics", ], ROOT / "evals" / "cases" / "resume-conflicting-disk.md": [ "automatic context compaction", @@ -583,6 +606,26 @@ def validate_manual_eval_cases(self) -> None: for phrase in phrases: if phrase not in text: self.fail(f"{path.relative_to(ROOT)} missing eval phrase: {phrase}") + output_contract = ROOT / "evals" / "output-contract.json" + self.require_exists(output_contract) + if not output_contract.exists(): + return + try: + data = json.loads(output_contract.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + self.fail(f"evals/output-contract.json is invalid JSON: {exc}") + return + categories = {case.get("category") for case in data.get("cases", []) if isinstance(case, dict)} + for category in [ + "artifact-contract", + "security-redaction", + "resume-ready-semantics", + "token-budget", + "no-unwanted-files", + "least-permission", + ]: + if category not in categories: + self.fail(f"evals/output-contract.json missing category: {category}") def validate_schema_contract(self) -> None: expected_names = [line.split(":", 1)[0] for line in 
EXPECTED_MARKER_LINES[1:-1]] diff --git a/skills/savepoint/SKILL.md b/skills/savepoint/SKILL.md index da0dd4b..b9a2e96 100644 --- a/skills/savepoint/SKILL.md +++ b/skills/savepoint/SKILL.md @@ -1,45 +1,80 @@ --- name: savepoint -description: "Use when explicitly creating/updating/loading/inspecting/resuming coding-session savepoints: /savepoint save; /savepoint load; /savepoint text; .savepoint/SAVEPOINT.md; text/copy-paste; Korean: 세이브포인트 만들어줘; 세이브포인트 로드해줘; 세이브포인트 읽어줘; 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, docs, code changes, /new, /status, PTY, or session rotation." +description: "Create or load a recoverable coding-session checkpoint at .savepoint/SAVEPOINT.md so a fresh agent can resume from current repo/Git state. Use for context reset, session transfer, 세이브포인트 만들어줘, 세이브포인트 로드해줘, 세이브포인트 읽어줘, 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, code edits, /status, /new, or app features named savepoint." +argument-hint: "[save|load|text] [next-session focus]" --- # Savepoint -Preserve coding-session state for continuation without prior chat context. +Use this skill to preserve or load coding-session state without relying on prior chat context. -## Prompts +Default behavior: -- `/savepoint save`: create or refresh `.savepoint/SAVEPOINT.md`. -- `/savepoint load`: load and verify an existing Savepoint. -- `/savepoint text`: response-only copy-paste handoff. +```text +/savepoint -> create or refresh `.savepoint/SAVEPOINT.md` +/savepoint save -> same as default +/savepoint load -> verify an existing savepoint and report whether continuation is safe +/savepoint text -> response-only copy-paste handoff; no file recovery guarantee +``` -Native slash-command support depends on the client. If slash prompts are not passed through, use `$savepoint` natural-language requests. +Native slash-command support depends on the client. If slash prompts are not passed through, use `$savepoint` natural language requests. 
-## Choose +## Rules -- **Savepoint**: default recoverable file checkpoint for generic requests, `SAVEPOINT.md`, repo/Git state, validation, safe resume, or recovery by another coding agent. Generate `.savepoint/SAVEPOINT.md` with the installed renderer, include `## Resume Prompt`, and exactly one `SAVEPOINT_V1` block with `SAVEPOINT_MODE: file`. -- **Text path**: response-only text for explicit `/savepoint text`, `복붙용`, `텍스트`, `파일 없이`, `붙여넣을`, `copy-paste`, `text`, `no-file`, `no files`, `in-response`, or `in the response` requests. Do not claim recovery, disk/Git verification, `SAVEPOINT.md`, or `RESUME_READY: yes`. Omit markers unless requested; then use `SAVEPOINT_MODE: text`. +- Stay in savepoint scope. Do not edit application code. +- Do not run `/new`, `/status`, PTY/session rotation, threshold policy, or background process control. +- Do not read references, `scripts/*.py`, or `evals/*.json` during normal use. +- Prefer current files, Git state, and durable state files over chat memory. +- Do not paste transcripts, full diffs, long logs, shell history, PRDs, ADRs, issues, or commits. +- Reference existing artifacts by path, URL, branch, or commit. +- Redact API keys, tokens, cookies, credentials, private keys, passwords, `.env` values, and PII as ``. +- File savepoints must end with exactly one `SAVEPOINT_V1` marker block. +- Keep top-level `SAVEPOINT.md` compact. Use generated `details/*.md` only when needed for recovery. -## Rules +## Create / Save + +1. Use the provided focus text, if any, only to narrow the next action. +2. Capture repo state: cwd, Git root, branch, short HEAD, `git status --short`, diff stat, name-status, staged stat, staged name-status, latest commit, instruction files, and durable state files. +3. Write compact input JSON with at least `goal`, `current_state`, `next_action`, `files_to_inspect_first`, `blockers`, and `validation.project.status`. +4. 
Use project validation status exactly as one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected` or `not-run-justified`, include a reason and next validation command. +5. Run: + +```bash +python3 /scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate +``` -- Normal use: do not read references, `scripts/*.py`, or `evals/*.json`; run the renderer and validator as commands and inspect their outputs. -- Stay in artifact scope: do not run `/new`, `/status`, control PTYs, rotate sessions, choose thresholds, or edit application code while creating. -- Use extra focus text only to narrow the next action. Redact secrets. Do not paste transcripts, full diffs, long logs, shell history, or duplicated PRDs/plans/ADRs/issues/commits. +Inside this repository, `python3 scripts/savepoint.py save ...` also works. -## Create +6. For refresh, append `--force` only when the existing file is the generated, untracked, valid default artifact `.savepoint/SAVEPOINT.md` and the user did not ask to preserve history; otherwise preserve or ask. +7. Inspect only the generated `.savepoint/SAVEPOINT.md`. +8. Report exact path, `RESUME_READY`, blockers if any, and the first next action. -1. For `/savepoint text`, include only goal, state, next action, blockers/risks, and relevant paths or links. -2. For Savepoints, inspect and record cwd, Git root, branch, short HEAD, status, diff stat, name-status, staged stat, staged name-status, latest commit, relevant instruction files, and relevant durable state files. -3. Run the installed renderer: `python3 /scripts/render_savepoint.py --input --assert-no-active-commands --scan-redaction --run-savepoint-validation`; inside this repository, root wrapper `python3 scripts/render_savepoint.py --input --assert-no-active-commands --scan-redaction --run-savepoint-validation` also works. 
Then inspect only the generated `.savepoint/SAVEPOINT.md`. -4. Renderer input minimum fields: `goal`, `current_state`, `next_action`; for ready Savepoints, also record passing `project_validation`. Do not read renderer source to discover input shape. -5. Renderer exit code `2` can still mean a not-ready `SAVEPOINT.md` was written. Inspect the file, report blockers, and do not continue unless `RESUME_READY: yes`. -6. For adopted generated default savepoints, later create/update requests refresh `.savepoint/SAVEPOINT.md` in place unless the user asks to preserve history. -7. Validate with `python3 /scripts/validate_savepoint.py .savepoint/SAVEPOINT.md`; inside this repository, root wrapper `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` also works. Fix errors before setting `RESUME_READY: yes`. +Renderer exit code `2` can still mean a not-ready `SAVEPOINT.md` was written. Inspect the file, report blockers, and do not continue unless `RESUME_READY: yes`. ## Load / Resume -1. Verify cwd, Git root, branch, short HEAD, status, and diff before trusting a savepoint. -2. Read applicable instructions and the selected savepoint: user path first, then `.savepoint/SAVEPOINT.md`. -3. Compare claims with the working tree; disk state wins, and drift must be reported before edits. -4. Continue only when the user requested continuation and `RESUME_READY` is `yes`; otherwise stop after the report. -5. Cleanup only adopted, generated, untracked artifacts. For inspect-only requests, do not clean up by default. +1. Read the selected savepoint: user path first, then `.savepoint/SAVEPOINT.md`. +2. Verify cwd, Git root, branch, short HEAD, status, and diff against current disk state. +3. Disk state wins over savepoint text. Report drift before edits. +4. Continue only when the user requested continuation and `RESUME_READY` is `yes`, with no blocking drift or missing required file. +5. For inspect-only requests, do not clean up by default. 
+ +## Text Mode + +Use text mode only when the user explicitly asks for copy-paste, text-only, `no-file`, `no files`, `in-response`, or `in the response`. + +Run: + +```bash +python3 /scripts/savepoint.py text --input .savepoint/input.json +``` + +Text mode must not claim `.savepoint/SAVEPOINT.md` was written, repo recovery is guaranteed, or `RESUME_READY: yes`. + +## Advanced Cases + +Read `references/contract.md` only when marker semantics, cleanup, stale savepoints, detail spillover, overwrite adoption, or safe-resume edge cases are unclear. + +Read `references/safety.md` only when secret redaction or secret-like paths are involved. + +Read `references/template.md` only when the renderer is unavailable and a manual artifact is unavoidable. diff --git a/skills/savepoint/agents/openai.yaml b/skills/savepoint/agents/openai.yaml index 2651a5a..ffd1540 100644 --- a/skills/savepoint/agents/openai.yaml +++ b/skills/savepoint/agents/openai.yaml @@ -1,4 +1,7 @@ interface: display_name: "Savepoint" - short_description: "Create, load, or text-export coding-session savepoints" - default_prompt: "Use $savepoint for /savepoint save, /savepoint load, or /savepoint text: create .savepoint/SAVEPOINT.md with Git/disk state, load and verify an existing Savepoint, or produce a response-only copy-paste text handoff." + short_description: "Recoverable coding-session checkpoint" + default_prompt: "Use $savepoint to create, load, verify, or text copy-paste .savepoint/SAVEPOINT.md with repo/Git state, redaction, validation posture, and a resume prompt." + +policy: + allow_implicit_invocation: true diff --git a/skills/savepoint/references/contract.md b/skills/savepoint/references/contract.md new file mode 100644 index 0000000..a9c3caf --- /dev/null +++ b/skills/savepoint/references/contract.md @@ -0,0 +1,61 @@ +# Savepoint Runtime Contract + +Use this only when the normal `savepoint.py` flow is not enough. + +## Artifact + +- Default file: `.savepoint/SAVEPOINT.md`. 
+- The file must include `## Resume Prompt`. +- The final block must be exactly one `SAVEPOINT_V1` marker block. +- Disk state wins over savepoint text on load. + +## RESUME_READY + +`RESUME_READY: yes` means a fresh agent can verify disk/Git state and continue. It does not mean tests pass, code is correct, work is complete, or conflicts are impossible. + +Hard blockers: + +- missing or invalid `SAVEPOINT.md` +- missing Git/disk snapshot +- missing resume prompt +- missing or duplicate marker block +- failed savepoint artifact validation +- redaction scan not run or failed +- active command, approval, dev server output, or session-control action not accounted for +- unresolved user question or unknown blocker +- blocking disk drift on load + +Project validation statuses: + +- `passed`: resume-ready is allowed. +- `failed-expected`: resume-ready is allowed when the failure is documented with a reason and next validation command. +- `not-run-justified`: resume-ready is allowed when the skip reason and next validation command are recorded. +- `failed-blocking`: resume-ready is not allowed. +- `not-run-unknown`: resume-ready is not allowed. + +`VALIDATION_RECORDED: yes` records both savepoint artifact validation and an honest project validation posture. It is not a claim that the project validation passed. + +## Load Report + +Report before editing: + +- loaded savepoint path +- branch, HEAD, status, and diff match/drift +- required files present/missing +- detail artifacts read or not needed +- redaction and validation status +- first next action + +Continue only when the user requested continuation and `RESUME_READY` is `yes`. + +## Details + +Use generated `details/*.md` only when top-level `SAVEPOINT.md` cannot stay both compact and recoverable. Each detail file answers one recovery question. Detail files must be scanned for secrets when referenced by a ready file savepoint. + +## Cleanup And Overwrite + +Cleanup happens only after adoption, not after reading. 
+ +An artifact is adopted only after disk verification, savepoint comparison, user-visible resume report, user-requested continuation, `RESUME_READY: yes`, and no preservation request. + +Overwrite `.savepoint/SAVEPOINT.md` by default only when it is the generated, untracked, valid default artifact selected or adopted in the current workflow. Never overwrite tracked, stale, unsafe, user-authored, external-path, inspect-only, conflicting, or debug-needed savepoints. diff --git a/skills/savepoint/references/safety.md b/skills/savepoint/references/safety.md new file mode 100644 index 0000000..3031006 --- /dev/null +++ b/skills/savepoint/references/safety.md @@ -0,0 +1,23 @@ +# Savepoint Safety + +Scan generated savepoint artifacts by default, not the whole repository. + +Never copy these values into `SAVEPOINT.md`, `details/*.md`, or text mode output: + +- API keys, tokens, cookies, credentials, private keys, passwords +- full `.env` values +- shell history +- raw logs that may contain secrets +- unnecessary PII + +Use `` for required mentions. + +Secret-like paths such as `.env`, `id_rsa`, `id_ed25519`, `*.pem`, `*.p12`, `*.pfx`, `credentials.json`, or service-account files may be named by path when needed, but do not read or quote their contents. + +If redaction cannot be verified: + +- set `REDACTION_CHECKED: no` +- set `RESUME_READY: no` +- record the blocker briefly + +Text mode is also subject to redaction. It must not claim repo recovery, file creation, or `RESUME_READY: yes`. diff --git a/skills/savepoint/references/template.md b/skills/savepoint/references/template.md new file mode 100644 index 0000000..83a90b5 --- /dev/null +++ b/skills/savepoint/references/template.md @@ -0,0 +1,77 @@ +# Compact Manual Template + +Use only when `savepoint.py save` is unavailable. 
+ +````markdown +# Savepoint Manifest + +## TL;DR / Operational Summary +- Goal: +- Current state: +- Next action: +- Blocker: + +## Repo Snapshot +- Captured at: +- Working directory: +- Git root: +- Branch: +- Short HEAD: +- `git status --short`: +- `git diff --stat`: +- `git diff --name-status`: +- `git diff --cached --stat`: +- `git diff --cached --name-status`: +- Latest commit: +- Instruction files loaded: +- Durable state files checked: +- Expected drift from captured state: + +## Required Reading +1. Instruction files: +2. Durable state files: +3. Files to inspect first: + +## Change Manifest +- Changed: +- Created: +- Deleted: +- Moved: +- Staged: +- Inspected without change: +- Unknown or unverified: + +## Recovery Notes +- Decisions/rationale: +- Risks/pitfalls: +- Failed approaches: +- Unresolved questions or approval blockers: +- State-file conflicts: + +## Validation Manifest +- Savepoint validation: +- Project validation: +- Skipped checks / next validation: +- Secret redaction check: +- Observable completion criteria: + +## Resume Prompt +```text +Read this savepoint, verify cwd/Git state/status/diff, read listed instruction/state files, and compare all claims with disk state. Disk state wins. Report drift first, then continue only if the user requested continuation and RESUME_READY is yes. 
+``` + +## Markers +```text +SAVEPOINT_V1 +SAVEPOINT_PATH: +SAVEPOINT_MODE: text|file +DETAILS_READY: yes|no|not-needed +PROMPT_READY: yes|no +DISK_RECORDED: yes|no +VALIDATION_RECORDED: yes|no +REDACTION_CHECKED: yes|no +RESUME_READY: yes|no +BLOCKERS: none| +END_SAVEPOINT_V1 +``` +```` diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index 6a345ba..567fc58 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import Any -from savepoint_contract import DEFAULT_OUTPUT, collect_snapshot, find_git_root, render_marker_block +from savepoint_contract import DEFAULT_OUTPUT, collect_snapshot, extract_marker_values, find_git_root, render_marker_block from validate_savepoint import SECRET_PATTERNS, is_redacted_secret_match, scan_secret_patterns @@ -21,6 +21,14 @@ "next_action", ] MAX_VALUE_CHARS = 600 +PROJECT_VALIDATION_STATUSES = { + "passed", + "failed-expected", + "failed-blocking", + "not-run-justified", + "not-run-unknown", +} +PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} def parse_args(argv: list[str]) -> argparse.Namespace: @@ -85,7 +93,7 @@ def list_items(value: Any) -> list[str]: return [text] if text else [] -def project_validation_entries(value: Any) -> list[str]: +def project_validation_command_entries(value: Any) -> list[str]: entries: list[str] = [] if not isinstance(value, list): return entries @@ -116,6 +124,109 @@ def project_validation_passed(value: Any) -> bool: return False +def normalize_project_validation_status(value: Any) -> str: + status = clean_text(value, fallback="").lower().replace("_", "-") + if status in PROJECT_VALIDATION_STATUSES: + return status + if re.search(r"\b(pass|passed|ok|success|succeeded)\b", status) and not re.search( + r"\b(fail|failed|error|not-run|not run|skipped)\b", + status, + ): + return "passed" + if
re.search(r"\b(fail|failed|error)\b", status): + return "failed-blocking" + if re.search(r"\b(not-run|not run|skipped)\b", status): + return "not-run-unknown" + return "not-run-unknown" + + +def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: + validation = data.get("validation") + project = validation.get("project") if isinstance(validation, dict) else None + if isinstance(project, dict): + status = normalize_project_validation_status(project.get("status")) + commands = project_validation_command_entries(project.get("commands")) + reason = clean_text(project.get("reason"), fallback="") + next_validation = clean_text( + project.get("next_validation", project.get("next_command", project.get("next"))), + fallback="", + ) + return { + "status": status, + "commands": commands, + "reason": reason, + "next_validation": next_validation, + "source": "validation.project", + } + + legacy = data.get("project_validation") + commands = project_validation_command_entries(legacy) + next_validation = clean_text(data.get("skipped_checks_next_validation"), fallback="") + if not commands: + return { + "status": "not-run-unknown", + "commands": [], + "reason": "", + "next_validation": next_validation, + "source": "legacy", + } + if project_validation_passed(legacy): + return { + "status": "passed", + "commands": commands, + "reason": "", + "next_validation": next_validation, + "source": "legacy", + } + + combined = " ".join(commands).lower() + if re.search(r"\b(not-run|not run|skipped)\b", combined): + reason = clean_text(commands[0], fallback="") + return { + "status": "not-run-justified" if reason and next_validation else "not-run-unknown", + "commands": commands, + "reason": reason, + "next_validation": next_validation, + "source": "legacy", + } + return { + "status": "failed-blocking", + "commands": commands, + "reason": clean_text(commands[0], fallback="project validation failed"), + "next_validation": next_validation, + "source": "legacy", + } + + +def 
project_validation_entries(data: dict[str, Any]) -> list[str]: + posture = project_validation_posture(data) + status = posture["status"] + commands = posture["commands"] + reason = posture["reason"] + if status == "passed": + return commands + if commands: + return [f"{status}: {entry}" for entry in commands] + if reason: + return [f"{status}: {reason}"] + if status == "not-run-unknown": + return ["not-run-unknown: no project validation reason or next validation recorded"] + return [f"{status}: project validation status recorded without command details"] + + +def project_validation_recorded(posture: dict[str, Any]) -> bool: + status = posture["status"] + if status == "passed": + return bool(posture["commands"]) + if status == "failed-blocking": + return bool(posture["commands"] or posture["reason"]) + if status == "failed-expected": + return bool(posture["reason"] and posture["next_validation"]) + if status == "not-run-justified": + return bool(posture["reason"] and posture["next_validation"]) + return False + + def observable_completion(data: dict[str, Any]) -> str: explicit = clean_text(data.get("observable_completion"), fallback="") if explicit: @@ -243,11 +354,18 @@ def blockers_for(data: dict[str, Any], args: argparse.Namespace, redaction_ok: b blockers.append("redaction-check-not-run") elif not redaction_ok: blockers.append("redaction-check-failed") - project_entries = project_validation_entries(data.get("project_validation")) - if not project_entries: - blockers.append("project-validation-not-recorded") - elif not project_validation_passed(data.get("project_validation")): - blockers.append("project-validation-not-passing") + posture = project_validation_posture(data) + if posture["status"] == "not-run-unknown": + blockers.append("validation-not-run-unknown") + elif posture["status"] == "failed-blocking": + blockers.append("validation-failed-blocking") + elif posture["status"] in PROJECT_VALIDATION_NEXT_REQUIRED: + if not posture["reason"]: + 
blockers.append("validation-reason-missing") + if not posture["next_validation"]: + blockers.append("validation-next-command-missing") + elif posture["status"] == "passed" and not posture["commands"]: + blockers.append("validation-command-missing") if not args.run_savepoint_validation: blockers.append("savepoint-validation-not-run") return unique_or_input(blockers, None) @@ -279,7 +397,8 @@ def build_savepoint( cwd = Path.cwd() snapshot = collect_snapshot(cwd) changes = derive_change_manifest(cwd, data, ignored_status_paths(cwd, output_path, args.input)) - project_entries = project_validation_entries(data.get("project_validation")) + project_posture = project_validation_posture(data) + project_entries = project_validation_entries(data) blockers = blockers_for(data, args, redaction_ok) if force_unsafe_blocker: blockers = unique_or_input([force_unsafe_blocker, *blockers], None) @@ -287,7 +406,7 @@ def build_savepoint( marker_values = { "DETAILS_READY": "not-needed", "DISK_RECORDED": "yes" if snapshot["git_root"] != "not a git repository" else "no", - "VALIDATION_RECORDED": "yes" if args.run_savepoint_validation and project_entries else "no", + "VALIDATION_RECORDED": "yes" if args.run_savepoint_validation and project_validation_recorded(project_posture) else "no", "REDACTION_CHECKED": "yes" if args.scan_redaction and redaction_ok else "no", "RESUME_READY": "yes" if resume_ready else "no", "BLOCKERS": "none" if resume_ready else ",".join(blockers), @@ -296,7 +415,7 @@ def build_savepoint( durable_files = list_items(data.get("durable_state_files_checked")) files_first = list_items(data.get("files_to_inspect_first")) or first_paths(changes) skipped = clean_text( - data.get("skipped_checks_next_validation"), + project_posture["next_validation"] or data.get("skipped_checks_next_validation"), fallback="no skipped checks; rerun recorded project validation if state changes", ) expected_drift = clean_text(data.get("expected_drift"), fallback="none") @@ -554,7 +673,9 @@ def 
main(argv: list[str] | None = None) -> int: final_text = output_path.read_text(encoding="utf-8") final_errors: list[str] = [] scan_secret_patterns(output_path, final_text, final_errors) - final_ready = "RESUME_READY: yes" in final_text and not final_errors + marker_values, marker_errors = extract_marker_values(output_path, final_text) + final_errors.extend(marker_errors) + final_ready = marker_values.get("RESUME_READY") == "yes" and not final_errors print(f"wrote: {output_path}") if final_ready: return 0 diff --git a/skills/savepoint/scripts/savepoint.py b/skills/savepoint/scripts/savepoint.py new file mode 100644 index 0000000..0dd8df3 --- /dev/null +++ b/skills/savepoint/scripts/savepoint.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +"""Unified savepoint CLI for create, validate, inspect, and text handoff.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +import render_savepoint +import validate_savepoint +from render_savepoint import clean_text, next_action_text, read_input, redact_secret_patterns +from savepoint_contract import extract_marker_values + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + subcommands = parser.add_subparsers(dest="command", required=True) + + save = subcommands.add_parser("save", help="Create or refresh a file savepoint.") + save.add_argument("--input", required=True, type=Path, help="JSON file with semantic savepoint input.") + save.add_argument("--output", type=Path, help="Savepoint path to write.") + save.add_argument("--force", action="store_true", help="Overwrite an existing output file.") + save.add_argument( + "--assert-no-active-commands", + action="store_true", + help="Assert no active shell/process output needs to be captured before resume.", + ) + save.add_argument("--scan-redaction", action="store_true", help="Scan generated text for secret patterns.") + save.add_argument("--validate", 
action="store_true", help="Run bundled savepoint validation after writing.") + save.add_argument( + "--run-savepoint-validation", + action="store_true", + help=argparse.SUPPRESS, + ) + + validate = subcommands.add_parser("validate", help="Validate SAVEPOINT.md artifacts.") + validate.add_argument( + "--allow-example-paths", + action="store_true", + help="Allow example SAVEPOINT_PATH values that do not exist on this machine.", + ) + validate.add_argument("savepoints", nargs="+", type=Path) + + inspect = subcommands.add_parser("inspect", help="Inspect the final SAVEPOINT_V1 marker.") + inspect.add_argument("savepoint", type=Path) + inspect.add_argument("--json", action="store_true", help="Emit marker values as JSON.") + + text = subcommands.add_parser("text", help="Print response-only handoff text without writing SAVEPOINT.md.") + text.add_argument("--input", required=True, type=Path, help="JSON file with semantic savepoint input.") + + return parser.parse_args(argv) + + +def render_save_argv(args: argparse.Namespace) -> list[str]: + argv = ["--input", str(args.input)] + if args.output is not None: + argv.extend(["--output", str(args.output)]) + if args.force: + argv.append("--force") + if args.assert_no_active_commands: + argv.append("--assert-no-active-commands") + if args.scan_redaction: + argv.append("--scan-redaction") + if args.validate or args.run_savepoint_validation: + argv.append("--run-savepoint-validation") + return argv + + +def run_validate(args: argparse.Namespace) -> int: + argv: list[str] = [] + if args.allow_example_paths: + argv.append("--allow-example-paths") + argv.extend(str(path) for path in args.savepoints) + return validate_savepoint.main(argv) + + +def run_inspect(args: argparse.Namespace) -> int: + if not args.savepoint.exists(): + print(f"error: file does not exist: {args.savepoint}", file=sys.stderr) + return 1 + text = args.savepoint.read_text(encoding="utf-8") + values, errors = extract_marker_values(args.savepoint, text) + if errors: + 
for error in errors: + print(f"error: {error}", file=sys.stderr) + return 1 + if args.json: + print(json.dumps(values, ensure_ascii=True, indent=2, sort_keys=True)) + return 0 + for key, value in values.items(): + print(f"{key}: {value}") + return 0 + + +def run_text(args: argparse.Namespace) -> int: + data, error = read_input(args.input) + if error or data is None: + print(f"error: {error}", file=sys.stderr) + return 1 + handoff = f"""# Savepoint Text Handoff + +Goal: {clean_text(data.get("goal"))} +Current state: {clean_text(data.get("current_state"))} +Next action: {next_action_text(data)} + +No file was written. +Repo recovery is not guaranteed. +Use file mode when the next agent must verify disk/Git state from `.savepoint/SAVEPOINT.md`. +""" + redacted, _ = redact_secret_patterns(handoff) + print(redacted, end="") + return 0 + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(sys.argv[1:] if argv is None else argv) + if args.command == "save": + return render_savepoint.main(render_save_argv(args)) + if args.command == "validate": + return run_validate(args) + if args.command == "inspect": + return run_inspect(args) + if args.command == "text": + return run_text(args) + print(f"error: unknown command: {args.command}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/savepoint/scripts/validate_savepoint.py b/skills/savepoint/scripts/validate_savepoint.py index c487c61..d011b29 100644 --- a/skills/savepoint/scripts/validate_savepoint.py +++ b/skills/savepoint/scripts/validate_savepoint.py @@ -100,6 +100,14 @@ "- Unresolved questions or approval blockers:", "- State-file conflicts:", } +PROJECT_VALIDATION_STATUSES = { + "passed", + "failed-expected", + "failed-blocking", + "not-run-justified", + "not-run-unknown", +} +PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} def validate_savepoint(path: Path, allow_example_paths: bool = False) -> list[str]: @@ -205,7 
+213,7 @@ def validate_resume_ready_content(path: Path, text: str) -> list[str]: value = field_value_or_block(text, label) allow_absence = label in ABSENCE_ALLOWED_LABELS if label == "- Skipped checks / next validation:": - allow_absence = project_validation_passed(text) + allow_absence = project_validation_status(text) == "passed" if is_placeholder_value(value, allow_absence=allow_absence): errors.append(f"{path}: RESUME_READY=yes requires substantive value for {label}") errors.extend(validate_validation_status(path, text)) @@ -217,18 +225,38 @@ def validate_resume_ready_content(path: Path, text: str) -> list[str]: def validate_validation_status(path: Path, text: str) -> list[str]: errors: list[str] = [] skipped = field_value_or_block(text, "- Skipped checks / next validation:") - if skipped.strip().strip("`").lower().strip(" .") in ABSENCE_ONLY_VALUES and not project_validation_passed(text): + skipped_absent = skipped.strip().strip("`").lower().strip(" .") in ABSENCE_ONLY_VALUES + status = project_validation_status(text) + if status == "not-run-unknown": + errors.append(f"{path}: RESUME_READY=yes cannot use Project validation status not-run-unknown") + elif status == "failed-blocking": + errors.append(f"{path}: RESUME_READY=yes cannot use Project validation status failed-blocking") + elif status in PROJECT_VALIDATION_NEXT_REQUIRED and skipped_absent: errors.append( - f"{path}: Skipped checks / next validation may be none only when Project validation records a passed check" + f"{path}: Project validation status {status} requires a next validation command" ) return errors +def project_validation_status(text: str) -> str: + value = field_value_or_block(text, "- Project validation:").lower().replace("_", "-") + for status in PROJECT_VALIDATION_STATUSES: + if status in value: + return status + if re.search(r"\b(pass|passed|ok|success|succeeded)\b", value) and not re.search( + r"\b(fail|failed|error|not-run|not run|skipped)\b", + value, + ): + return "passed" + if 
re.search(r"\b(fail|failed|error)\b", value): + return "failed-blocking" + if re.search(r"\b(not-run|not run|skipped)\b", value): + return "not-run-unknown" + return "not-run-unknown" + + def project_validation_passed(text: str) -> bool: - value = field_value_or_block(text, "- Project validation:").lower() - if not re.search(r"\b(pass|passed|ok|success|succeeded)\b", value): - return False - return not re.search(r"\b(fail|failed|error|not-run|not run|skipped)\b", value) + return project_validation_status(text) == "passed" def field_value_or_block(text: str, label: str) -> str: @@ -292,7 +320,7 @@ def has_resume_prompt_evidence(text: str) -> bool: return re.search(r"(?m)^## Resume Prompt\s*$", text) is not None -def main() -> int: +def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--allow-example-paths", @@ -300,7 +328,7 @@ def main() -> int: help="Allow example SAVEPOINT_PATH values that do not exist on this machine.", ) parser.add_argument("savepoints", nargs="+", type=Path) - args = parser.parse_args() + args = parser.parse_args(sys.argv[1:] if argv is None else argv) errors: list[str] = [] for path in args.savepoints: From 734d43383911402cbaa229ee1b6446dc5c491752 Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 10:40:16 +0900 Subject: [PATCH 2/7] fix(savepoint): tighten inspect and input UX --- README.ko.md | 5 +- README.md | 5 +- scripts/check-savepoint-renderer.py | 144 +++++++++++++++++++ scripts/validate-repo.py | 10 +- skills/savepoint/SKILL.md | 4 +- skills/savepoint/scripts/render_savepoint.py | 16 ++- skills/savepoint/scripts/savepoint.py | 102 ++++++++++++- 7 files changed, 271 insertions(+), 15 deletions(-) diff --git a/README.ko.md b/README.ko.md index 08f4799..da443f4 100644 --- a/README.ko.md +++ b/README.ko.md @@ -27,7 +27,7 @@ Savepoint는 이전 대화 context에 의존하지 않고 새 코딩 에이전 - 짧은 일반 요약이면 충분할 때 - SQL `SAVEPOINT` 설명 요청일 때 - `/status`, `/new`, compaction 정책, PTY 제어, 
session rotation 요청일 때 -- application code 수정이나 savepoint라는 이름의 기능 구현 요청일 때 +- checkpoint 의도 없는 직접 code/docs 수정이나 savepoint라는 이름의 기능 구현 요청일 때 - Git commit, stash, branch history가 맞는 도구일 때 ## 보장하는 것 @@ -52,6 +52,7 @@ public entrypoint는 다음입니다. ```bash python3 scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate +python3 scripts/savepoint.py init-input --output .savepoint/input.json python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json python3 scripts/savepoint.py text --input .savepoint/input.json @@ -59,6 +60,8 @@ python3 scripts/savepoint.py text --input .savepoint/input.json portable skill entrypoint는 `skills/savepoint/scripts/savepoint.py`입니다. 기존 `scripts/render_savepoint.py`, `scripts/validate_savepoint.py` wrapper는 호환성을 위해 유지합니다. +`inspect --json`은 파일과 marker가 valid이면 `0`, savepoint-like 파일을 읽었지만 invalid이면 `1`, 파일을 읽을 수 없거나 savepoint artifact가 아니면 `2`로 종료합니다. + ## 설치 추천 명령: diff --git a/README.md b/README.md index f276bdb..fbcdd91 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ If a client does not pass custom slash prompts through, use the natural-language - A short ordinary summary is enough. - The user asks about SQL `SAVEPOINT`. - The request is only `/status`, `/new`, compaction policy, PTY control, or session rotation. -- The user asks for application code changes or a feature named savepoint. +- The user asks for direct code/docs edits without checkpoint intent, or an app feature named savepoint. - Git commit, stash, or branch history is the right tool. 
## What it guarantees @@ -52,6 +52,7 @@ The public entrypoint is: ```bash python3 scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate +python3 scripts/savepoint.py init-input --output .savepoint/input.json python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json python3 scripts/savepoint.py text --input .savepoint/input.json @@ -59,6 +60,8 @@ python3 scripts/savepoint.py text --input .savepoint/input.json The portable skill entrypoint is `skills/savepoint/scripts/savepoint.py`. Legacy wrappers `scripts/render_savepoint.py` and `scripts/validate_savepoint.py` remain for compatibility. +`inspect --json` exits `0` when the file and marker are valid, `1` when a savepoint-like file is parsed but invalid, and `2` when the file cannot be read or is not a savepoint artifact. + ## Install Recommended commands: diff --git a/scripts/check-savepoint-renderer.py b/scripts/check-savepoint-renderer.py index f3ee918..eb13ad8 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -750,6 +750,48 @@ def test_renderer_unresolved_blocker_stays_unsafe() -> None: require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_renderer_blockers_alias_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish blocker alias handling", + "current_state": "renderer should not drop intuitive blocker input", + "next_action": "report blocker before continuing", + "blockers": "needs user approval", + "project_validation": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "blocker alias fixture validation recorded" + } + ] +} +""", + encoding="utf-8", + ) + result = run( + [ 
+ sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "blockers alias should keep output unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("unresolved-blockers-recorded" in text, "blockers alias marker missing") + require("needs user approval" in text, "blockers alias should be recorded in recovery notes") + require("RESUME_READY: no" in text, "blockers alias must block resume-ready") + validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + def test_renderer_keeps_savepoint_unsafe_without_active_command_assertion() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -895,6 +937,103 @@ def test_savepoint_cli_save_validate_and_inspect() -> None: require(parsed["SAVEPOINT_MODE"] == "file", "inspect JSON should report file mode") +def test_savepoint_cli_init_input_defaults_to_unknown_validation() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + output = repo / ".savepoint" / "input.json" + result = run([sys.executable, str(SAVEPOINT_CLI), "init-input", "--output", str(output)], repo) + require(result.returncode == 0, result.stderr or result.stdout) + require(output.exists(), "init-input did not write sample input") + data = json.loads(output.read_text(encoding="utf-8")) + project = data["validation"]["project"] + require(project["status"] == "not-run-unknown", "init-input should default to honest unknown validation") + require(project["reason"] == "", "init-input should not prefill a justification") + require(project["next_command"] == "", "init-input should not prefill next validation") + require(not (repo / ".savepoint" / 
"SAVEPOINT.md").exists(), "init-input should not write SAVEPOINT.md") + + +def test_savepoint_cli_inspect_json_reports_invalid_marker() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = repo / ".savepoint" / "SAVEPOINT.md" + output.parent.mkdir() + output.write_text( + """# Invalid Savepoint + +## Markers + +```text +SAVEPOINT_V1 +SAVEPOINT_MODE: file +SAVEPOINT_PATH: C:/tmp/SAVEPOINT.md +END_SAVEPOINT_V1 +``` +""", + encoding="utf-8", + ) + result = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(output), "--json"], repo) + require(result.returncode == 1, "invalid marker should return inspect exit code 1") + parsed = json.loads(result.stdout) + require(parsed["marker_valid"] is False, "invalid marker JSON should set marker_valid=false") + require(parsed["resume_ready"] is False, "invalid marker JSON should not be resume-ready") + require("errors" in parsed and parsed["errors"], "invalid marker JSON should include errors") + + +def test_savepoint_cli_inspect_json_requires_valid_savepoint_for_resume_ready() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = repo / ".savepoint" / "SAVEPOINT.md" + output.parent.mkdir() + output.write_text( + f"""# Incomplete Savepoint + +## Markers + +```text +SAVEPOINT_V1 +SAVEPOINT_PATH: {output} +SAVEPOINT_MODE: file +DETAILS_READY: not-needed +PROMPT_READY: yes +DISK_RECORDED: yes +VALIDATION_RECORDED: yes +REDACTION_CHECKED: yes +RESUME_READY: yes +BLOCKERS: none +END_SAVEPOINT_V1 +``` +""", + encoding="utf-8", + ) + result = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(output), "--json"], repo) + require(result.returncode == 1, "invalid savepoint body should return inspect exit code 1") + parsed = json.loads(result.stdout) + require(parsed["marker_valid"] is True, "valid marker should remain marker_valid=true") + require(parsed["savepoint_valid"] is False, "invalid savepoint body should set savepoint_valid=false") + 
require(parsed["resume_ready"] is False, "invalid savepoint body should not be resume-ready") + + +def test_savepoint_cli_inspect_missing_or_not_savepoint_returns_2() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + missing = repo / ".savepoint" / "SAVEPOINT.md" + missing_result = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(missing), "--json"], repo) + require(missing_result.returncode == 2, "missing file should return inspect exit code 2") + + note = repo / "note.md" + note.write_text("# Not a savepoint\n", encoding="utf-8") + note_result = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(note), "--json"], repo) + require(note_result.returncode == 2, "non-savepoint file should return inspect exit code 2") + + invalid_utf8 = repo / "invalid-savepoint.md" + invalid_utf8.write_bytes(b"\xff\xfe\xff") + invalid_result = run([sys.executable, str(SAVEPOINT_CLI), "inspect", str(invalid_utf8), "--json"], repo) + require(invalid_result.returncode == 2, "unreadable UTF-8 file should return inspect exit code 2") + parsed = json.loads(invalid_result.stdout) + require(parsed["marker_valid"] is False, "unreadable UTF-8 file should not be marker-valid") + require(parsed["errors"], "unreadable UTF-8 JSON should include errors") + + def test_root_savepoint_cli_forwards_to_portable_cli() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -1311,11 +1450,16 @@ def main() -> int: test_renderer_failed_blocking_project_validation_stays_unsafe, test_renderer_missing_next_action_stays_unsafe, test_renderer_unresolved_blocker_stays_unsafe, + test_renderer_blockers_alias_stays_unsafe, test_renderer_keeps_savepoint_unsafe_without_active_command_assertion, test_renderer_secret_scan_blocks_resume_ready, test_renderer_redacts_secret_even_when_scan_flag_is_omitted, test_root_renderer_forwards_to_portable_renderer, test_savepoint_cli_save_validate_and_inspect, + 
test_savepoint_cli_init_input_defaults_to_unknown_validation, + test_savepoint_cli_inspect_json_reports_invalid_marker, + test_savepoint_cli_inspect_json_requires_valid_savepoint_for_resume_ready, + test_savepoint_cli_inspect_missing_or_not_savepoint_returns_2, test_root_savepoint_cli_forwards_to_portable_cli, test_savepoint_cli_text_mode_does_not_write_recovery_artifact, test_validator_accepts_compact_resume_ready_file_without_repetitive_sections, diff --git a/scripts/validate-repo.py b/scripts/validate-repo.py index 60512b2..6d5cc3a 100644 --- a/scripts/validate-repo.py +++ b/scripts/validate-repo.py @@ -247,7 +247,15 @@ def validate_frontmatter(self) -> None: lower_description = description.lower() if data.get("argument-hint") != "[save|load|text] [next-session focus]": self.fail("SKILL.md argument-hint must describe save/load/text and optional focus") - for term in ["context reset", "session transfer", "sql", "ordinary summaries", "/new", "/status"]: + for term in [ + "context reset", + "session transfer", + "sql", + "ordinary summaries", + "direct code/docs edits without checkpoint intent", + "/new", + "/status", + ]: if term not in lower_description: self.fail(f"frontmatter description must include boundary term: {term}") for phrase in KOREAN_INVOCATION_PHRASES: diff --git a/skills/savepoint/SKILL.md b/skills/savepoint/SKILL.md index b9a2e96..92d4f4b 100644 --- a/skills/savepoint/SKILL.md +++ b/skills/savepoint/SKILL.md @@ -1,6 +1,6 @@ --- name: savepoint -description: "Create or load a recoverable coding-session checkpoint at .savepoint/SAVEPOINT.md so a fresh agent can resume from current repo/Git state. Use for context reset, session transfer, 세이브포인트 만들어줘, 세이브포인트 로드해줘, 세이브포인트 읽어줘, 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, code edits, /status, /new, or app features named savepoint." +description: "Create or load a recoverable coding-session checkpoint at .savepoint/SAVEPOINT.md so a fresh agent can resume from current repo/Git state. 
Use for context reset, session transfer, 세이브포인트 만들어줘, 세이브포인트 로드해줘, 세이브포인트 읽어줘, 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, direct code/docs edits without checkpoint intent, /status, /new, or app features named savepoint." argument-hint: "[save|load|text] [next-session focus]" --- @@ -35,7 +35,7 @@ Native slash-command support depends on the client. If slash prompts are not pas 1. Use the provided focus text, if any, only to narrow the next action. 2. Capture repo state: cwd, Git root, branch, short HEAD, `git status --short`, diff stat, name-status, staged stat, staged name-status, latest commit, instruction files, and durable state files. -3. Write compact input JSON with at least `goal`, `current_state`, `next_action`, `files_to_inspect_first`, `blockers`, and `validation.project.status`. +3. Write compact input JSON with at least `goal`, `current_state`, `next_action`, `files_to_inspect_first`, `unresolved_blockers`, and `validation.project.status`. If starting from blank, use `python3 /scripts/savepoint.py init-input --output .savepoint/input.json`; its default project validation status is `not-run-unknown`. 4. Use project validation status exactly as one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected` or `not-run-justified`, include a reason and next validation command. 5. 
Run: diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index 567fc58..fca373e 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -29,6 +29,7 @@ "not-run-unknown", } PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} +CLEAR_BLOCKER_VALUES = {"none", "no", "not-needed", "not needed"} def parse_args(argv: list[str]) -> argparse.Namespace: @@ -93,6 +94,15 @@ def list_items(value: Any) -> list[str]: return [text] if text else [] +def unresolved_blockers_text(data: dict[str, Any]) -> str: + values = [clean_text(data.get(key), fallback="") for key in ("unresolved_blockers", "blockers")] + recorded = [value for value in values if value] + blocking = [value for value in recorded if value.lower() not in CLEAR_BLOCKER_VALUES] + if blocking: + return "; ".join(blocking) + return "none" + + def project_validation_command_entries(value: Any) -> list[str]: entries: list[str] = [] if not isinstance(value, list): @@ -345,8 +355,8 @@ def blockers_for(data: dict[str, Any], args: argparse.Namespace, redaction_ok: b for field in REQUIRED_TEXT_FIELDS: if clean_text(data.get(field), fallback="") == "": blockers.append(f"missing-{field.replace('_', '-')}") - unresolved = clean_text(data.get("unresolved_blockers"), fallback="none").lower() - if unresolved not in {"none", "no", "not-needed", "not needed"}: + unresolved = unresolved_blockers_text(data).lower() + if unresolved not in CLEAR_BLOCKER_VALUES: blockers.append("unresolved-blockers-recorded") if not args.assert_no_active_commands: blockers.append("active-commands-not-asserted") @@ -476,7 +486,7 @@ def build_savepoint( - Decisions/rationale: {inline_or_block(list_items(data.get("decisions")), empty="no extra decisions recorded")} - Risks/pitfalls: {inline_or_block([*list_items(data.get("risks")), "disk state wins if savepoint claims conflict"])} - Failed approaches: 
{clean_text(data.get("failed_approaches"), fallback="none")} -- Unresolved questions or approval blockers: {clean_text(data.get("unresolved_blockers"), fallback="none")} +- Unresolved questions or approval blockers: {unresolved_blockers_text(data)} - State-file conflicts: {clean_text(data.get("state_file_conflicts"), fallback="none")} ## Validation Manifest diff --git a/skills/savepoint/scripts/savepoint.py b/skills/savepoint/scripts/savepoint.py index 0dd8df3..fe8edbe 100644 --- a/skills/savepoint/scripts/savepoint.py +++ b/skills/savepoint/scripts/savepoint.py @@ -35,6 +35,15 @@ def parse_args(argv: list[str]) -> argparse.Namespace: help=argparse.SUPPRESS, ) + init_input = subcommands.add_parser("init-input", help="Write a sample semantic input JSON file.") + init_input.add_argument( + "--output", + type=Path, + default=Path(".savepoint") / "input.json", + help="Input JSON path to write.", + ) + init_input.add_argument("--force", action="store_true", help="Overwrite an existing input file.") + validate = subcommands.add_parser("validate", help="Validate SAVEPOINT.md artifacts.") validate.add_argument( "--allow-example-paths", @@ -78,22 +87,99 @@ def run_validate(args: argparse.Namespace) -> int: def run_inspect(args: argparse.Namespace) -> int: if not args.savepoint.exists(): - print(f"error: file does not exist: {args.savepoint}", file=sys.stderr) - return 1 - text = args.savepoint.read_text(encoding="utf-8") + if args.json: + print(json.dumps(inspect_payload(args.savepoint, {}, [f"file does not exist: {args.savepoint}"], []), ensure_ascii=True, indent=2, sort_keys=True)) + else: + print(f"error: file does not exist: {args.savepoint}", file=sys.stderr) + return 2 + try: + text = args.savepoint.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as exc: + if args.json: + print(json.dumps(inspect_payload(args.savepoint, {}, [f"failed to read file: {exc}"], []), ensure_ascii=True, indent=2, sort_keys=True)) + else: + print(f"error: failed to read 
file: {exc}", file=sys.stderr) + return 2 values, errors = extract_marker_values(args.savepoint, text) + if not values and any("found 0" in error for error in errors): + if args.json: + print(json.dumps(inspect_payload(args.savepoint, values, errors, []), ensure_ascii=True, indent=2, sort_keys=True)) + else: + for error in errors: + print(f"error: {error}", file=sys.stderr) + return 2 + validation_errors = [] if errors else validate_savepoint.validate_savepoint(args.savepoint) + exit_code = 0 if not errors and not validation_errors else 1 + if args.json: + print(json.dumps(inspect_payload(args.savepoint, values, errors, validation_errors), ensure_ascii=True, indent=2, sort_keys=True)) + return exit_code if errors: for error in errors: print(f"error: {error}", file=sys.stderr) - return 1 - if args.json: - print(json.dumps(values, ensure_ascii=True, indent=2, sort_keys=True)) - return 0 + if validation_errors: + for error in validation_errors: + print(f"error: {error}", file=sys.stderr) + if exit_code != 0: + return exit_code for key, value in values.items(): print(f"{key}: {value}") return 0 +def inspect_payload(path: Path, values: dict[str, str], marker_errors: list[str], validation_errors: list[str]) -> dict[str, object]: + blocker_text = values.get("BLOCKERS", "") + blockers = [] if blocker_text in {"", "none"} else [item for item in blocker_text.split(",") if item] + errors = [*marker_errors, *validation_errors] + marker_valid = bool(values) and not marker_errors + savepoint_valid = marker_valid and not validation_errors + return { + **values, + "path": str(path), + "mode": values.get("SAVEPOINT_MODE"), + "resume_ready": savepoint_valid and values.get("RESUME_READY") == "yes", + "blockers": blockers or (["marker-invalid"] if marker_errors else []), + "marker_valid": marker_valid, + "savepoint_valid": savepoint_valid, + "details_ready": values.get("DETAILS_READY"), + "validation_recorded": values.get("VALIDATION_RECORDED") == "yes", + "redaction_checked": 
values.get("REDACTION_CHECKED") == "yes", + "errors": errors, + } + + +def run_init_input(args: argparse.Namespace) -> int: + output = args.output if args.output.is_absolute() else Path.cwd() / args.output + if output.exists() and not args.force: + print(f"error: output already exists: {output}\nRe-run with --force to overwrite.", file=sys.stderr) + return 1 + sample = { + "goal": "", + "current_state": "", + "next_action": "", + "focus": "", + "unresolved_blockers": "none", + "files_to_inspect_first": [], + "decisions": [], + "risks": [], + "validation": { + "project": { + "status": "not-run-unknown", + "reason": "", + "commands": [], + "next_command": "", + } + }, + } + try: + output.parent.mkdir(parents=True, exist_ok=True) + output.write_text(json.dumps(sample, ensure_ascii=True, indent=2) + "\n", encoding="utf-8") + except OSError as exc: + print(f"error: failed to write input JSON: {exc}", file=sys.stderr) + return 1 + print(f"wrote: {output}") + return 0 + + def run_text(args: argparse.Namespace) -> int: data, error = read_input(args.input) if error or data is None: @@ -118,6 +204,8 @@ def main(argv: list[str] | None = None) -> int: args = parse_args(sys.argv[1:] if argv is None else argv) if args.command == "save": return render_savepoint.main(render_save_argv(args)) + if args.command == "init-input": + return run_init_input(args) if args.command == "validate": return run_validate(args) if args.command == "inspect": From 6214ba8a0774be8cfbc9c091a991d0bf2f1a84d1 Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 13:31:41 +0900 Subject: [PATCH 3/7] fix(savepoint): harden validation posture checks --- README.ko.md | 5 +- README.md | 3 +- scripts/check-savepoint-renderer.py | 303 ++++++++++++++++++ scripts/validate-repo.py | 12 +- skills/savepoint/SKILL.md | 20 +- skills/savepoint/agents/openai.yaml | 5 +- skills/savepoint/references/template.md | 4 +- skills/savepoint/scripts/render_savepoint.py | 63 +++- skills/savepoint/scripts/savepoint.py | 42 ++- 
.../savepoint/scripts/validate_savepoint.py | 56 +++- 10 files changed, 456 insertions(+), 57 deletions(-) diff --git a/README.ko.md b/README.ko.md index da443f4..57f08aa 100644 --- a/README.ko.md +++ b/README.ko.md @@ -1,6 +1,6 @@ # Savepoint -Savepoint는 이전 대화 context에 의존하지 않고 새 코딩 에이전트가 현재 repo/Git 상태에서 이어갈 수 있게 `.savepoint/SAVEPOINT.md`를 생성하거나 검증하는 skill입니다. +Savepoint는 이전 대화 컨텍스트에 의존하지 않고 새 코딩 에이전트가 현재 repo/Git 상태에서 이어갈 수 있게 `.savepoint/SAVEPOINT.md`를 생성하거나 검증하는 skill입니다. ## 30초 사용법 @@ -44,7 +44,7 @@ Savepoint는 이전 대화 context에 의존하지 않고 새 코딩 에이전 - 코드 정답성 - 작업 완료 - 미래 충돌 없음 -- text mode의 repo 복구 +- text mode만으로 repo 상태를 복구할 수 있음 ## Runtime command @@ -111,5 +111,6 @@ python3 scripts/validate-repo.py python3 scripts/check-savepoint-renderer.py python3 scripts/check-install-helper.py python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md +python3 -m compileall -q skills/savepoint/scripts scripts git diff --check ``` diff --git a/README.md b/README.md index fbcdd91..45e2eca 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ If a client does not pass custom slash prompts through, use the natural-language - The code is correct. - The task is complete. - Future conflicts are impossible. -- Text mode can recover repo state. +- Repo recovery from text mode. 
## Runtime command @@ -118,6 +118,7 @@ python3 scripts/validate-repo.py python3 scripts/check-savepoint-renderer.py python3 scripts/check-install-helper.py python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md +python3 -m compileall -q skills/savepoint/scripts scripts git diff --check ``` diff --git a/scripts/check-savepoint-renderer.py b/scripts/check-savepoint-renderer.py index eb13ad8..aeaf43f 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -58,6 +58,14 @@ def load_contract_helper(): return module +def load_validator_helper(): + spec = importlib.util.spec_from_file_location("validate_savepoint_under_test", VALIDATOR) + require(spec is not None and spec.loader is not None, "could not load validator module") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + def git(repo: Path, *args: str) -> None: result = run(["git", *args], repo) require(result.returncode == 0, result.stderr or result.stdout) @@ -588,6 +596,213 @@ def test_renderer_failed_project_validation_stays_unsafe() -> None: require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_validation_status_token_matrix_is_consistent() -> None: + renderer = load_render_helper() + validator = load_validator_helper() + blocking_tokens = ["fail", "fails", "failed", "failing", "failure", "error", "errors"] + for token in blocking_tokens: + require( + renderer.normalize_project_validation_status(f"tests are {token}") == "failed-blocking", + f"renderer did not classify {token!r} as failed-blocking", + ) + validator_text = f"- Project validation: tests are {token}\n" + require( + validator.project_validation_status(validator_text) == "failed-blocking", + f"validator did not classify {token!r} as failed-blocking", + ) + passed_text = f"- 
Project validation: passed: `npm test` - tests are {token}\n" + require( + validator.passed_validation_has_failure_terms(passed_text), + f"validator did not detect failure token {token!r} under passed status", + ) + for token in ["not-run", "not run", "skipped"]: + require( + renderer.normalize_project_validation_status(token) == "not-run-unknown", + f"renderer did not classify {token!r} as not-run-unknown", + ) + require( + validator.project_validation_status(f"- Project validation: {token}\n") == "not-run-unknown", + f"validator did not classify {token!r} as not-run-unknown", + ) + reason_with_passed = "- Project validation: failed-expected: known failure; previous lint passed\n" + require( + validator.project_validation_status(reason_with_passed) == "failed-expected", + "validator should parse canonical status before reason text containing passed", + ) + + +def test_renderer_legacy_mixed_pass_fail_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish mixed legacy validation handling", + "current_state": "legacy validation has both passing and failing commands", + "next_action": "report the failing validation before continuing", + "project_validation": [ + { + "command": "npm run lint", + "result": "passed", + "summary": "lint passed" + }, + { + "command": "npm test", + "result": "failed", + "summary": "auth tests failed" + } + ] +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "mixed pass/fail legacy validation should keep output unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-blocking" in text, "mixed validation blocker missing") + 
require("RESUME_READY: no" in text, "mixed validation must block resume-ready") + validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def test_renderer_passed_project_validation_requires_complete_command_fields() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish complete validation command enforcement", + "current_state": "structured validation is marked passed but omits command details", + "next_action": "record the exact validation command before continuing", + "validation": { + "project": { + "status": "passed", + "commands": [ + { + "result": "passed" + } + ] + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "passed validation without command fields should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-command-missing" in text, "incomplete passed validation blocker missing") + require("RESUME_READY: no" in text, "incomplete passed validation must block resume-ready") + + +def test_renderer_structured_passed_validation_with_failure_text_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish structured validation consistency", + "current_state": "structured validation status says passed but summary records failure", + "next_action": "report validation failure before continuing", + "validation": { + "project": { + "status": "passed", + "commands": [ + { + 
"command": "npm test", + "result": "passed", + "summary": "auth tests failed" + } + ] + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "passed validation with failure text should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-blocking" in text, "structured pass/fail contradiction blocker missing") + require("RESUME_READY: no" in text, "structured pass/fail contradiction must block resume-ready") + + +def test_renderer_structured_passed_validation_with_failing_text_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish structured validation consistency", + "current_state": "structured validation status says passed but summary records failing tests", + "next_action": "report validation failure before continuing", + "validation": { + "project": { + "status": "passed", + "commands": [ + { + "command": "npm test", + "result": "passed", + "summary": "auth tests are failing" + } + ] + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "passed validation with failing text should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-blocking" in text, "structured passed/failing contradiction blocker missing") + require("RESUME_READY: no" in text, "structured passed/failing contradiction must block resume-ready") + + def 
test_renderer_not_run_justified_project_validation_can_resume_ready() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -935,6 +1150,9 @@ def test_savepoint_cli_save_validate_and_inspect() -> None: parsed = json.loads(inspected.stdout) require(parsed["RESUME_READY"] == "yes", "inspect JSON should report resume-ready") require(parsed["SAVEPOINT_MODE"] == "file", "inspect JSON should report file mode") + require(parsed["savepoint_validation"] == "passed", "inspect JSON should include savepoint validation status") + require(parsed["project_validation"]["status"] == "passed", "inspect JSON should include project validation status") + require("next_command" in parsed["project_validation"], "inspect JSON should include project validation next command") def test_savepoint_cli_init_input_defaults_to_unknown_validation() -> None: @@ -1034,6 +1252,17 @@ def test_savepoint_cli_inspect_missing_or_not_savepoint_returns_2() -> None: require(parsed["errors"], "unreadable UTF-8 JSON should include errors") +def test_savepoint_cli_validate_directory_returns_error() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + directory = repo / ".savepoint" + directory.mkdir() + result = run([sys.executable, str(SAVEPOINT_CLI), "validate", str(directory)], repo) + require(result.returncode != 0, "validate should reject directory paths") + require("not a file" in result.stderr, "validate directory error should name non-file path") + require("Traceback" not in result.stderr, "validate directory error should not print traceback") + + def test_root_savepoint_cli_forwards_to_portable_cli() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -1066,6 +1295,10 @@ def test_savepoint_cli_text_mode_does_not_write_recovery_artifact() -> None: require(result.returncode == 0, result.stderr or result.stdout) require("No file was written." 
in result.stdout, "text mode should say no file was written") require("Repo recovery is not guaranteed." in result.stdout, "text mode should avoid recovery guarantees") + require("Blockers:" in result.stdout, "text mode should include blockers") + require("Risks:" in result.stdout, "text mode should include risks") + require("Files to inspect first:" in result.stdout, "text mode should include first files") + require("Validation:" in result.stdout, "text mode should include validation posture") require("SAVEPOINT_V1" not in result.stdout, "text mode should not emit machine marker by default") require("RESUME_READY: yes" not in result.stdout, "text mode must not claim resume-ready") require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), "text mode wrote a recovery artifact") @@ -1252,6 +1485,66 @@ def test_compact_validator_rejects_expected_failure_without_next_validation() -> require("next validation" in validation.stderr, "missing next validation error not reported") +def test_compact_validator_rejects_not_run_justified_without_reason() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="not-run-justified", + skipped_checks="python scripts/check-savepoint-renderer.py", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted justified not-run without reason") + require("requires a reason" in validation.stderr, "missing reason error not reported") + + +def test_compact_validator_rejects_expected_failure_without_reason() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected", + skipped_checks="python -m pytest tests/auth", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact 
validator accepted expected failure without reason") + require("requires a reason" in validation.stderr, "missing expected-failure reason error not reported") + + +def test_compact_validator_rejects_passed_validation_with_failure_text() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="passed: `npm test` - auth tests failed", + skipped_checks="none", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted passed validation with failure text") + require("cannot include failure terms" in validation.stderr, "passed validation failure-text error not reported") + + +def test_compact_validator_status_parsing_is_hash_seed_stable() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected: known failure; previous lint passed", + skipped_checks="python -m pytest tests/auth", + ) + for seed in ["0", "3", "42"]: + env = {**os.environ, "PYTHONHASHSEED": seed} + validation = subprocess.run( + [sys.executable, str(VALIDATOR), str(output)], + cwd=repo, + text=True, + capture_output=True, + check=False, + env=env, + ) + require(validation.returncode == 0, f"validator status parsing changed under PYTHONHASHSEED={seed}: {validation.stderr or validation.stdout}") + + def test_compact_validator_still_requires_disk_snapshot_fields() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo(Path(tmp)) @@ -1444,6 +1737,11 @@ def main() -> int: test_renderer_minimal_json_without_project_validation_stays_unsafe, test_renderer_exit_code_uses_marker_not_body_resume_ready_text, test_renderer_failed_project_validation_stays_unsafe, + test_validation_status_token_matrix_is_consistent, + test_renderer_legacy_mixed_pass_fail_stays_unsafe, + 
test_renderer_passed_project_validation_requires_complete_command_fields, + test_renderer_structured_passed_validation_with_failure_text_stays_unsafe, + test_renderer_structured_passed_validation_with_failing_text_stays_unsafe, test_renderer_not_run_justified_project_validation_can_resume_ready, test_renderer_failed_expected_project_validation_can_resume_ready, test_renderer_not_run_justified_without_next_validation_stays_unsafe, @@ -1460,6 +1758,7 @@ def main() -> int: test_savepoint_cli_inspect_json_reports_invalid_marker, test_savepoint_cli_inspect_json_requires_valid_savepoint_for_resume_ready, test_savepoint_cli_inspect_missing_or_not_savepoint_returns_2, + test_savepoint_cli_validate_directory_returns_error, test_root_savepoint_cli_forwards_to_portable_cli, test_savepoint_cli_text_mode_does_not_write_recovery_artifact, test_validator_accepts_compact_resume_ready_file_without_repetitive_sections, @@ -1468,6 +1767,10 @@ def main() -> int: test_compact_validator_rejects_not_run_unknown_project_validation, test_compact_validator_rejects_failed_blocking_project_validation, test_compact_validator_rejects_expected_failure_without_next_validation, + test_compact_validator_rejects_not_run_justified_without_reason, + test_compact_validator_rejects_expected_failure_without_reason, + test_compact_validator_rejects_passed_validation_with_failure_text, + test_compact_validator_status_parsing_is_hash_seed_stable, test_compact_validator_still_requires_disk_snapshot_fields, test_compact_validator_rejects_skipped_none_without_project_pass, test_compact_validator_requires_redaction_evidence, diff --git a/scripts/validate-repo.py b/scripts/validate-repo.py index 6d5cc3a..380bb99 100644 --- a/scripts/validate-repo.py +++ b/scripts/validate-repo.py @@ -253,6 +253,8 @@ def validate_frontmatter(self) -> None: "sql", "ordinary summaries", "direct code/docs edits without checkpoint intent", + "pty/session control", + "session rotation", "/new", "/status", ]: @@ -340,6 +342,11 @@ def 
validate_references(self) -> None: ]: if phrase not in template_text: self.fail(f"savepoint-template.md missing phrase: {phrase}") + skill_template_text = self.read(SKILL_REFERENCE_DIR / "template.md") + marker_index = skill_template_text.find("END_SAVEPOINT_V1") + closing_index = skill_template_text.rfind("```\n````") + if marker_index == -1 or closing_index == -1 or closing_index < marker_index: + self.fail("skills/savepoint/references/template.md must keep the SAVEPOINT_V1 marker inside the copyable template fence") context_text = self.read(REFERENCE_DIR / "context-packaging.md") for phrase in [ "Budget guidance is advisory, not a validation rule.", @@ -375,6 +382,7 @@ def validate_readme_format(self) -> None: ".savepoint/SAVEPOINT.md", "scripts/savepoint.py", "scripts/validate-repo.py", + "python3 -m compileall", ]: if phrase not in readme_text: self.fail(f"README.md missing entry: {phrase}") @@ -389,6 +397,7 @@ def validate_readme_format(self) -> None: ".savepoint/SAVEPOINT.md", "scripts/savepoint.py", "scripts/validate-repo.py", + "python3 -m compileall", ]: if phrase not in readme_ko_text: self.fail(f"README.ko.md missing entry: {phrase}") @@ -400,10 +409,11 @@ def validate_agent_metadata(self) -> None: 'display_name: "Savepoint"', "short_description:", "default_prompt:", - "allow_implicit_invocation: true", ]: if phrase not in text: self.fail(f"agents/openai.yaml missing phrase: {phrase}") + if "allow_implicit_invocation:" in text and "allow_implicit_invocation: true" not in text: + self.fail("agents/openai.yaml allow_implicit_invocation must be true when present") match = re.search(r'(?m)^\s*default_prompt:\s*"([^"]+)"\s*$', text) if not match: self.fail("agents/openai.yaml default_prompt must be a quoted single-line string") diff --git a/skills/savepoint/SKILL.md b/skills/savepoint/SKILL.md index 92d4f4b..3be4df1 100644 --- a/skills/savepoint/SKILL.md +++ b/skills/savepoint/SKILL.md @@ -1,6 +1,6 @@ --- name: savepoint -description: "Create or load a 
recoverable coding-session checkpoint at .savepoint/SAVEPOINT.md so a fresh agent can resume from current repo/Git state. Use for context reset, session transfer, 세이브포인트 만들어줘, 세이브포인트 로드해줘, 세이브포인트 읽어줘, 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, direct code/docs edits without checkpoint intent, /status, /new, or app features named savepoint." +description: "Create or load a recoverable coding-session checkpoint at .savepoint/SAVEPOINT.md so a fresh agent can resume from current repo/Git state. Use for context reset, session transfer, 세이브포인트 만들어줘, 세이브포인트 로드해줘, 세이브포인트 읽어줘, 세이브포인트 이어서 해줘. Not for SQL SAVEPOINT, ordinary summaries, direct code/docs edits without checkpoint intent, /status, /new, PTY/session control, session rotation, or app features named savepoint." argument-hint: "[save|load|text] [next-session focus]" --- @@ -33,11 +33,10 @@ Native slash-command support depends on the client. If slash prompts are not pas ## Create / Save -1. Use the provided focus text, if any, only to narrow the next action. -2. Capture repo state: cwd, Git root, branch, short HEAD, `git status --short`, diff stat, name-status, staged stat, staged name-status, latest commit, instruction files, and durable state files. -3. Write compact input JSON with at least `goal`, `current_state`, `next_action`, `files_to_inspect_first`, `unresolved_blockers`, and `validation.project.status`. If starting from blank, use `python3 /scripts/savepoint.py init-input --output .savepoint/input.json`; its default project validation status is `not-run-unknown`. -4. Use project validation status exactly as one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected` or `not-run-justified`, include a reason and next validation command. -5. Run: +1. Treat provided focus text, if any, only as next-session focus. +2. 
Capture repo/Git state and write compact input JSON with `goal`, `current_state`, `next_action`, `files_to_inspect_first`, and `unresolved_blockers`; start with `python3 /scripts/savepoint.py init-input --output .savepoint/input.json` if blank. +3. Set `validation.project.status` to one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected` or `not-run-justified`, include a reason and next validation command. +4. Run: ```bash python3 /scripts/savepoint.py save --input .savepoint/input.json --output .savepoint/SAVEPOINT.md --assert-no-active-commands --scan-redaction --validate @@ -45,11 +44,10 @@ python3 /scripts/savepoint.py save --input .savepoint/input Inside this repository, `python3 scripts/savepoint.py save ...` also works. -6. For refresh, append `--force` only when the existing file is the generated, untracked, valid default artifact `.savepoint/SAVEPOINT.md` and the user did not ask to preserve history; otherwise preserve or ask. -7. Inspect only the generated `.savepoint/SAVEPOINT.md`. -8. Report exact path, `RESUME_READY`, blockers if any, and the first next action. +5. Inspect only the generated `.savepoint/SAVEPOINT.md`. +6. Report exact path, `RESUME_READY`, blockers if any, and the first next action. -Renderer exit code `2` can still mean a not-ready `SAVEPOINT.md` was written. Inspect the file, report blockers, and do not continue unless `RESUME_READY: yes`. +`savepoint.py save` exit code `2` can still mean a not-ready `SAVEPOINT.md` was written. Inspect the file, report blockers, and do not continue unless `RESUME_READY: yes`. ## Load / Resume @@ -75,6 +73,8 @@ Text mode must not claim `.savepoint/SAVEPOINT.md` was written, repo recovery is Read `references/contract.md` only when marker semantics, cleanup, stale savepoints, detail spillover, overwrite adoption, or safe-resume edge cases are unclear. 
+For refresh, append `--force` only when the existing file is the generated, untracked, valid default artifact `.savepoint/SAVEPOINT.md` and the user did not ask to preserve history; otherwise preserve or ask. + Read `references/safety.md` only when secret redaction or secret-like paths are involved. Read `references/template.md` only when the renderer is unavailable and a manual artifact is unavoidable. diff --git a/skills/savepoint/agents/openai.yaml b/skills/savepoint/agents/openai.yaml index ffd1540..21fc640 100644 --- a/skills/savepoint/agents/openai.yaml +++ b/skills/savepoint/agents/openai.yaml @@ -1,7 +1,4 @@ interface: display_name: "Savepoint" short_description: "Recoverable coding-session checkpoint" - default_prompt: "Use $savepoint to create, load, verify, or text copy-paste .savepoint/SAVEPOINT.md with repo/Git state, redaction, validation posture, and a resume prompt." - -policy: - allow_implicit_invocation: true + default_prompt: "Use $savepoint to create or load .savepoint/SAVEPOINT.md, verify repo/Git resume safety, or produce a text-only copy-paste handoff without file recovery." diff --git a/skills/savepoint/references/template.md b/skills/savepoint/references/template.md index 83a90b5..337dfdf 100644 --- a/skills/savepoint/references/template.md +++ b/skills/savepoint/references/template.md @@ -58,7 +58,7 @@ Use only when `savepoint.py save` is unavailable. ## Resume Prompt ```text Read this savepoint, verify cwd/Git state/status/diff, read listed instruction/state files, and compare all claims with disk state. Disk state wins. Report drift first, then continue only if the user requested continuation and RESUME_READY is yes. 
-```` +``` ## Markers ```text @@ -74,4 +74,4 @@ RESUME_READY: yes|no BLOCKERS: none| END_SAVEPOINT_V1 ``` -``` +```` diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index fca373e..e932ebd 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -30,6 +30,8 @@ } PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} CLEAR_BLOCKER_VALUES = {"none", "no", "not-needed", "not needed"} +VALIDATION_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") +VALIDATION_BLOCKING_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors)\b") def parse_args(argv: list[str]) -> argparse.Namespace: @@ -117,33 +119,47 @@ def project_validation_command_entries(value: Any) -> list[str]: return entries +def project_validation_command_complete(item: Any) -> bool: + if not isinstance(item, dict): + return False + return all( + clean_text(item.get(field), fallback="") != "" + for field in ["command", "result", "summary"] + ) + + +def project_validation_commands_complete(value: Any) -> bool: + if not isinstance(value, list) or not value: + return False + return all(project_validation_command_complete(item) for item in value) + + def project_validation_passed(value: Any) -> bool: if not isinstance(value, list): return False + saw_valid_entry = False for item in value: - if not isinstance(item, dict): - continue + if not project_validation_command_complete(item): + return False result = clean_text(item.get("result"), fallback="").lower() summary = clean_text(item.get("summary"), fallback="").lower() combined = f"{result} {summary}" - if re.search(r"\b(pass|passed|ok|success|succeeded)\b", combined) and not re.search( - r"\b(fail|failed|error|not-run|not run|skipped)\b", - combined, - ): - return True - return False + passed = bool(re.search(r"\b(pass|passed|ok|success|succeeded)\b", combined)) 
and not VALIDATION_FAILURE_RE.search(combined) + if passed: + saw_valid_entry = True + continue + if not passed: + return False + return saw_valid_entry def normalize_project_validation_status(value: Any) -> str: status = clean_text(value, fallback="").lower().replace("_", "-") if status in PROJECT_VALIDATION_STATUSES: return status - if re.search(r"\b(pass|passed|ok|success|succeeded)\b", status) and not re.search( - r"\b(fail|failed|error|not-run|not run|skipped)\b", - status, - ): + if re.search(r"\b(pass|passed|ok|success|succeeded)\b", status) and not VALIDATION_FAILURE_RE.search(status): return "passed" - if re.search(r"\b(fail|failed|error)\b", status): + if VALIDATION_BLOCKING_FAILURE_RE.search(status): return "failed-blocking" if re.search(r"\b(not-run|not run|skipped)\b", status): return "not-run-unknown" @@ -155,22 +171,33 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: project = validation.get("project") if isinstance(validation, dict) else None if isinstance(project, dict): status = normalize_project_validation_status(project.get("status")) - commands = project_validation_command_entries(project.get("commands")) + raw_commands = project.get("commands") + commands = project_validation_command_entries(raw_commands) reason = clean_text(project.get("reason"), fallback="") next_validation = clean_text( project.get("next_validation", project.get("next_command", project.get("next"))), fallback="", ) + if ( + status == "passed" + and commands + and project_validation_commands_complete(raw_commands) + and not project_validation_passed(raw_commands) + ): + status = "failed-blocking" + reason = reason or clean_text(commands[0], fallback="project validation failed") return { "status": status, "commands": commands, "reason": reason, "next_validation": next_validation, "source": "validation.project", + "commands_complete": project_validation_commands_complete(raw_commands), } legacy = data.get("project_validation") commands = 
project_validation_command_entries(legacy) + commands_complete = project_validation_commands_complete(legacy) next_validation = clean_text(data.get("skipped_checks_next_validation"), fallback="") if not commands: return { @@ -179,6 +206,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "reason": "", "next_validation": next_validation, "source": "legacy", + "commands_complete": False, } if project_validation_passed(legacy): return { @@ -187,6 +215,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "reason": "", "next_validation": next_validation, "source": "legacy", + "commands_complete": commands_complete, } combined = " ".join(commands).lower() @@ -198,6 +227,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "reason": reason, "next_validation": next_validation, "source": "legacy", + "commands_complete": commands_complete, } return { "status": "failed-blocking", @@ -205,6 +235,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "reason": clean_text(commands[0], fallback="project validation failed"), "next_validation": next_validation, "source": "legacy", + "commands_complete": commands_complete, } @@ -227,7 +258,7 @@ def project_validation_entries(data: dict[str, Any]) -> list[str]: def project_validation_recorded(posture: dict[str, Any]) -> bool: status = posture["status"] if status == "passed": - return bool(posture["commands"]) + return bool(posture["commands"]) and bool(posture.get("commands_complete")) if status == "failed-blocking": return bool(posture["commands"] or posture["reason"]) if status == "failed-expected": @@ -374,7 +405,7 @@ def blockers_for(data: dict[str, Any], args: argparse.Namespace, redaction_ok: b blockers.append("validation-reason-missing") if not posture["next_validation"]: blockers.append("validation-next-command-missing") - elif posture["status"] == "passed" and not posture["commands"]: + elif posture["status"] == "passed" and not 
posture.get("commands_complete"): blockers.append("validation-command-missing") if not args.run_savepoint_validation: blockers.append("savepoint-validation-not-run") diff --git a/skills/savepoint/scripts/savepoint.py b/skills/savepoint/scripts/savepoint.py index fe8edbe..513f1a6 100644 --- a/skills/savepoint/scripts/savepoint.py +++ b/skills/savepoint/scripts/savepoint.py @@ -10,7 +10,16 @@ import render_savepoint import validate_savepoint -from render_savepoint import clean_text, next_action_text, read_input, redact_secret_patterns +from render_savepoint import ( + clean_text, + inline_or_block, + list_items, + next_action_text, + project_validation_entries, + read_input, + redact_secret_patterns, + unresolved_blockers_text, +) from savepoint_contract import extract_marker_values @@ -86,11 +95,11 @@ def run_validate(args: argparse.Namespace) -> int: def run_inspect(args: argparse.Namespace) -> int: - if not args.savepoint.exists(): + if not args.savepoint.is_file(): if args.json: - print(json.dumps(inspect_payload(args.savepoint, {}, [f"file does not exist: {args.savepoint}"], []), ensure_ascii=True, indent=2, sort_keys=True)) + print(json.dumps(inspect_payload(args.savepoint, {}, [f"file does not exist or is not a file: {args.savepoint}"], []), ensure_ascii=True, indent=2, sort_keys=True)) else: - print(f"error: file does not exist: {args.savepoint}", file=sys.stderr) + print(f"error: file does not exist or is not a file: {args.savepoint}", file=sys.stderr) return 2 try: text = args.savepoint.read_text(encoding="utf-8") @@ -103,7 +112,7 @@ def run_inspect(args: argparse.Namespace) -> int: values, errors = extract_marker_values(args.savepoint, text) if not values and any("found 0" in error for error in errors): if args.json: - print(json.dumps(inspect_payload(args.savepoint, values, errors, []), ensure_ascii=True, indent=2, sort_keys=True)) + print(json.dumps(inspect_payload(args.savepoint, values, errors, [], text=text), ensure_ascii=True, indent=2, 
sort_keys=True)) else: for error in errors: print(f"error: {error}", file=sys.stderr) @@ -111,7 +120,7 @@ def run_inspect(args: argparse.Namespace) -> int: validation_errors = [] if errors else validate_savepoint.validate_savepoint(args.savepoint) exit_code = 0 if not errors and not validation_errors else 1 if args.json: - print(json.dumps(inspect_payload(args.savepoint, values, errors, validation_errors), ensure_ascii=True, indent=2, sort_keys=True)) + print(json.dumps(inspect_payload(args.savepoint, values, errors, validation_errors, text=text), ensure_ascii=True, indent=2, sort_keys=True)) return exit_code if errors: for error in errors: @@ -126,12 +135,21 @@ def run_inspect(args: argparse.Namespace) -> int: return 0 -def inspect_payload(path: Path, values: dict[str, str], marker_errors: list[str], validation_errors: list[str]) -> dict[str, object]: +def inspect_payload( + path: Path, + values: dict[str, str], + marker_errors: list[str], + validation_errors: list[str], + *, + text: str = "", +) -> dict[str, object]: blocker_text = values.get("BLOCKERS", "") blockers = [] if blocker_text in {"", "none"} else [item for item in blocker_text.split(",") if item] errors = [*marker_errors, *validation_errors] marker_valid = bool(values) and not marker_errors savepoint_valid = marker_valid and not validation_errors + project_status = validate_savepoint.project_validation_status(text) if text and marker_valid else "unknown" + next_command = validate_savepoint.field_value_or_block(text, "- Skipped checks / next validation:") if text and marker_valid else "" return { **values, "path": str(path), @@ -141,6 +159,11 @@ def inspect_payload(path: Path, values: dict[str, str], marker_errors: list[str] "marker_valid": marker_valid, "savepoint_valid": savepoint_valid, "details_ready": values.get("DETAILS_READY"), + "savepoint_validation": "passed" if savepoint_valid else "failed", + "project_validation": { + "status": project_status, + "next_command": next_command, + }, 
"validation_recorded": values.get("VALIDATION_RECORDED") == "yes", "redaction_checked": values.get("REDACTION_CHECKED") == "yes", "errors": errors, @@ -190,6 +213,11 @@ def run_text(args: argparse.Namespace) -> int: Goal: {clean_text(data.get("goal"))} Current state: {clean_text(data.get("current_state"))} Next action: {next_action_text(data)} +Blockers: {unresolved_blockers_text(data)} +Risks: {inline_or_block(list_items(data.get("risks")), empty="none")} +Files to inspect first: {inline_or_block(list_items(data.get("files_to_inspect_first")), empty="none")} +Relevant artifacts: {inline_or_block(list_items(data.get("relevant_artifacts", data.get("artifacts"))), empty="none")} +Validation: {inline_or_block(project_validation_entries(data), empty="not-run-unknown: no project validation reason or next validation recorded")} No file was written. Repo recovery is not guaranteed. diff --git a/skills/savepoint/scripts/validate_savepoint.py b/skills/savepoint/scripts/validate_savepoint.py index d011b29..584d670 100644 --- a/skills/savepoint/scripts/validate_savepoint.py +++ b/skills/savepoint/scripts/validate_savepoint.py @@ -107,7 +107,15 @@ "not-run-justified", "not-run-unknown", } +PROJECT_VALIDATION_STATUS_ORDER = [ + "failed-expected", + "failed-blocking", + "not-run-justified", + "not-run-unknown", + "passed", +] PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} +VALIDATION_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") def validate_savepoint(path: Path, allow_example_paths: bool = False) -> list[str]: @@ -231,26 +239,46 @@ def validate_validation_status(path: Path, text: str) -> list[str]: errors.append(f"{path}: RESUME_READY=yes cannot use Project validation status not-run-unknown") elif status == "failed-blocking": errors.append(f"{path}: RESUME_READY=yes cannot use Project validation status failed-blocking") - elif status in PROJECT_VALIDATION_NEXT_REQUIRED and skipped_absent: - 
errors.append( - f"{path}: Project validation status {status} requires a next validation command" - ) + elif status == "passed" and passed_validation_has_failure_terms(text): + errors.append(f"{path}: Project validation status passed cannot include failure terms") + elif status in PROJECT_VALIDATION_NEXT_REQUIRED: + if not project_validation_reason_present(text, status): + errors.append(f"{path}: Project validation status {status} requires a reason") + if skipped_absent: + errors.append( + f"{path}: Project validation status {status} requires a next validation command" + ) return errors +def passed_validation_has_failure_terms(text: str) -> bool: + value = field_value_or_block(text, "- Project validation:").lower().replace("_", "-") + if "passed" not in value: + return False + return bool(VALIDATION_FAILURE_RE.search(value)) + + +def project_validation_reason_present(text: str, status: str) -> bool: + value = field_value_or_block(text, "- Project validation:") + normalized = value.lower().replace("_", "-") + index = normalized.find(status) + if index == -1: + return False + reason = value[index + len(status):].strip(" :-`\n\t") + return not is_placeholder_value(reason, allow_absence=False) + + def project_validation_status(text: str) -> str: value = field_value_or_block(text, "- Project validation:").lower().replace("_", "-") - for status in PROJECT_VALIDATION_STATUSES: - if status in value: + lead = value.strip().splitlines()[0] if value.strip() else "" + for status in PROJECT_VALIDATION_STATUS_ORDER: + if re.match(rf"^`?{re.escape(status)}\b", lead): return status - if re.search(r"\b(pass|passed|ok|success|succeeded)\b", value) and not re.search( - r"\b(fail|failed|error|not-run|not run|skipped)\b", - value, - ): + if re.search(r"\b(pass|passed|ok|success|succeeded)\b", lead) and not VALIDATION_FAILURE_RE.search(lead): return "passed" - if re.search(r"\b(fail|failed|error)\b", value): + if re.search(r"\b(fail|fails|failed|failing|failure|error|errors)\b", lead): 
return "failed-blocking" - if re.search(r"\b(not-run|not run|skipped)\b", value): + if re.search(r"\b(not-run|not run|skipped)\b", lead): return "not-run-unknown" return "not-run-unknown" @@ -332,8 +360,8 @@ def main(argv: list[str] | None = None) -> int: errors: list[str] = [] for path in args.savepoints: - if not path.exists(): - errors.append(f"{path}: file does not exist") + if not path.is_file(): + errors.append(f"{path}: file does not exist or is not a file") continue errors.extend(validate_savepoint(path, allow_example_paths=args.allow_example_paths)) From bdddac89a42aa66df7a5bd8bde6bb7465cdcebbe Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 14:17:54 +0900 Subject: [PATCH 4/7] fix(savepoint): remove legacy compatibility surface --- AGENTS.md | 2 +- README.ko.md | 4 +- README.md | 4 +- docs/reference/savepoint-contract.md | 4 +- examples/SAVEPOINT.filled.example.md | 2 +- examples/file-architecture/SAVEPOINT.md | 2 +- examples/file-bugfix/SAVEPOINT.md | 2 +- examples/unsafe-savepoint/SAVEPOINT.md | 2 +- scripts/check-savepoint-renderer.py | 200 ++++++++++-------- scripts/render_savepoint.py | 18 -- scripts/validate-repo.py | 6 +- scripts/validate_savepoint.py | 18 -- skills/savepoint/scripts/render_savepoint.py | 84 +++----- skills/savepoint/scripts/savepoint.py | 12 +- .../savepoint/scripts/validate_savepoint.py | 2 +- 15 files changed, 163 insertions(+), 199 deletions(-) delete mode 100644 scripts/render_savepoint.py delete mode 100644 scripts/validate_savepoint.py diff --git a/AGENTS.md b/AGENTS.md index f6f3b9f..79baa2e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,6 +34,6 @@ python3 scripts/validate-examples.py python3 scripts/validate-repo.py python3 scripts/check-savepoint-renderer.py python3 scripts/check-install-helper.py -python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md 
+python3 scripts/savepoint.py validate --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md git diff --check ``` diff --git a/README.ko.md b/README.ko.md index 57f08aa..0246138 100644 --- a/README.ko.md +++ b/README.ko.md @@ -58,7 +58,7 @@ python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json python3 scripts/savepoint.py text --input .savepoint/input.json ``` -portable skill entrypoint는 `skills/savepoint/scripts/savepoint.py`입니다. 기존 `scripts/render_savepoint.py`, `scripts/validate_savepoint.py` wrapper는 호환성을 위해 유지합니다. +portable skill entrypoint는 `skills/savepoint/scripts/savepoint.py`입니다. repository-local 명령은 `scripts/savepoint.py`를 사용합니다. `inspect --json`은 파일과 marker가 valid이면 `0`, savepoint-like 파일을 읽었지만 invalid이면 `1`, 파일을 읽을 수 없거나 savepoint artifact가 아니면 `2`로 종료합니다. @@ -110,7 +110,7 @@ python3 scripts/validate-examples.py python3 scripts/validate-repo.py python3 scripts/check-savepoint-renderer.py python3 scripts/check-install-helper.py -python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md +python3 scripts/savepoint.py validate --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md python3 -m compileall -q skills/savepoint/scripts scripts git diff --check ``` diff --git a/README.md b/README.md index 45e2eca..b531790 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ python3 scripts/savepoint.py inspect .savepoint/SAVEPOINT.md --json python3 scripts/savepoint.py text --input .savepoint/input.json ``` -The portable skill entrypoint is `skills/savepoint/scripts/savepoint.py`. 
Legacy wrappers `scripts/render_savepoint.py` and `scripts/validate_savepoint.py` remain for compatibility. +The portable skill entrypoint is `skills/savepoint/scripts/savepoint.py`; repository-local commands use `scripts/savepoint.py`. `inspect --json` exits `0` when the file and marker are valid, `1` when a savepoint-like file is parsed but invalid, and `2` when the file cannot be read or is not a savepoint artifact. @@ -117,7 +117,7 @@ python3 scripts/validate-examples.py python3 scripts/validate-repo.py python3 scripts/check-savepoint-renderer.py python3 scripts/check-install-helper.py -python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md +python3 scripts/savepoint.py validate --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md python3 -m compileall -q skills/savepoint/scripts scripts git diff --check ``` diff --git a/docs/reference/savepoint-contract.md b/docs/reference/savepoint-contract.md index 8d8c71c..28656bc 100644 --- a/docs/reference/savepoint-contract.md +++ b/docs/reference/savepoint-contract.md @@ -55,7 +55,7 @@ Required list labels checked by the validator must begin at column 0. Multi-line values may continue on indented lines under the label. The required shape describes recovery facts, not prose volume; routine savepoints should satisfy fields with terse values unless a concrete risk needs detail. Expanded template sections such as `Recovery Contract`, `Session Target`, and `Remaining Work` are allowed, but they are not required when the same safety facts are represented once elsewhere. -For token-efficient finalized artifacts, `scripts/render_savepoint.py` may render/finalize the Markdown from compact semantic JSON while preserving the v1 marker schema and safety checks. 
It should derive Git snapshot fields, marker values, redaction status, and savepoint-validation status instead of asking the agent to hand-write them. +For token-efficient finalized artifacts, `scripts/savepoint.py save` may render/finalize the Markdown from compact semantic JSON while preserving the v1 marker schema and safety checks. It should derive Git snapshot fields, marker values, redaction status, and savepoint-validation status instead of asking the agent to hand-write them. After automatic context compaction, an intentional session reset, or an agent transfer, record recovery uncertainty in existing body fields such as `Expected drift`, `Unknown or unverified`, `Required Reading`, and `Recovery Notes`; do not add marker fields or new modes unless the marker schema is intentionally versioned. @@ -220,7 +220,7 @@ Set `RESUME_READY: yes` only when all are true: - the next step is singular, executable, and narrow. - `BLOCKERS: none`. -When file artifacts are written, attempt the bundled savepoint validator (`validate_savepoint.py`) after final artifact edits when it is available. If the validator reports errors, correct them and rerun the validator before completion; a failed savepoint validation makes the savepoint unsafe. Do not claim validation passed unless the command actually ran and passed. +When file artifacts are written, attempt the bundled savepoint validator (`scripts/savepoint.py validate`) after final artifact edits when it is available. If the validator reports errors, correct them and rerun the validator before completion; a failed savepoint validation makes the savepoint unsafe. Do not claim validation passed unless the command actually ran and passed. `RESUME_READY: yes` means a fresh session can reconstruct state and continue. It does not mean tests pass, code is correct, or the task is complete. 
diff --git a/examples/SAVEPOINT.filled.example.md b/examples/SAVEPOINT.filled.example.md index e2b089e..8e97418 100644 --- a/examples/SAVEPOINT.filled.example.md +++ b/examples/SAVEPOINT.filled.example.md @@ -70,7 +70,7 @@ Read in this order: ## Validation Manifest -- Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. +- Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. - Project validation: failed-expected: `npm test -- ReportTable.test.tsx` failed; no matching CSV export assertions yet. - Skipped checks / next validation: full suite not run because focused tests are missing; next `npm test -- ReportTable.test.tsx`, then `npm run lint` if available. - Secret redaction check: manual artifact scan diff --git a/examples/file-architecture/SAVEPOINT.md b/examples/file-architecture/SAVEPOINT.md index df5811e..bbc1162 100644 --- a/examples/file-architecture/SAVEPOINT.md +++ b/examples/file-architecture/SAVEPOINT.md @@ -69,7 +69,7 @@ ## Validation Manifest -- Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. +- Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. - Project validation: failed-expected: `npm test -- tests/billing/invoice.integration.test.ts` failed on timestamp formatting; see `details/validation.md`. - Skipped checks / next validation: full suite not run while focused integration test is red; next focused integration test. - Secret redaction check: manual artifact scan diff --git a/examples/file-bugfix/SAVEPOINT.md b/examples/file-bugfix/SAVEPOINT.md index 10b4a70..b7acf10 100644 --- a/examples/file-bugfix/SAVEPOINT.md +++ b/examples/file-bugfix/SAVEPOINT.md @@ -65,7 +65,7 @@ ## Validation Manifest -- Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. 
+- Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. - Project validation: `npm run lint` passed; `npm test -- tests/auth/session.test.ts` passed. - Skipped checks / next validation: none; rerun focused test if editing auth/session behavior. - Secret redaction check: manual artifact scan diff --git a/examples/unsafe-savepoint/SAVEPOINT.md b/examples/unsafe-savepoint/SAVEPOINT.md index 0d8ee02..f4803a4 100644 --- a/examples/unsafe-savepoint/SAVEPOINT.md +++ b/examples/unsafe-savepoint/SAVEPOINT.md @@ -65,7 +65,7 @@ ## Validation Manifest -- Savepoint validation: `python3 scripts/validate_savepoint.py .savepoint/SAVEPOINT.md` passed. +- Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. - Project validation: `npm test -- checkout` still running; failure lines Unknown. - Skipped checks / next validation: current check has not completed; wait for `npm test -- checkout` result. - Secret redaction check: manual artifact scan diff --git a/scripts/check-savepoint-renderer.py b/scripts/check-savepoint-renderer.py index aeaf43f..7df6e43 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -17,7 +17,6 @@ ROOT = Path(__file__).resolve().parents[1] RENDER_HELPER = ROOT / "skills" / "savepoint" / "scripts" / "render_savepoint.py" SAVEPOINT_CLI = ROOT / "skills" / "savepoint" / "scripts" / "savepoint.py" -ROOT_RENDERER = ROOT / "scripts" / "render_savepoint.py" ROOT_SAVEPOINT_CLI = ROOT / "scripts" / "savepoint.py" VALIDATOR = ROOT / "skills" / "savepoint" / "scripts" / "validate_savepoint.py" HELPER_SCRIPT_DIR = RENDER_HELPER.parent @@ -99,20 +98,22 @@ def semantic_input(repo: Path) -> Path: "goal": "finish deterministic savepoint rendering", "current_state": "renderer input has enough semantic fields for a recoverable savepoint", "next_action": "run the focused validation commands", - "done_when": "savepoint validation and project validation are both 
recorded", - "out_of_scope": "marker schema changes", - "smallest_next_step": "run python scripts/check-savepoint-renderer.py", "decisions": ["keep SAVEPOINT_V1 marker fields and order unchanged"], "risks": ["disk state can drift after the snapshot is captured"], "failed_approaches": "none", "unresolved_blockers": "none", - "project_validation": [ - { - "command": "python scripts/check-savepoint-renderer.py", - "result": "passed", - "summary": "renderer fixture validation recorded" + "validation": { + "project": { + "status": "passed", + "commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "renderer fixture validation recorded" + } + ] } - ], + }, "observable_completion": "check-savepoint-renderer exits 0", "inspected_without_change": ["README.md"], "files_to_inspect_first": ["app.py"] @@ -133,13 +134,18 @@ def minimal_semantic_input( ) -> Path: path = repo / "minimal-savepoint-input.json" project_validation = "" if not include_project_validation else """, - "project_validation": [ - { - "command": "python scripts/check-savepoint-renderer.py", - "result": "%s", - "summary": "minimal renderer fixture validation recorded" + "validation": { + "project": { + "status": "%s", + "commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "%s", + "summary": "minimal renderer fixture validation recorded" + } + ] } - ]""" % validation_result + }""" % (validation_result, validation_result) next_action = "" if not include_next_action else """, "next_action": "run the focused minimal renderer check\"""" blocker_line = "" if unresolved_blockers is None else f""", @@ -316,7 +322,7 @@ def test_renderer_writes_resume_ready_savepoint_from_json_input() -> None: text = output.read_text(encoding="utf-8") require("Generated deterministic final savepoint." 
in text, "renderer origin note missing") require("" not in text, "renderer should not leave placeholders") - require("- Next action: run python scripts/check-savepoint-renderer.py" in text, "smallest next step should drive rendered next action") + require("- Next action: run the focused validation commands" in text, "next_action should drive rendered next action") require("- Changed:" in text and "app.py - modified" in text, "changed file was not derived") require("- Created: none" in text, "renderer-generated files should not be listed as created work") require("- Inspected without change: README.md" in text, "semantic inspected file missing") @@ -428,7 +434,7 @@ def test_renderer_accepts_minimal_ready_json_input() -> None: text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") require("RESUME_READY: yes" in text, "minimal ready input should render resume-ready") require("BLOCKERS: none" in text, "minimal ready input should have no blockers") - for forbidden in ["missing-done-when", "missing-out-of-scope", "missing-smallest-next-step", "not recorded"]: + for forbidden in ["missing-done-when", "missing-out-of-scope", "not recorded"]: require(forbidden not in text, f"minimal input should not emit removed optional placeholder {forbidden}") require("disk state wins" in text, "minimal input must retain disk-state-wins safety language") require("minimal-savepoint-input.json - untracked" not in text, "minimal input file should not be listed as created work") @@ -450,13 +456,18 @@ def test_renderer_records_recovery_uncertainty_inputs() -> None: ], "expected_drift": "validation is from before session reset; rerun focused check if files changed", "unknown_unverified": "nested CLAUDE.md for app.py was not read after compaction", - "project_validation": [ - { - "command": "python scripts/check-savepoint-renderer.py", - "result": "passed", - "summary": "uncertainty renderer fixture validation recorded" + "validation": { + "project": { + "status": "passed", + 
"commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "uncertainty renderer fixture validation recorded" + } + ] } - ] + } } """, encoding="utf-8", @@ -545,7 +556,6 @@ def test_renderer_minimal_json_without_project_validation_stays_unsafe() -> None require("validation-not-run-unknown" in text, "unknown project validation blocker missing") require("missing-done-when" not in text, "removed optional done_when should not block readiness") require("missing-out-of-scope" not in text, "removed optional out_of_scope should not block readiness") - require("missing-smallest-next-step" not in text, "removed optional smallest_next_step should not block readiness") validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) require(validation.returncode == 0, validation.stderr or validation.stdout) @@ -575,7 +585,7 @@ def test_renderer_exit_code_uses_marker_not_body_resume_ready_text() -> None: def test_renderer_failed_project_validation_stays_unsafe() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) - input_path = minimal_semantic_input(repo, validation_result="failed") + input_path = minimal_semantic_input(repo, validation_result="failed-blocking") result = run( [ sys.executable, @@ -599,11 +609,16 @@ def test_renderer_failed_project_validation_stays_unsafe() -> None: def test_validation_status_token_matrix_is_consistent() -> None: renderer = load_render_helper() validator = load_validator_helper() + for status in ["passed", "failed-expected", "failed-blocking", "not-run-justified", "not-run-unknown"]: + require( + renderer.normalize_project_validation_status(status) == status, + f"renderer did not accept exact status {status!r}", + ) blocking_tokens = ["fail", "fails", "failed", "failing", "failure", "error", "errors"] for token in blocking_tokens: require( - renderer.normalize_project_validation_status(f"tests are {token}") == 
"failed-blocking", - f"renderer did not classify {token!r} as failed-blocking", + renderer.normalize_project_validation_status(f"tests are {token}") == "not-run-unknown", + f"renderer should not upgrade free-form status {token!r}", ) validator_text = f"- Project validation: tests are {token}\n" require( @@ -631,15 +646,15 @@ def test_validation_status_token_matrix_is_consistent() -> None: ) -def test_renderer_legacy_mixed_pass_fail_stays_unsafe() -> None: +def test_renderer_rejects_removed_project_validation_input() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) input_path = repo / "savepoint-input.json" input_path.write_text( """{ - "goal": "finish mixed legacy validation handling", - "current_state": "legacy validation has both passing and failing commands", - "next_action": "report the failing validation before continuing", + "goal": "finish canonical validation handling", + "current_state": "top-level project_validation is no longer a supported input shape", + "next_action": "rewrite input to validation.project before continuing", "project_validation": [ { "command": "npm run lint", @@ -668,12 +683,53 @@ def test_renderer_legacy_mixed_pass_fail_stays_unsafe() -> None: ], repo, ) - require(result.returncode == 2, "mixed pass/fail legacy validation should keep output unsafe") - text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") - require("validation-failed-blocking" in text, "mixed validation blocker missing") - require("RESUME_READY: no" in text, "mixed validation must block resume-ready") - validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) - require(validation.returncode == 0, validation.stderr or validation.stdout) + require(result.returncode == 1, "removed project_validation input should be rejected before render") + require("unsupported input key: project_validation" in result.stderr, "removed project_validation error missing") + 
require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), "removed project_validation input should not write an artifact") + + +def test_renderer_rejects_removed_input_aliases() -> None: + for key, value in [ + ("smallest_next_step", "run an old next-step alias"), + ("skipped_checks_next_validation", "run an old validation alias"), + ]: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + data = { + "goal": "reject removed aliases", + "current_state": "canonical input shape is required", + "next_action": "rewrite input before rendering", + "validation": { + "project": { + "status": "passed", + "commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "canonical validation recorded", + } + ], + } + }, + key: value, + } + input_path.write_text(json.dumps(data, ensure_ascii=True, indent=2) + "\n", encoding="utf-8") + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 1, f"{key} should be rejected before render") + require(f"unsupported input key: {key}" in result.stderr, f"{key} error missing") + require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), f"{key} should not write an artifact") def test_renderer_passed_project_validation_requires_complete_command_fields() -> None: @@ -965,7 +1021,7 @@ def test_renderer_unresolved_blocker_stays_unsafe() -> None: require(validation.returncode == 0, validation.stderr or validation.stdout) -def test_renderer_blockers_alias_stays_unsafe() -> None: +def test_renderer_rejects_removed_blockers_alias() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) input_path = repo / "savepoint-input.json" @@ -975,13 +1031,18 @@ def test_renderer_blockers_alias_stays_unsafe() -> 
None: "current_state": "renderer should not drop intuitive blocker input", "next_action": "report blocker before continuing", "blockers": "needs user approval", - "project_validation": [ - { - "command": "python scripts/check-savepoint-renderer.py", - "result": "passed", - "summary": "blocker alias fixture validation recorded" + "validation": { + "project": { + "status": "passed", + "commands": [ + { + "command": "python scripts/check-savepoint-renderer.py", + "result": "passed", + "summary": "blocker alias fixture validation recorded" + } + ] } - ] + } } """, encoding="utf-8", @@ -998,13 +1059,9 @@ def test_renderer_blockers_alias_stays_unsafe() -> None: ], repo, ) - require(result.returncode == 2, "blockers alias should keep output unsafe") - text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") - require("unresolved-blockers-recorded" in text, "blockers alias marker missing") - require("needs user approval" in text, "blockers alias should be recorded in recovery notes") - require("RESUME_READY: no" in text, "blockers alias must block resume-ready") - validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) - require(validation.returncode == 0, validation.stderr or validation.stdout) + require(result.returncode == 1, "blockers alias should be rejected before render") + require("unsupported input key: blockers" in result.stderr, "blockers alias error missing") + require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), "removed blockers alias should not write an artifact") def test_renderer_keeps_savepoint_unsafe_without_active_command_assertion() -> None: @@ -1094,31 +1151,6 @@ def test_renderer_redacts_secret_even_when_scan_flag_is_omitted() -> None: require("RESUME_READY: no" in text, "omitted scan flag should block resume-ready") -def test_root_renderer_forwards_to_portable_renderer() -> None: - with tempfile.TemporaryDirectory() as tmp: - repo = make_repo_with_modified_app(Path(tmp)) - 
input_path = semantic_input(repo) - output = repo / "custom" / "SAVEPOINT.md" - result = run( - [ - sys.executable, - str(ROOT_RENDERER), - "--input", - str(input_path), - "--output", - str(output), - "--assert-no-active-commands", - "--scan-redaction", - "--run-savepoint-validation", - ], - repo, - ) - require(result.returncode == 0, result.stderr or result.stdout) - require(output.exists(), "root renderer did not write requested output") - validation = run([sys.executable, str(VALIDATOR), str(output)], repo) - require(validation.returncode == 0, validation.stderr or validation.stdout) - - def test_savepoint_cli_save_validate_and_inspect() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -1151,8 +1183,8 @@ def test_savepoint_cli_save_validate_and_inspect() -> None: require(parsed["RESUME_READY"] == "yes", "inspect JSON should report resume-ready") require(parsed["SAVEPOINT_MODE"] == "file", "inspect JSON should report file mode") require(parsed["savepoint_validation"] == "passed", "inspect JSON should include savepoint validation status") - require(parsed["project_validation"]["status"] == "passed", "inspect JSON should include project validation status") - require("next_command" in parsed["project_validation"], "inspect JSON should include project validation next command") + require(parsed["validation"]["project"]["status"] == "passed", "inspect JSON should include project validation status") + require("next_validation" in parsed["validation"]["project"], "inspect JSON should include project validation next command") def test_savepoint_cli_init_input_defaults_to_unknown_validation() -> None: @@ -1166,7 +1198,7 @@ def test_savepoint_cli_init_input_defaults_to_unknown_validation() -> None: project = data["validation"]["project"] require(project["status"] == "not-run-unknown", "init-input should default to honest unknown validation") require(project["reason"] == "", "init-input should not prefill a justification") 
- require(project["next_command"] == "", "init-input should not prefill next validation") + require(project["next_validation"] == "", "init-input should not prefill next validation") require(not (repo / ".savepoint" / "SAVEPOINT.md").exists(), "init-input should not write SAVEPOINT.md") @@ -1372,7 +1404,7 @@ def compact_resume_ready_text( ## Validation Manifest -- Savepoint validation: passed: python scripts/validate_savepoint.py .savepoint/SAVEPOINT.md +- Savepoint validation: passed: python scripts/savepoint.py validate .savepoint/SAVEPOINT.md - Project validation: {project_validation} - Skipped checks / next validation: {skipped_checks} - Secret redaction check: {redaction_check} @@ -1738,7 +1770,8 @@ def main() -> int: test_renderer_exit_code_uses_marker_not_body_resume_ready_text, test_renderer_failed_project_validation_stays_unsafe, test_validation_status_token_matrix_is_consistent, - test_renderer_legacy_mixed_pass_fail_stays_unsafe, + test_renderer_rejects_removed_project_validation_input, + test_renderer_rejects_removed_input_aliases, test_renderer_passed_project_validation_requires_complete_command_fields, test_renderer_structured_passed_validation_with_failure_text_stays_unsafe, test_renderer_structured_passed_validation_with_failing_text_stays_unsafe, @@ -1748,11 +1781,10 @@ def main() -> int: test_renderer_failed_blocking_project_validation_stays_unsafe, test_renderer_missing_next_action_stays_unsafe, test_renderer_unresolved_blocker_stays_unsafe, - test_renderer_blockers_alias_stays_unsafe, + test_renderer_rejects_removed_blockers_alias, test_renderer_keeps_savepoint_unsafe_without_active_command_assertion, test_renderer_secret_scan_blocks_resume_ready, test_renderer_redacts_secret_even_when_scan_flag_is_omitted, - test_root_renderer_forwards_to_portable_renderer, test_savepoint_cli_save_validate_and_inspect, test_savepoint_cli_init_input_defaults_to_unknown_validation, test_savepoint_cli_inspect_json_reports_invalid_marker, diff --git 
a/scripts/render_savepoint.py b/scripts/render_savepoint.py deleted file mode 100644 index 8e74a8c..0000000 --- a/scripts/render_savepoint.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -"""Forward to the portable savepoint renderer/finalizer.""" - -from __future__ import annotations - -import sys -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[1] -SKILL_SCRIPTS = ROOT / "skills" / "savepoint" / "scripts" -sys.path.insert(0, str(SKILL_SCRIPTS)) - -from render_savepoint import main # noqa: E402 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/validate-repo.py b/scripts/validate-repo.py index 380bb99..a308539 100644 --- a/scripts/validate-repo.py +++ b/scripts/validate-repo.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Maintainer-only checks for savepoint repository packaging and examples. -Use the portable `validate_savepoint.py` for generated SAVEPOINT.md artifacts. +Use `scripts/savepoint.py validate` for generated SAVEPOINT.md artifacts. This script guards repository metadata, examples, trigger evals, marker schema, and maintainer assets without freezing routine README/SKILL prose. 
""" @@ -279,7 +279,9 @@ def validate_references(self) -> None: self.require_exists(SKILL_DIR / "scripts" / "savepoint_contract.py") self.require_exists(SKILL_DIR / "scripts" / "validate_savepoint.py") self.require_exists(ROOT / "scripts" / "savepoint.py") - self.require_exists(ROOT / "scripts" / "render_savepoint.py") + for removed_wrapper in ["render_savepoint.py", "validate_savepoint.py"]: + if (ROOT / "scripts" / removed_wrapper).exists(): + self.fail(f"root wrapper should not exist: scripts/{removed_wrapper}") required_skill_phrases = [ "Default behavior", diff --git a/scripts/validate_savepoint.py b/scripts/validate_savepoint.py deleted file mode 100644 index e955590..0000000 --- a/scripts/validate_savepoint.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -"""Forward to the portable savepoint validator.""" - -from __future__ import annotations - -import sys -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[1] -SKILL_SCRIPTS = ROOT / "skills" / "savepoint" / "scripts" -sys.path.insert(0, str(SKILL_SCRIPTS)) - -from validate_savepoint import main # noqa: E402 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index e932ebd..19e28b8 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -21,6 +21,12 @@ "next_action", ] MAX_VALUE_CHARS = 600 +UNSUPPORTED_INPUT_KEYS = { + "project_validation": "validation.project", + "skipped_checks_next_validation": "validation.project.next_validation", + "smallest_next_step": "next_action", + "blockers": "unresolved_blockers", +} PROJECT_VALIDATION_STATUSES = { "passed", "failed-expected", @@ -31,7 +37,6 @@ PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} CLEAR_BLOCKER_VALUES = {"none", "no", "not-needed", "not needed"} VALIDATION_FAILURE_RE = 
re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") -VALIDATION_BLOCKING_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors)\b") def parse_args(argv: list[str]) -> argparse.Namespace: @@ -57,7 +62,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace: parser.add_argument( "--run-savepoint-validation", action="store_true", - help="Run validate_savepoint.py after writing the final artifact.", + help="Run bundled savepoint validation after writing the final artifact.", ) return parser.parse_args(argv) @@ -69,6 +74,10 @@ def read_input(path: Path) -> tuple[dict[str, Any] | None, str | None]: return None, f"failed to read input JSON: {exc}" if not isinstance(data, dict): return None, "input JSON must be an object" + unsupported = [key for key in UNSUPPORTED_INPUT_KEYS if key in data] + if unsupported: + details = ", ".join(f"{key} (use {UNSUPPORTED_INPUT_KEYS[key]})" for key in unsupported) + return None, f"unsupported input key: {details}" return data, None @@ -97,11 +106,9 @@ def list_items(value: Any) -> list[str]: def unresolved_blockers_text(data: dict[str, Any]) -> str: - values = [clean_text(data.get(key), fallback="") for key in ("unresolved_blockers", "blockers")] - recorded = [value for value in values if value] - blocking = [value for value in recorded if value.lower() not in CLEAR_BLOCKER_VALUES] - if blocking: - return "; ".join(blocking) + value = clean_text(data.get("unresolved_blockers"), fallback="") + if value and value.lower() not in CLEAR_BLOCKER_VALUES: + return value return "none" @@ -154,15 +161,9 @@ def project_validation_passed(value: Any) -> bool: def normalize_project_validation_status(value: Any) -> str: - status = clean_text(value, fallback="").lower().replace("_", "-") + status = clean_text(value, fallback="").lower() if status in PROJECT_VALIDATION_STATUSES: return status - if re.search(r"\b(pass|passed|ok|success|succeeded)\b", status) and not 
VALIDATION_FAILURE_RE.search(status): - return "passed" - if VALIDATION_BLOCKING_FAILURE_RE.search(status): - return "failed-blocking" - if re.search(r"\b(not-run|not run|skipped)\b", status): - return "not-run-unknown" return "not-run-unknown" @@ -174,10 +175,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: raw_commands = project.get("commands") commands = project_validation_command_entries(raw_commands) reason = clean_text(project.get("reason"), fallback="") - next_validation = clean_text( - project.get("next_validation", project.get("next_command", project.get("next"))), - fallback="", - ) + next_validation = clean_text(project.get("next_validation"), fallback="") if ( status == "passed" and commands @@ -195,47 +193,13 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "commands_complete": project_validation_commands_complete(raw_commands), } - legacy = data.get("project_validation") - commands = project_validation_command_entries(legacy) - commands_complete = project_validation_commands_complete(legacy) - next_validation = clean_text(data.get("skipped_checks_next_validation"), fallback="") - if not commands: - return { - "status": "not-run-unknown", - "commands": [], - "reason": "", - "next_validation": next_validation, - "source": "legacy", - "commands_complete": False, - } - if project_validation_passed(legacy): - return { - "status": "passed", - "commands": commands, - "reason": "", - "next_validation": next_validation, - "source": "legacy", - "commands_complete": commands_complete, - } - - combined = " ".join(commands).lower() - if re.search(r"\b(not-run|not run|skipped)\b", combined): - reason = clean_text(commands[0], fallback="") - return { - "status": "not-run-justified" if reason and next_validation else "not-run-unknown", - "commands": commands, - "reason": reason, - "next_validation": next_validation, - "source": "legacy", - "commands_complete": commands_complete, - } return { - "status": 
"failed-blocking", - "commands": commands, - "reason": clean_text(commands[0], fallback="project validation failed"), - "next_validation": next_validation, - "source": "legacy", - "commands_complete": commands_complete, + "status": "not-run-unknown", + "commands": [], + "reason": "", + "next_validation": "", + "source": "validation.project", + "commands_complete": False, } @@ -276,7 +240,7 @@ def observable_completion(data: dict[str, Any]) -> str: def next_action_text(data: dict[str, Any]) -> str: - return clean_text(data.get("smallest_next_step"), fallback=clean_text(data.get("next_action"))) + return clean_text(data.get("next_action")) def git_status_lines(cwd: Path) -> list[str]: @@ -456,7 +420,7 @@ def build_savepoint( durable_files = list_items(data.get("durable_state_files_checked")) files_first = list_items(data.get("files_to_inspect_first")) or first_paths(changes) skipped = clean_text( - project_posture["next_validation"] or data.get("skipped_checks_next_validation"), + project_posture["next_validation"], fallback="no skipped checks; rerun recorded project validation if state changes", ) expected_drift = clean_text(data.get("expected_drift"), fallback="none") diff --git a/skills/savepoint/scripts/savepoint.py b/skills/savepoint/scripts/savepoint.py index 513f1a6..61a50df 100644 --- a/skills/savepoint/scripts/savepoint.py +++ b/skills/savepoint/scripts/savepoint.py @@ -149,7 +149,7 @@ def inspect_payload( marker_valid = bool(values) and not marker_errors savepoint_valid = marker_valid and not validation_errors project_status = validate_savepoint.project_validation_status(text) if text and marker_valid else "unknown" - next_command = validate_savepoint.field_value_or_block(text, "- Skipped checks / next validation:") if text and marker_valid else "" + next_validation = validate_savepoint.field_value_or_block(text, "- Skipped checks / next validation:") if text and marker_valid else "" return { **values, "path": str(path), @@ -160,9 +160,11 @@ def 
inspect_payload( "savepoint_valid": savepoint_valid, "details_ready": values.get("DETAILS_READY"), "savepoint_validation": "passed" if savepoint_valid else "failed", - "project_validation": { - "status": project_status, - "next_command": next_command, + "validation": { + "project": { + "status": project_status, + "next_validation": next_validation, + } }, "validation_recorded": values.get("VALIDATION_RECORDED") == "yes", "redaction_checked": values.get("REDACTION_CHECKED") == "yes", @@ -189,7 +191,7 @@ def run_init_input(args: argparse.Namespace) -> int: "status": "not-run-unknown", "reason": "", "commands": [], - "next_command": "", + "next_validation": "", } }, } diff --git a/skills/savepoint/scripts/validate_savepoint.py b/skills/savepoint/scripts/validate_savepoint.py index 584d670..ce8559c 100644 --- a/skills/savepoint/scripts/validate_savepoint.py +++ b/skills/savepoint/scripts/validate_savepoint.py @@ -2,7 +2,7 @@ """Validate generated SAVEPOINT.md artifacts. Usage: - python3 scripts/validate_savepoint.py SAVEPOINT.md [more/SAVEPOINT.md ...] + python3 scripts/savepoint.py validate SAVEPOINT.md [more/SAVEPOINT.md ...] 
""" from __future__ import annotations From e92bdee91a08ec7c44519dbc408f4b5980c9b79c Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 14:20:34 +0900 Subject: [PATCH 5/7] ci(savepoint): use unified validate command --- .github/workflows/validate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 335cd93..f32a89d 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -21,7 +21,7 @@ jobs: - run: python3 scripts/validate-repo.py - run: python3 scripts/check-savepoint-renderer.py - run: python3 scripts/check-install-helper.py - - run: python3 scripts/validate_savepoint.py --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md + - run: python3 scripts/savepoint.py validate --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md - name: Check committed whitespace shell: bash run: | From 76c898c1594fddbbca4e469e67fdc92507cd214d Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 15:14:53 +0900 Subject: [PATCH 6/7] fix(savepoint): require evidence for expected validation failures --- .github/workflows/validate.yml | 1 + docs/reference/savepoint-contract.md | 4 +- evals/README.md | 2 +- evals/output-contract.json | 12 ++ examples/SAVEPOINT.filled.example.md | 2 +- examples/file-architecture/SAVEPOINT.md | 2 +- scripts/check-savepoint-renderer.py | 150 +++++++++++++++++- skills/savepoint/SKILL.md | 2 +- skills/savepoint/references/contract.md | 2 +- skills/savepoint/scripts/render_savepoint.py | 52 +++++- .../savepoint/scripts/validate_savepoint.py | 54 +++++-- 11 files changed, 257 insertions(+), 26 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index f32a89d..021fcb9 
100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -22,6 +22,7 @@ jobs: - run: python3 scripts/check-savepoint-renderer.py - run: python3 scripts/check-install-helper.py - run: python3 scripts/savepoint.py validate --allow-example-paths examples/SAVEPOINT.filled.example.md examples/file-bugfix/SAVEPOINT.md examples/file-architecture/SAVEPOINT.md examples/unsafe-savepoint/SAVEPOINT.md + - run: python3 -m compileall -q skills/savepoint/scripts scripts - name: Check committed whitespace shell: bash run: | diff --git a/docs/reference/savepoint-contract.md b/docs/reference/savepoint-contract.md index 28656bc..981ce65 100644 --- a/docs/reference/savepoint-contract.md +++ b/docs/reference/savepoint-contract.md @@ -193,7 +193,7 @@ Field meanings: - `DETAILS_READY`: `yes` for file detail spillover artifacts, `not-needed` when there are no generated details, otherwise `no`. - `PROMPT_READY`: `yes` when file `SAVEPOINT.md` contains an embedded `## Resume Prompt`, or a text response provides a transfer note with a usable next-step prompt. - `DISK_RECORDED`: `yes` only when the required repo snapshot was recorded. -- `VALIDATION_RECORDED`: `yes` when savepoint artifact validation and project validation posture are recorded, including passed, expected failed, or intentionally skipped project validation with reason and next command. +- `VALIDATION_RECORDED`: `yes` when savepoint artifact validation and project validation posture are recorded, including passed validation, expected failed validation with failed command/result/summary evidence plus explicit reason and next command, or intentionally skipped validation with reason and next command. - `REDACTION_CHECKED`: `yes` only after checking generated artifacts or text output for secrets. - `RESUME_READY`: `yes` only when the safe resume checklist passes. - `BLOCKERS`: `none` or a short reason preventing safe continuation. 
@@ -227,7 +227,7 @@ When file artifacts are written, attempt the bundled savepoint validator (`scrip Project validation posture uses these statuses: - `passed`: project validation passed; `RESUME_READY: yes` is allowed. -- `failed-expected`: project validation failed in a known, documented way; `RESUME_READY: yes` is allowed only with reason and next validation command. +- `failed-expected`: project validation failed in a known, documented way; `RESUME_READY: yes` is allowed only with failed command/result/summary evidence, explicit reason, and next validation command. - `not-run-justified`: project validation was not run for a stated reason; `RESUME_READY: yes` is allowed only with reason and next validation command. - `failed-blocking`: project validation failed in a blocking or unexplained way; `RESUME_READY: no`. - `not-run-unknown`: project validation was not run without enough reason or next command; `RESUME_READY: no`. diff --git a/evals/README.md b/evals/README.md index 80df425..667e58b 100644 --- a/evals/README.md +++ b/evals/README.md @@ -36,4 +36,4 @@ For each case: - `SKILL.md` frontmatter parses as valid YAML. - File artifacts have a final marker block that is present and honest; text notes omit it by default. - `VALIDATION_RECORDED: yes` means savepoint artifact validation and project validation posture are recorded. -- `RESUME_READY: yes` can coexist with `not-run-justified` or `failed-expected` project validation when reason and next validation command are recorded. +- `RESUME_READY: yes` can coexist with `not-run-justified` when reason and next validation command are recorded, or with `failed-expected` when failed command evidence, reason, and next validation command are recorded. 
diff --git a/evals/output-contract.json b/evals/output-contract.json index 6be7bbe..11c4a0c 100644 --- a/evals/output-contract.json +++ b/evals/output-contract.json @@ -43,6 +43,18 @@ "RESUME_READY yes is allowed when other hard blockers are absent" ] }, + { + "id": "resume-ready-failed-expected-01", + "category": "resume-ready-semantics", + "scenario": "Project validation failed in a known expected way, with command evidence, reason, and next validation command.", + "must": [ + "Project validation is recorded as failed-expected", + "failed command, result, and summary evidence are recorded", + "expected failure reason is recorded", + "next validation command is recorded", + "RESUME_READY yes is allowed when other hard blockers are absent" + ] + }, { "id": "resume-ready-failed-blocking-01", "category": "resume-ready-semantics", diff --git a/examples/SAVEPOINT.filled.example.md b/examples/SAVEPOINT.filled.example.md index 8e97418..d37530b 100644 --- a/examples/SAVEPOINT.filled.example.md +++ b/examples/SAVEPOINT.filled.example.md @@ -71,7 +71,7 @@ Read in this order: ## Validation Manifest - Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. -- Project validation: failed-expected: `npm test -- ReportTable.test.tsx` failed; no matching CSV export assertions yet. +- Project validation: failed-expected: failed: `npm test -- ReportTable.test.tsx` - no matching CSV export assertions yet; reason: CSV export assertions are the next implementation target. - Skipped checks / next validation: full suite not run because focused tests are missing; next `npm test -- ReportTable.test.tsx`, then `npm run lint` if available. - Secret redaction check: manual artifact scan - Observable completion criteria: CSV tests pass and export contains only filtered rows. 
diff --git a/examples/file-architecture/SAVEPOINT.md b/examples/file-architecture/SAVEPOINT.md index bbc1162..23a1aa4 100644 --- a/examples/file-architecture/SAVEPOINT.md +++ b/examples/file-architecture/SAVEPOINT.md @@ -70,7 +70,7 @@ ## Validation Manifest - Savepoint validation: `python3 scripts/savepoint.py validate .savepoint/SAVEPOINT.md` passed. -- Project validation: failed-expected: `npm test -- tests/billing/invoice.integration.test.ts` failed on timestamp formatting; see `details/validation.md`. +- Project validation: failed-expected: failed: `npm test -- tests/billing/invoice.integration.test.ts` - timestamp formatting mismatch; reason: expected until invoice timestamp format is finalized; see `details/validation.md`. - Skipped checks / next validation: full suite not run while focused integration test is red; next focused integration test. - Secret redaction check: manual artifact scan - Observable completion criteria: focused integration test and billing unit tests pass. diff --git a/scripts/check-savepoint-renderer.py b/scripts/check-savepoint-renderer.py index 7df6e43..bf3138c 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -644,6 +644,27 @@ def test_validation_status_token_matrix_is_consistent() -> None: validator.project_validation_status(reason_with_passed) == "failed-expected", "validator should parse canonical status before reason text containing passed", ) + for phrase in ["0 errors", "zero errors", "no errors", "0 failures", "zero failures", "no failures"]: + commands = [ + { + "command": "npm run lint", + "result": "passed", + "summary": phrase, + } + ] + require( + renderer.project_validation_passed(commands), + f"renderer treated negated failure phrase {phrase!r} as blocking", + ) + passed_text = f"- Project validation: passed: `npm run lint` - {phrase}\n" + require( + not validator.passed_validation_has_failure_terms(passed_text), + f"validator treated negated failure phrase {phrase!r} as 
blocking", + ) + require( + validator.project_validation_status(passed_text) == "passed", + f"validator did not classify negated failure phrase {phrase!r} as passed", + ) def test_renderer_rejects_removed_project_validation_input() -> None: @@ -913,13 +934,102 @@ def test_renderer_failed_expected_project_validation_can_resume_ready() -> None: ) require(result.returncode == 0, result.stderr or result.stdout) text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") - require("Project validation: failed-expected" in text, "expected failure status missing") + require("failed-expected" in text, "expected failure status missing") + require("reason: known failing auth edge case is the next task" in text, "expected failure reason missing") require("RESUME_READY: yes" in text, "expected project validation failure should allow resume-ready") require("validation-failed-blocking" not in text, "expected failure must not be marked blocking") validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) require(validation.returncode == 0, validation.stderr or validation.stdout) +def test_renderer_failed_expected_without_command_fields_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish expected failure evidence", + "current_state": "expected project failure is documented but no command evidence was recorded", + "next_action": "record the exact failing command before continuing", + "validation": { + "project": { + "status": "failed-expected", + "reason": "known failing auth edge case is the next task", + "commands": [], + "next_validation": "python -m pytest tests/auth" + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + 
"--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "expected failure without command evidence should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-command-missing" in text, "missing command evidence blocker missing") + require("RESUME_READY: no" in text, "expected failure without command evidence must block resume-ready") + require("VALIDATION_RECORDED: no" in text, "missing command evidence should not count as validation recorded") + validation = run([sys.executable, str(VALIDATOR), str(repo / ".savepoint" / "SAVEPOINT.md")], repo) + require(validation.returncode == 0, validation.stderr or validation.stdout) + + +def test_renderer_failed_expected_with_passed_command_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish expected failure evidence", + "current_state": "expected project failure is documented with a non-failing command", + "next_action": "record the exact failing command before continuing", + "validation": { + "project": { + "status": "failed-expected", + "reason": "known failing auth edge case is the next task", + "commands": [ + { + "command": "python -m pytest tests/auth", + "result": "passed", + "summary": "0 errors" + } + ], + "next_validation": "python -m pytest tests/auth" + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "expected failure with passed command evidence should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-evidence-missing" in text, "missing failed evidence blocker missing") + 
require("RESUME_READY: no" in text, "passed command cannot satisfy expected failure evidence") + require("VALIDATION_RECORDED: no" in text, "passed command should not count as failed-expected validation recorded") + + def test_renderer_not_run_justified_without_next_validation_stays_unsafe() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -1471,7 +1581,7 @@ def test_compact_validator_accepts_failed_expected_project_validation() -> None: repo = make_repo(Path(tmp)) output = write_compact_resume_ready_savepoint( repo, - project_validation="failed-expected: known failing auth edge case is the next task", + project_validation="failed-expected: failed: `python -m pytest tests/auth` - auth edge case failed; reason: known failing auth edge case is the next task", skipped_checks="python -m pytest tests/auth", ) validation = run([sys.executable, str(VALIDATOR), str(output)], repo) @@ -1509,7 +1619,7 @@ def test_compact_validator_rejects_expected_failure_without_next_validation() -> repo = make_repo(Path(tmp)) output = write_compact_resume_ready_savepoint( repo, - project_validation="failed-expected: known failing auth edge case is the next task", + project_validation="failed-expected: failed: `python -m pytest tests/auth` - auth edge case failed; reason: known failing auth edge case is the next task", skipped_checks="none", ) validation = run([sys.executable, str(VALIDATOR), str(output)], repo) @@ -1517,6 +1627,32 @@ def test_compact_validator_rejects_expected_failure_without_next_validation() -> require("next validation" in validation.stderr, "missing next validation error not reported") +def test_compact_validator_rejects_expected_failure_without_command_evidence() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected: `python -m pytest tests/auth` - known failing auth edge case; reason: known 
failing auth edge case is the next task", + skipped_checks="python -m pytest tests/auth", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted expected failure without result evidence") + require("command evidence" in validation.stderr, "missing result evidence error not reported") + + +def test_compact_validator_rejects_expected_failure_with_passed_command_evidence() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo(Path(tmp)) + output = write_compact_resume_ready_savepoint( + repo, + project_validation="failed-expected: passed: `python -m pytest tests/auth` - 0 errors; reason: known failing auth edge case is the next task", + skipped_checks="python -m pytest tests/auth", + ) + validation = run([sys.executable, str(VALIDATOR), str(output)], repo) + require(validation.returncode != 0, "compact validator accepted passed command as expected failure evidence") + require("command evidence" in validation.stderr, "passed command evidence error not reported") + + def test_compact_validator_rejects_not_run_justified_without_reason() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo(Path(tmp)) @@ -1535,7 +1671,7 @@ def test_compact_validator_rejects_expected_failure_without_reason() -> None: repo = make_repo(Path(tmp)) output = write_compact_resume_ready_savepoint( repo, - project_validation="failed-expected", + project_validation="failed-expected: failed: `python -m pytest tests/auth` - known failing auth edge case is the next task", skipped_checks="python -m pytest tests/auth", ) validation = run([sys.executable, str(VALIDATOR), str(output)], repo) @@ -1561,7 +1697,7 @@ def test_compact_validator_status_parsing_is_hash_seed_stable() -> None: repo = make_repo(Path(tmp)) output = write_compact_resume_ready_savepoint( repo, - project_validation="failed-expected: known failure; previous lint passed", + project_validation="failed-expected: failed: 
`python -m pytest tests/auth` - known failure; reason: known failure; previous lint passed", skipped_checks="python -m pytest tests/auth", ) for seed in ["0", "3", "42"]: @@ -1777,6 +1913,8 @@ def main() -> int: test_renderer_structured_passed_validation_with_failing_text_stays_unsafe, test_renderer_not_run_justified_project_validation_can_resume_ready, test_renderer_failed_expected_project_validation_can_resume_ready, + test_renderer_failed_expected_without_command_fields_stays_unsafe, + test_renderer_failed_expected_with_passed_command_stays_unsafe, test_renderer_not_run_justified_without_next_validation_stays_unsafe, test_renderer_failed_blocking_project_validation_stays_unsafe, test_renderer_missing_next_action_stays_unsafe, @@ -1799,6 +1937,8 @@ def main() -> int: test_compact_validator_rejects_not_run_unknown_project_validation, test_compact_validator_rejects_failed_blocking_project_validation, test_compact_validator_rejects_expected_failure_without_next_validation, + test_compact_validator_rejects_expected_failure_without_command_evidence, + test_compact_validator_rejects_expected_failure_with_passed_command_evidence, test_compact_validator_rejects_not_run_justified_without_reason, test_compact_validator_rejects_expected_failure_without_reason, test_compact_validator_rejects_passed_validation_with_failure_text, diff --git a/skills/savepoint/SKILL.md b/skills/savepoint/SKILL.md index 3be4df1..bed058f 100644 --- a/skills/savepoint/SKILL.md +++ b/skills/savepoint/SKILL.md @@ -35,7 +35,7 @@ Native slash-command support depends on the client. If slash prompts are not pas 1. Treat provided focus text, if any, only as next-session focus. 2. Capture repo/Git state and write compact input JSON with `goal`, `current_state`, `next_action`, `files_to_inspect_first`, and `unresolved_blockers`; start with `python3 /scripts/savepoint.py init-input --output .savepoint/input.json` if blank. -3. 
Set `validation.project.status` to one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected` or `not-run-justified`, include a reason and next validation command. +3. Set `validation.project.status` to one of `passed`, `failed-expected`, `failed-blocking`, `not-run-justified`, or `not-run-unknown`. For `failed-expected`, include failed command/result/summary evidence, an explicit reason, and next validation command. For `not-run-justified`, include a reason and next validation command. 4. Run: ```bash diff --git a/skills/savepoint/references/contract.md b/skills/savepoint/references/contract.md index a9c3caf..5fd13ea 100644 --- a/skills/savepoint/references/contract.md +++ b/skills/savepoint/references/contract.md @@ -28,7 +28,7 @@ Hard blockers: Project validation statuses: - `passed`: resume-ready is allowed. -- `failed-expected`: resume-ready is allowed when the failure is documented with a reason and next validation command. +- `failed-expected`: resume-ready is allowed when the failed command/result/summary, reason, and next validation command are recorded. - `not-run-justified`: resume-ready is allowed when the skip reason and next validation command are recorded. - `failed-blocking`: resume-ready is not allowed. - `not-run-unknown`: resume-ready is not allowed. 
diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index 19e28b8..927572b 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -37,6 +37,8 @@ PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} CLEAR_BLOCKER_VALUES = {"none", "no", "not-needed", "not needed"} VALIDATION_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") +EXPECTED_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors)\b") +NEGATED_FAILURE_RE = re.compile(r"\b(no|zero|0)\s+(failures?|errors?)\b") def parse_args(argv: list[str]) -> argparse.Namespace: @@ -151,7 +153,7 @@ def project_validation_passed(value: Any) -> bool: result = clean_text(item.get("result"), fallback="").lower() summary = clean_text(item.get("summary"), fallback="").lower() combined = f"{result} {summary}" - passed = bool(re.search(r"\b(pass|passed|ok|success|succeeded)\b", combined)) and not VALIDATION_FAILURE_RE.search(combined) + passed = bool(re.search(r"\b(pass|passed|ok|success|succeeded)\b", combined)) and not contains_blocking_failure(combined) if passed: saw_valid_entry = True continue @@ -160,6 +162,28 @@ def project_validation_passed(value: Any) -> bool: return saw_valid_entry +def contains_blocking_failure(text: str) -> bool: + return bool(VALIDATION_FAILURE_RE.search(NEGATED_FAILURE_RE.sub("", text.lower()))) + + +def contains_expected_failure_evidence(text: str) -> bool: + return bool(EXPECTED_FAILURE_RE.search(NEGATED_FAILURE_RE.sub("", text.lower()))) + + +def project_validation_failed_expected(value: Any) -> bool: + if not isinstance(value, list): + return False + saw_failure_entry = False + for item in value: + if not project_validation_command_complete(item): + return False + result = clean_text(item.get("result"), fallback="").lower() + summary = clean_text(item.get("summary"), fallback="").lower() + if 
contains_expected_failure_evidence(f"{result} {summary}"): + saw_failure_entry = True + return saw_failure_entry + + def normalize_project_validation_status(value: Any) -> str: status = clean_text(value, fallback="").lower() if status in PROJECT_VALIDATION_STATUSES: @@ -191,6 +215,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "next_validation": next_validation, "source": "validation.project", "commands_complete": project_validation_commands_complete(raw_commands), + "commands_expected_failure": project_validation_failed_expected(raw_commands), } return { @@ -200,6 +225,7 @@ def project_validation_posture(data: dict[str, Any]) -> dict[str, Any]: "next_validation": "", "source": "validation.project", "commands_complete": False, + "commands_expected_failure": False, } @@ -210,6 +236,11 @@ def project_validation_entries(data: dict[str, Any]) -> list[str]: reason = posture["reason"] if status == "passed": return commands + if status == "failed-expected" and commands: + entries = "; ".join(commands) + if reason: + entries = f"{entries}; reason: {reason}" + return [f"{status}: {entries}"] if commands: return [f"{status}: {entry}" for entry in commands] if reason: @@ -226,7 +257,13 @@ def project_validation_recorded(posture: dict[str, Any]) -> bool: if status == "failed-blocking": return bool(posture["commands"] or posture["reason"]) if status == "failed-expected": - return bool(posture["reason"] and posture["next_validation"]) + return bool( + posture["commands"] + and posture.get("commands_complete") + and posture.get("commands_expected_failure") + and posture["reason"] + and posture["next_validation"] + ) if status == "not-run-justified": return bool(posture["reason"] and posture["next_validation"]) return False @@ -364,7 +401,16 @@ def blockers_for(data: dict[str, Any], args: argparse.Namespace, redaction_ok: b blockers.append("validation-not-run-unknown") elif posture["status"] == "failed-blocking": 
blockers.append("validation-failed-blocking") - elif posture["status"] in PROJECT_VALIDATION_NEXT_REQUIRED: + elif posture["status"] == "failed-expected": + if not posture.get("commands_complete"): + blockers.append("validation-command-missing") + elif not posture.get("commands_expected_failure"): + blockers.append("validation-failed-evidence-missing") + if not posture["reason"]: + blockers.append("validation-reason-missing") + if not posture["next_validation"]: + blockers.append("validation-next-command-missing") + elif posture["status"] == "not-run-justified": if not posture["reason"]: blockers.append("validation-reason-missing") if not posture["next_validation"]: diff --git a/skills/savepoint/scripts/validate_savepoint.py b/skills/savepoint/scripts/validate_savepoint.py index ce8559c..0c65488 100644 --- a/skills/savepoint/scripts/validate_savepoint.py +++ b/skills/savepoint/scripts/validate_savepoint.py @@ -116,6 +116,7 @@ ] PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} VALIDATION_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") +NEGATED_FAILURE_RE = re.compile(r"\b(no|zero|0)\s+(failures?|errors?)\b") def validate_savepoint(path: Path, allow_example_paths: bool = False) -> list[str]: @@ -248,35 +249,66 @@ def validate_validation_status(path: Path, text: str) -> list[str]: errors.append( f"{path}: Project validation status {status} requires a next validation command" ) + if status == "failed-expected" and not project_validation_command_evidence_present(text): + errors.append( + f"{path}: Project validation status failed-expected requires command evidence" + ) return errors def passed_validation_has_failure_terms(text: str) -> bool: - value = field_value_or_block(text, "- Project validation:").lower().replace("_", "-") + value = project_validation_value(text).lower().replace("_", "-") if "passed" not in value: return False - return bool(VALIDATION_FAILURE_RE.search(value)) + 
return contains_blocking_failure(value) -def project_validation_reason_present(text: str, status: str) -> bool: +def contains_blocking_failure(text: str) -> bool: + return bool(VALIDATION_FAILURE_RE.search(NEGATED_FAILURE_RE.sub("", text.lower()))) + + +def project_validation_value(text: str) -> str: value = field_value_or_block(text, "- Project validation:") - normalized = value.lower().replace("_", "-") - index = normalized.find(status) - if index == -1: - return False - reason = value[index + len(status):].strip(" :-`\n\t") + lines = [re.sub(r"^\s*-\s*", "", line.strip()) for line in value.splitlines()] + return "\n".join(line for line in lines if line).strip() + + +def project_validation_body(text: str, status: str) -> str: + value = project_validation_value(text) + return re.sub(rf"(?is)^\s*`?{re.escape(status)}`?\s*[:\-]?\s*", "", value, count=1).strip() + + +def project_validation_command_evidence_present(text: str) -> bool: + value = NEGATED_FAILURE_RE.sub("", project_validation_body(text, "failed-expected").lower()) + return bool( + re.search( + r"\b(fail|fails|failed|failing|failure|error|errors)\b\s*:\s*`[^`]+`\s+-\s*\S", + value, + ) + ) + + +def project_validation_reason_present(text: str, status: str) -> bool: + body = project_validation_body(text, status) + if status == "failed-expected": + match = re.search(r"(?is)(?:^|[;\n])\s*reason\s*:\s*(.+)", body) + if not match: + return False + reason = match.group(1).strip(" :-`\n\t") + return not is_placeholder_value(reason, allow_absence=False) + reason = body.strip(" :-`\n\t") return not is_placeholder_value(reason, allow_absence=False) def project_validation_status(text: str) -> str: - value = field_value_or_block(text, "- Project validation:").lower().replace("_", "-") + value = project_validation_value(text).lower().replace("_", "-") lead = value.strip().splitlines()[0] if value.strip() else "" for status in PROJECT_VALIDATION_STATUS_ORDER: if re.match(rf"^`?{re.escape(status)}\b", lead): return 
status - if re.search(r"\b(pass|passed|ok|success|succeeded)\b", lead) and not VALIDATION_FAILURE_RE.search(lead): + if re.search(r"\b(pass|passed|ok|success|succeeded)\b", lead) and not contains_blocking_failure(lead): return "passed" - if re.search(r"\b(fail|fails|failed|failing|failure|error|errors)\b", lead): + if re.search(r"\b(fail|fails|failed|failing|failure|error|errors)\b", NEGATED_FAILURE_RE.sub("", lead)): return "failed-blocking" if re.search(r"\b(not-run|not run|skipped)\b", lead): return "not-run-unknown" From e5a097b6e2ae402a3eef84f31ec6ac2d4212fe74 Mon Sep 17 00:00:00 2001 From: Injae Date: Fri, 12 Jun 2026 15:49:36 +0900 Subject: [PATCH 7/7] fix(savepoint): tighten expected failure evidence --- scripts/check-install-helper.py | 3 +- scripts/check-savepoint-renderer.py | 103 +++++++++++++++++++ skills/savepoint/scripts/render_savepoint.py | 34 +++--- 3 files changed, 126 insertions(+), 14 deletions(-) diff --git a/scripts/check-install-helper.py b/scripts/check-install-helper.py index e97594a..b606be4 100644 --- a/scripts/check-install-helper.py +++ b/scripts/check-install-helper.py @@ -114,7 +114,8 @@ def test_apply_copies_skill() -> None: destination = repo / ".agents" / "skills" / "savepoint" require(result.returncode == 0, result.stderr or result.stdout) require((destination / "SKILL.md").exists(), "SKILL.md was not copied") - require((destination / "references" / "contract.md").exists(), "runtime references were not copied") + for name in ["contract.md", "safety.md", "template.md"]: + require((destination / "references" / name).exists(), f"runtime reference missing: {name}") require((destination / "scripts" / "render_savepoint.py").exists(), "renderer was not copied") require((destination / "scripts" / "savepoint.py").exists(), "unified CLI was not copied") require((destination / "scripts" / "savepoint_contract.py").exists(), "contract helper was not copied") diff --git a/scripts/check-savepoint-renderer.py 
b/scripts/check-savepoint-renderer.py index bf3138c..aa53884 100644 --- a/scripts/check-savepoint-renderer.py +++ b/scripts/check-savepoint-renderer.py @@ -1030,6 +1030,106 @@ def test_renderer_failed_expected_with_passed_command_stays_unsafe() -> None: require("VALIDATION_RECORDED: no" in text, "passed command should not count as failed-expected validation recorded") +def test_renderer_failed_expected_with_passed_result_failure_word_summary_stays_unsafe() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = repo / "savepoint-input.json" + input_path.write_text( + """{ + "goal": "finish expected failure evidence", + "current_state": "expected project failure is documented with only historical failure wording", + "next_action": "record the exact failing command before continuing", + "validation": { + "project": { + "status": "failed-expected", + "reason": "known failing auth edge case is the next task", + "commands": [ + { + "command": "python -m pytest tests/auth", + "result": "passed", + "summary": "previous failure is now fixed" + } + ], + "next_validation": "python -m pytest tests/auth" + } + } +} +""", + encoding="utf-8", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "expected failure with passed result should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-failed-evidence-missing" in text, "passed result with historical failure summary should get failed-evidence blocker") + require("savepoint-validation-failed" not in text, "renderer should report the specific failed-evidence blocker before validation fallback") + require("RESUME_READY: no" in text, "passed result cannot satisfy expected failure evidence") + + +def 
test_renderer_failed_expected_with_next_validation_none_stays_unsafe_with_specific_blocker() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="failed-expected", + reason="known failing auth edge case is the next task", + next_validation="none", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "expected failure with placeholder next validation should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-next-command-missing" in text, "placeholder next validation should get specific blocker") + require("savepoint-validation-failed" not in text, "renderer should report next-command blocker before validation fallback") + + +def test_renderer_not_run_justified_with_reason_none_stays_unsafe_with_specific_blocker() -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = make_repo_with_modified_app(Path(tmp)) + input_path = lite_validation_semantic_input( + repo, + status="not-run-justified", + reason="none", + next_validation="python scripts/check-savepoint-renderer.py", + ) + result = run( + [ + sys.executable, + str(RENDER_HELPER), + "--input", + str(input_path), + "--assert-no-active-commands", + "--scan-redaction", + "--run-savepoint-validation", + ], + repo, + ) + require(result.returncode == 2, "justified not-run with placeholder reason should stay unsafe") + text = (repo / ".savepoint" / "SAVEPOINT.md").read_text(encoding="utf-8") + require("validation-reason-missing" in text, "placeholder reason should get specific blocker") + require("savepoint-validation-failed" not in text, "renderer should report reason blocker before validation fallback") + + def 
test_renderer_not_run_justified_without_next_validation_stays_unsafe() -> None: with tempfile.TemporaryDirectory() as tmp: repo = make_repo_with_modified_app(Path(tmp)) @@ -1915,6 +2015,9 @@ def main() -> int: test_renderer_failed_expected_project_validation_can_resume_ready, test_renderer_failed_expected_without_command_fields_stays_unsafe, test_renderer_failed_expected_with_passed_command_stays_unsafe, + test_renderer_failed_expected_with_passed_result_failure_word_summary_stays_unsafe, + test_renderer_failed_expected_with_next_validation_none_stays_unsafe_with_specific_blocker, + test_renderer_not_run_justified_with_reason_none_stays_unsafe_with_specific_blocker, test_renderer_not_run_justified_without_next_validation_stays_unsafe, test_renderer_failed_blocking_project_validation_stays_unsafe, test_renderer_missing_next_action_stays_unsafe, diff --git a/skills/savepoint/scripts/render_savepoint.py b/skills/savepoint/scripts/render_savepoint.py index 927572b..2001673 100644 --- a/skills/savepoint/scripts/render_savepoint.py +++ b/skills/savepoint/scripts/render_savepoint.py @@ -36,6 +36,7 @@ } PROJECT_VALIDATION_NEXT_REQUIRED = {"failed-expected", "not-run-justified"} CLEAR_BLOCKER_VALUES = {"none", "no", "not-needed", "not needed"} +ABSENCE_ONLY_VALUES = {"none", "no", "not-needed", "not needed", "n/a", "na"} VALIDATION_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors|not-run|not run|skipped)\b") EXPECTED_FAILURE_RE = re.compile(r"\b(fail|fails|failed|failing|failure|error|errors)\b") NEGATED_FAILURE_RE = re.compile(r"\b(no|zero|0)\s+(failures?|errors?)\b") @@ -170,18 +171,25 @@ def contains_expected_failure_evidence(text: str) -> bool: return bool(EXPECTED_FAILURE_RE.search(NEGATED_FAILURE_RE.sub("", text.lower()))) +def has_substantive_value(value: Any) -> bool: + text = clean_text(value, fallback="").strip().strip("`").lower().strip(" .") + return bool(text) and text not in ABSENCE_ONLY_VALUES and "<" not in text + + +def 
project_validation_command_failed(item: Any) -> bool: + if not project_validation_command_complete(item): + return False + result = clean_text(item.get("result"), fallback="").lower() + return contains_expected_failure_evidence(result) + + def project_validation_failed_expected(value: Any) -> bool: if not isinstance(value, list): return False - saw_failure_entry = False for item in value: if not project_validation_command_complete(item): return False - result = clean_text(item.get("result"), fallback="").lower() - summary = clean_text(item.get("summary"), fallback="").lower() - if contains_expected_failure_evidence(f"{result} {summary}"): - saw_failure_entry = True - return saw_failure_entry + return any(project_validation_command_failed(item) for item in value) def normalize_project_validation_status(value: Any) -> str: @@ -261,11 +269,11 @@ def project_validation_recorded(posture: dict[str, Any]) -> bool: posture["commands"] and posture.get("commands_complete") and posture.get("commands_expected_failure") - and posture["reason"] - and posture["next_validation"] + and has_substantive_value(posture["reason"]) + and has_substantive_value(posture["next_validation"]) ) if status == "not-run-justified": - return bool(posture["reason"] and posture["next_validation"]) + return has_substantive_value(posture["reason"]) and has_substantive_value(posture["next_validation"]) return False @@ -406,14 +414,14 @@ def blockers_for(data: dict[str, Any], args: argparse.Namespace, redaction_ok: b blockers.append("validation-command-missing") elif not posture.get("commands_expected_failure"): blockers.append("validation-failed-evidence-missing") - if not posture["reason"]: + if not has_substantive_value(posture["reason"]): blockers.append("validation-reason-missing") - if not posture["next_validation"]: + if not has_substantive_value(posture["next_validation"]): blockers.append("validation-next-command-missing") elif posture["status"] == "not-run-justified": - if not 
posture["reason"]: + if not has_substantive_value(posture["reason"]): blockers.append("validation-reason-missing") - if not posture["next_validation"]: + if not has_substantive_value(posture["next_validation"]): blockers.append("validation-next-command-missing") elif posture["status"] == "passed" and not posture.get("commands_complete"): blockers.append("validation-command-missing")