diff --git a/.claude/.gitignore b/.claude/.gitignore index 040232cd4..32f4af113 100644 --- a/.claude/.gitignore +++ b/.claude/.gitignore @@ -6,3 +6,5 @@ projects/ plans/ # Ways and CLAUDE.md ARE committed (shared team knowledge) +# ...but raw audit/scan byproducts dropped beside a way are local scratch +ways/**/*-raw.json diff --git a/.claude/ways/kg/documentation/way.md b/.claude/ways/kg/documentation/way.md new file mode 100644 index 000000000..3f3ebefcf --- /dev/null +++ b/.claude/ways/kg/documentation/way.md @@ -0,0 +1,64 @@ +--- +pattern: doc.?catalog|di[aá]taxis|coverage matrix|frontmatter.*id|DD\.NNN|doclint|doc(s)?.*(graph|catalog)|reference page.*generat +files: docs/.*\.md$ +commands: docs/scripts/(doc|doclint) +description: The docs/ tree is one typed graph — catalog frontmatter, DD.NNN.P ids, and the doclint CI gate (ADR-908) +vocabulary: documentation catalog diataxis frontmatter id domain mode doclint coverage matrix typed graph reference generated retired range +scope: agent, subagent +--- +# KG Documentation Catalog Way + +`docs/` is **one typed graph**, not a pile of files — pages and ADRs are nodes, +`related`/`supersedes` are edges. A page's type lives in **frontmatter**, enforced +by a linter; folders are a *view* for human readers. Decision: **ADR-908**. This +repo is the reference implementation that the agent-ways framework generalized into +its canonical documentation model (see ADR-908 → "Upstream"). + +## Catalog frontmatter + +A `docs/` page joins the catalog **only** when it declares these — untagged prose +is ignored (opt-in): + +```yaml +id: 04.001.H # DD.NNN.P (octet-style) +domain: auth # ADR-900 domain key — the shared first octet with ADRs +mode: how-to # Diátaxis: tutorial | how-to | reference | explanation +``` + +- **Identity is `DD.NNN`** — domain band + domain-scoped serial, assigned once, + never reused. The trailing **pole `P`** (`T`/`H`/`R`/`E`) is a *classifier* that + must agree with `mode:`, not part of the key. 
Re-classifying a page flips only + the pole; the number is untouched. +- **Mode is reader posture, not audience.** The four modes are a closed 2×2 — there + is no fifth. "Operations" is an audience facet carried by `domain`, never a mode. +- Domain bands mirror the ADR series (`01` infra · `02` db · `03` ingest · + `04` auth · `05` query · `06` vocab · `07` ui · `08` ai · `09` meta). + +## Tooling + +| Command | Role | +|---------|------| +| `docs/scripts/doc coverage` / `gaps` / `list` | Diagnostic front-end — domain×mode matrix | +| `docs/scripts/doclint.py --check` | The **test** — CI gate, exit 1 on errors | + +Both are **vendored copies** of the canonical agent-ways tools (copy-not-symlink); +re-vendor from canonical rather than editing in place. They share `adr.yaml`'s +domain config. Keep lint at **0 errors** — it enforces frontmatter validity, +resolvable edges, no supersede cycles, and the vacated-range guard. + +## Gotchas + +- **Generated reference pages** (`reference/{cli,mcp,fuse,schema}.md`) are + overwritten every build — their frontmatter is **emitted by the generator** + (`cli/scripts/*`), never hand-injected, or the next regen wipes it. +- **Retired-range guard is ON** (`legacy: {retired: true}`): no doc/ADR/source may + reference the vacated legacy range (ADR numbers 1–99). The scan honors `.gitignore`, so + gitignored corpora (e.g. `examples/.../claude-ai-history`) and scratch are + skipped. Raw audit/scan byproducts go to `*-raw.json` (gitignored), not committed. +- mkdocs strips unknown frontmatter, so catalog ids never reach readers — they are + maintainer/linter metadata only. 
+ +## See Also + +- `.claude/ways/kg/adr/way.md` — ADR domain numbering (shares `adr.yaml`) +- ADR-908 — the decision; ADR-900 — the domain/numbering system diff --git a/cli/scripts/generate-mcp-docs.mjs b/cli/scripts/generate-mcp-docs.mjs index cd21a664c..11f44c7cd 100644 --- a/cli/scripts/generate-mcp-docs.mjs +++ b/cli/scripts/generate-mcp-docs.mjs @@ -190,7 +190,7 @@ async function main() { // Documentation-catalog frontmatter (ADR-908). Emitted here, not hand-injected, // because this page is overwritten on every docs build. domain=ui (MCP), // mode=reference. - index.push('---\nid: 7.R.04\ndomain: ui\nmode: reference\n---\n'); + index.push('---\nid: 07.007.R\ndomain: ui\nmode: reference\n---\n'); index.push('# MCP Server Tool Reference (Auto-Generated)\n'); index.push('> **Auto-Generated Documentation**'); index.push('> '); diff --git a/cli/scripts/simple-doc-gen.mjs b/cli/scripts/simple-doc-gen.mjs index 19f665154..e1669a2bd 100644 --- a/cli/scripts/simple-doc-gen.mjs +++ b/cli/scripts/simple-doc-gen.mjs @@ -132,7 +132,7 @@ async function main() { // Documentation-catalog frontmatter (ADR-908). Emitted here, not hand-injected, // because this page is overwritten on every docs build. Stripped from GitHub // Pages (mkdocs ignores unknown keys). domain=ui (CLI), mode=reference. - index.push('---\nid: 7.R.03\ndomain: ui\nmode: reference\n---\n'); + index.push('---\nid: 07.004.R\ndomain: ui\nmode: reference\n---\n'); index.push('# CLI Command Reference (Auto-Generated)\n'); index.push('> **Auto-Generated Documentation**'); index.push('> '); diff --git a/docs/architecture/access-workflow/ADR-908-documentation-strategy.md b/docs/architecture/access-workflow/ADR-908-documentation-strategy.md index 94a4d5b4f..23281c71f 100644 --- a/docs/architecture/access-workflow/ADR-908-documentation-strategy.md +++ b/docs/architecture/access-workflow/ADR-908-documentation-strategy.md @@ -32,20 +32,52 @@ page drift apart, and a contributor never knows which one to edit. 
Audience is a real lens, but it is the wrong axis for the directory structure. We adopt **Diátaxis** as the organizing taxonomy instead. Each page has one -*mode* and serves one reader job: - -| Mode | Folder | The page answers | -|------|--------|------------------| -| Tutorial | `get-started/` | "Walk me through my first success." | -| How-to | `how-to/` | "I have a goal; give me the steps." | -| Reference | `reference/` | "State the facts I look up." | -| Explanation | `explanation/` | "Help me understand why." | -| Operations | `self-host/` | "I run the platform; deploy and keep it healthy." | - -`self-host/` is a fifth, operations-flavored section — Diátaxis-adjacent rather -than canonical — because running the appliance is a distinct, large reader job. -Architecture/ADRs remain a separate tree (the builder-pair audience) reachable -from their index. +*mode* — the reader's epistemic posture — and serves one reader job: + +| Mode | The page answers | +|------|------------------| +| Tutorial | "Walk me through my first success." | +| How-to | "I have a goal; give me the steps." | +| Reference | "State the facts I look up." | +| Explanation | "Help me understand why." | + +These four are not a list to extend — they are a closed 2×2 derived from two +orthogonal axes (*action* vs *cognition*, *acquisition* vs *application*). +Tutorial and How-to are action; Reference and Explanation are cognition. +Tutorial and Reference serve acquisition-adjacent and lookup needs; How-to and +Explanation serve the working reader. There is no fifth quadrant, because there +is no third axis. Mode is therefore the *highest* level of the catalog: it +classifies every page by reader posture alone, independent of subject. + +**Folders are reader destinations, not modes.** The default site layout maps +one mode to one folder — `get-started/` (tutorial), `how-to/`, `reference/`, +`explanation/` — but a folder may hold several modes when it serves a coherent +*audience*. 
`self-host/` is exactly that: the operator's home, holding a +tutorial (`quick-start`), how-tos (`upgrading`, `tls`, `backup-restore`), +reference (`configuration`), and explanation (`security`, `production`). +"Operations" is an **audience facet**, not a mode — it answers *who reads this* +and *what subsystem* (deployment, networking, backup), which the catalog already +captures on the **domain** axis (`infra`, with the security model under `auth`). +It does not belong in the mode slot. Architecture/ADRs likewise remain a +separate tree (the builder-pair audience) reachable from their index. + +> **Correction (2026-06-16).** The 2026-06-15 amendment first defined a fifth +> mode, `operations` (letter `O`), homed in `self-host/` — "Diátaxis-adjacent." +> That was a category error: it placed an *audience* label on the *mode* axis, +> which is reserved for reader posture. Operations content is never outside the +> four modes (it decomposes into a tutorial, how-tos, reference, and +> explanation), and its distinct-job-ness lives on the **domain** axis, which +> the `self-host/` pages already encode correctly (`infra`, plus `auth` for the +> security model). The mode vocabulary is now the canonical four (`T/H/R/E`); +> `O` is retired. The same correction reshapes the catalog id from +> `<domain>.<mode>.<serial>` to the octet-style `DD.NNN.P` defined below, so +> that mode (the mutable part) trails the identity instead of sitting inside it. +> **Migration:** re-tag the 11 `self-host/` pages to their true mode, drop +> `operations` from `doclint.py`'s `MODE_LETTER`, renumber every catalog id to +> `DD.NNN.P` with domain-scoped serials, and update the four reference-page +> generators (`cli`, `mcp`, `fuse`, `schema`) to emit the new ids. The +> `self-host/` folder and its nav are unaffected — it remains a multi-mode, +> audience-coherent destination. 
The consolidation that applied this taxonomy is recorded in `specs/documentation-consolidation-spec.md` (an intermediary spec, not an ADR): @@ -62,21 +94,41 @@ part number that is invisible on the showroom floor: ```yaml --- -id: 4.R.01 # <domain>.<mode>.<serial> -domain: auth # ADR-900 domain key — the shared "first octet" -mode: reference # Diátaxis: tutorial | how-to | reference | explanation | operations +id: 04.001.H # <DD>.<NNN>.<P> +domain: auth # ADR-900 domain key — the shared "first octet" +mode: how-to # Diátaxis: tutorial | how-to | reference | explanation --- ``` -- **domain** reuses the ADR-900 domain bands (1 infra · 2 db · 3 ingest · - 4 auth · 5 query · 6 vocab · 7 ui · 8 ai · 9 meta). A doc and the ADRs that - govern it share the leading digit, so "everything about auth" spans both trees. -- **mode** is the Diátaxis mode (letter `T` / `H` / `R` / `E` / `O` in the ID). -- **serial** is a 2-digit sequence within `(domain, mode)`. +The id is a fixed-width, octet-style number — `DD.NNN.P`: + +- **domain band (`DD`)** — two digits, the ADR-900 band (`01` infra · `02` db · + `03` ingest · `04` auth · `05` query · `06` vocab · `07` ui · `08` ai · + `09` meta). A doc and the ADRs that govern it share the band, so "everything + about auth" spans both trees. +- **serial (`NNN`)** — a three-digit sequence **scoped to the domain**, not to + `(domain, mode)`. Assigned once at creation and never reused. `DD.NNN` + together is the page's immutable identity. +- **pole (`P`)** — the Diátaxis mode as a single trailing letter + (`T`/`H`/`R`/`E`). It is a *classifier*, not part of the identity; it must + agree with the `mode:` field, and the linter enforces that. + +**Why the pole trails and the serial is domain-scoped.** The original scheme was +`<domain>.<mode>.<serial>` with the serial scoped to `(domain, mode)` — which +baked a *mutable* attribute into the *middle of the identity*. Re-classifying a +page then forced its part number to change: the 2026-06-16 correction above +re-shelved eleven `self-host/` pages and, under the old scheme, every one of +their ids would have churned. A handle that mutates when you re-shelve the part +is not a handle. Under `DD.NNN.P` the identity is `DD.NNN`; re-classifying flips +only the trailing pole (`…​.H` → `…​.E`) and the number is untouched. The pole +stays visible so the id self-documents, but it is a *view* of the `mode:` field, +never the key. 
Domain-scoped serials mean the linter can treat any id collision +as a real clash (`check_duplicate_ids`), since two pages in a domain can never +legitimately share a serial. These keys are **management metadata, not display**. mkdocs ignores unknown frontmatter keys, so the catalog is stripped from GitHub Pages — readers never -see `4.R.01`, maintainers and the linter do. Diátaxis stays a *principle* for +see `04.001.H`, maintainers and the linter do. Diátaxis stays a *principle* for readers; the catalog is the *index* for maintainers. The two are facets of the same page, not competing schemes. @@ -101,6 +153,20 @@ references. It lints for: Linting is **enforced on `docs/`** (errors fail CI) and **warns on ADRs** until the ADR frontmatter sweep lands (tracked separately as the #520 fast-follow). +### Upstream: generalized into the agent-ways framework + +This catalog — the typed doc+ADR graph, the `DD.NNN.P` id, the `doc`/`doclint` +tooling — was generalized out of this repo into the agent-ways framework as its +canonical *documentation model* (the framework's own `ADR-302`, unrelated to this +project's `ADR-302`). This repo is that model's **reference implementation**, and +`docs/scripts/{doc,doclint.py}` are **vendored copies** of the canonical tools +(copy-not-symlink, refreshed from canonical). That is why the vendored sources +cite `ADR-302` (the upstream model) and credit `ADR-908`/`ADR-900` (this repo's +local decisions) — the two numbers name two different things and both are correct. +The framework-side reference is prose only: do **not** add `ADR-302` to this ADR's +`related:` edges, since here that id resolves to the multimodal-ingestion ADR and +would be a false graph edge. 
+ ### Numbering freeze Per ADR-900, legacy ADRs (1–99) are frozen at their numbers — we do not diff --git a/docs/architecture/adr.yaml b/docs/architecture/adr.yaml index 9ba1379f2..042468998 100644 --- a/docs/architecture/adr.yaml +++ b/docs/architecture/adr.yaml @@ -82,6 +82,7 @@ defaults: legacy: range: [1, 99] label: "Retired (renumbered into domains 2026-06-15)" + retired: true # vacated range — doclint fails the build on any reference into 1–99 # Viewer command for `adr view` # Use {file} as placeholder for the file path diff --git a/docs/contributing/docstring-coverage.md b/docs/contributing/docstring-coverage.md index 229f6ae55..a9db347fa 100644 --- a/docs/contributing/docstring-coverage.md +++ b/docs/contributing/docstring-coverage.md @@ -1,5 +1,5 @@ --- -id: 9.H.01 +id: 09.001.H domain: meta mode: how-to --- diff --git a/docs/contributing/test-suite.md b/docs/contributing/test-suite.md index a09fc396c..d0c09d38e 100644 --- a/docs/contributing/test-suite.md +++ b/docs/contributing/test-suite.md @@ -1,5 +1,5 @@ --- -id: 9.R.01 +id: 09.002.R domain: meta mode: reference --- diff --git a/docs/contributing/voice.md b/docs/contributing/voice.md index 0588fe7e6..853e4f074 100644 --- a/docs/contributing/voice.md +++ b/docs/contributing/voice.md @@ -1,5 +1,5 @@ --- -id: 9.R.02 +id: 09.003.R domain: meta mode: reference --- diff --git a/docs/explanation/computed-evidence.md b/docs/explanation/computed-evidence.md index 275bb7843..23f1f99f2 100644 --- a/docs/explanation/computed-evidence.md +++ b/docs/explanation/computed-evidence.md @@ -1,5 +1,5 @@ --- -id: 6.E.01 +id: 06.001.E domain: vocab mode: explanation --- diff --git a/docs/explanation/embedding-landscape.md b/docs/explanation/embedding-landscape.md index eec465390..318985dcc 100644 --- a/docs/explanation/embedding-landscape.md +++ b/docs/explanation/embedding-landscape.md @@ -1,5 +1,5 @@ --- -id: 8.E.01 +id: 08.001.E domain: ai mode: explanation --- diff --git a/docs/explanation/graph-program.md 
b/docs/explanation/graph-program.md index c2cf815c4..c9772da1b 100644 --- a/docs/explanation/graph-program.md +++ b/docs/explanation/graph-program.md @@ -1,5 +1,5 @@ --- -id: 5.E.01 +id: 05.001.E domain: query mode: explanation --- diff --git a/docs/explanation/grounding.md b/docs/explanation/grounding.md index 04938b1f7..40895307b 100644 --- a/docs/explanation/grounding.md +++ b/docs/explanation/grounding.md @@ -1,5 +1,5 @@ --- -id: 6.E.02 +id: 06.002.E domain: vocab mode: explanation --- diff --git a/docs/explanation/how-it-works.md b/docs/explanation/how-it-works.md index 53d3ab345..664403589 100644 --- a/docs/explanation/how-it-works.md +++ b/docs/explanation/how-it-works.md @@ -1,5 +1,5 @@ --- -id: 3.E.01 +id: 03.001.E domain: ingest mode: explanation --- diff --git a/docs/explanation/recursive-upsert.md b/docs/explanation/recursive-upsert.md index 222a427b6..001056adc 100644 --- a/docs/explanation/recursive-upsert.md +++ b/docs/explanation/recursive-upsert.md @@ -1,5 +1,5 @@ --- -id: 3.E.02 +id: 03.002.E domain: ingest mode: explanation --- diff --git a/docs/explanation/storage-and-freshness.md b/docs/explanation/storage-and-freshness.md index 106418bd4..1b06541c7 100644 --- a/docs/explanation/storage-and-freshness.md +++ b/docs/explanation/storage-and-freshness.md @@ -1,5 +1,5 @@ --- -id: 2.E.01 +id: 02.001.E domain: db mode: explanation --- diff --git a/docs/explanation/vocabulary-lifecycle.md b/docs/explanation/vocabulary-lifecycle.md index b73be765e..568602ecd 100644 --- a/docs/explanation/vocabulary-lifecycle.md +++ b/docs/explanation/vocabulary-lifecycle.md @@ -1,5 +1,5 @@ --- -id: 6.E.03 +id: 06.003.E domain: vocab mode: explanation --- diff --git a/docs/explanation/worker-lanes.md b/docs/explanation/worker-lanes.md index 9674d7346..c2333377a 100644 --- a/docs/explanation/worker-lanes.md +++ b/docs/explanation/worker-lanes.md @@ -1,5 +1,5 @@ --- -id: 1.E.01 +id: 01.001.E domain: infra mode: explanation --- diff --git a/docs/get-started/first-graph.md 
b/docs/get-started/first-graph.md index c839eff28..f745e9e30 100644 --- a/docs/get-started/first-graph.md +++ b/docs/get-started/first-graph.md @@ -1,5 +1,5 @@ --- -id: 3.T.01 +id: 03.003.T domain: ingest mode: tutorial --- diff --git a/docs/get-started/first-query.md b/docs/get-started/first-query.md index 1fd4322b5..fde655dde 100644 --- a/docs/get-started/first-query.md +++ b/docs/get-started/first-query.md @@ -1,5 +1,5 @@ --- -id: 5.T.01 +id: 05.002.T domain: query mode: tutorial --- diff --git a/docs/get-started/github-history.md b/docs/get-started/github-history.md index c28a7459a..cc6fa737b 100644 --- a/docs/get-started/github-history.md +++ b/docs/get-started/github-history.md @@ -1,5 +1,5 @@ --- -id: 3.T.02 +id: 03.004.T domain: ingest mode: tutorial --- diff --git a/docs/get-started/mcp-quickstart.md b/docs/get-started/mcp-quickstart.md index 790e2203b..19aae5698 100644 --- a/docs/get-started/mcp-quickstart.md +++ b/docs/get-started/mcp-quickstart.md @@ -1,5 +1,5 @@ --- -id: 7.T.01 +id: 07.001.T domain: ui mode: tutorial --- diff --git a/docs/get-started/what-and-why.md b/docs/get-started/what-and-why.md index 2c47ee39b..840a18e2f 100644 --- a/docs/get-started/what-and-why.md +++ b/docs/get-started/what-and-why.md @@ -1,5 +1,5 @@ --- -id: 9.E.01 +id: 09.004.E domain: meta mode: explanation --- diff --git a/docs/how-to/ai-providers.md b/docs/how-to/ai-providers.md index 63ed8d585..e93c7385e 100644 --- a/docs/how-to/ai-providers.md +++ b/docs/how-to/ai-providers.md @@ -1,5 +1,5 @@ --- -id: 8.H.01 +id: 08.002.H domain: ai mode: how-to --- diff --git a/docs/how-to/embeddings.md b/docs/how-to/embeddings.md index 1f8da240a..fcc3f8d61 100644 --- a/docs/how-to/embeddings.md +++ b/docs/how-to/embeddings.md @@ -1,5 +1,5 @@ --- -id: 8.H.02 +id: 08.003.H domain: ai mode: how-to --- diff --git a/docs/how-to/epistemic-status.md b/docs/how-to/epistemic-status.md index 49c66e2d5..9716b26cd 100644 --- a/docs/how-to/epistemic-status.md +++ b/docs/how-to/epistemic-status.md 
@@ -1,5 +1,5 @@ --- -id: 6.H.01 +id: 06.004.H domain: vocab mode: how-to --- diff --git a/docs/how-to/extraction-quality.md b/docs/how-to/extraction-quality.md index 91e405130..060b4bb30 100644 --- a/docs/how-to/extraction-quality.md +++ b/docs/how-to/extraction-quality.md @@ -1,5 +1,5 @@ --- -id: 8.H.03 +id: 08.004.H domain: ai mode: how-to --- diff --git a/docs/how-to/fuse.md b/docs/how-to/fuse.md index 51f00d99c..7656aad9d 100644 --- a/docs/how-to/fuse.md +++ b/docs/how-to/fuse.md @@ -1,5 +1,5 @@ --- -id: 7.H.01 +id: 07.002.H domain: ui mode: how-to --- diff --git a/docs/how-to/ingest.md b/docs/how-to/ingest.md index 799a8eca1..2aba340c1 100644 --- a/docs/how-to/ingest.md +++ b/docs/how-to/ingest.md @@ -1,5 +1,5 @@ --- -id: 3.H.01 +id: 03.005.H domain: ingest mode: how-to --- diff --git a/docs/how-to/polarity-axis.md b/docs/how-to/polarity-axis.md index 6f128d515..741066cd0 100644 --- a/docs/how-to/polarity-axis.md +++ b/docs/how-to/polarity-axis.md @@ -1,5 +1,5 @@ --- -id: 8.H.04 +id: 08.005.H domain: ai mode: how-to --- diff --git a/docs/how-to/query.md b/docs/how-to/query.md index 2e17263e1..a216398ac 100644 --- a/docs/how-to/query.md +++ b/docs/how-to/query.md @@ -1,5 +1,5 @@ --- -id: 5.H.01 +id: 05.003.H domain: query mode: how-to --- diff --git a/docs/how-to/vocabulary.md b/docs/how-to/vocabulary.md index 6f7838588..b1b366133 100644 --- a/docs/how-to/vocabulary.md +++ b/docs/how-to/vocabulary.md @@ -1,5 +1,5 @@ --- -id: 6.H.02 +id: 06.005.H domain: vocab mode: how-to --- diff --git a/docs/index.md b/docs/index.md index ce67ecc56..48862faa5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,5 @@ --- -id: 9.E.02 +id: 09.005.E domain: meta mode: explanation --- diff --git a/docs/reference/api.md b/docs/reference/api.md index bd4691c97..f5a2be07f 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -1,5 +1,5 @@ --- -id: 7.R.01 +id: 07.003.R domain: ui mode: reference --- diff --git a/docs/reference/backup-object-spec.md 
b/docs/reference/backup-object-spec.md index 2e06111c9..129ea8c72 100644 --- a/docs/reference/backup-object-spec.md +++ b/docs/reference/backup-object-spec.md @@ -1,5 +1,5 @@ --- -id: 1.R.01 +id: 01.002.R domain: infra mode: reference --- diff --git a/docs/reference/cli.md b/docs/reference/cli.md index dd4dbaf68..529fd5f6d 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -1,5 +1,5 @@ --- -id: 7.R.03 +id: 07.004.R domain: ui mode: reference --- diff --git a/docs/reference/cypher.md b/docs/reference/cypher.md index a95f4af7f..4a11e4049 100644 --- a/docs/reference/cypher.md +++ b/docs/reference/cypher.md @@ -1,5 +1,5 @@ --- -id: 5.R.01 +id: 05.004.R domain: query mode: reference --- diff --git a/docs/reference/fuse.md b/docs/reference/fuse.md index 882d1405c..b9f3cf884 100644 --- a/docs/reference/fuse.md +++ b/docs/reference/fuse.md @@ -1,5 +1,5 @@ --- -id: 7.R.05 +id: 07.005.R domain: ui mode: reference --- diff --git a/docs/reference/graph-program-security.md b/docs/reference/graph-program-security.md index 9606e42f8..1c657f77b 100644 --- a/docs/reference/graph-program-security.md +++ b/docs/reference/graph-program-security.md @@ -1,5 +1,5 @@ --- -id: 5.R.02 +id: 05.005.R domain: query mode: reference --- diff --git a/docs/reference/graph-program-spec.md b/docs/reference/graph-program-spec.md index 8cd9a99ff..d70242eff 100644 --- a/docs/reference/graph-program-spec.md +++ b/docs/reference/graph-program-spec.md @@ -1,5 +1,5 @@ --- -id: 5.R.03 +id: 05.006.R domain: query mode: reference --- diff --git a/docs/reference/graph-program-validation.md b/docs/reference/graph-program-validation.md index fa797cd31..a68291340 100644 --- a/docs/reference/graph-program-validation.md +++ b/docs/reference/graph-program-validation.md @@ -1,5 +1,5 @@ --- -id: 5.R.04 +id: 05.007.R domain: query mode: reference --- diff --git a/docs/reference/mcp-session-context.md b/docs/reference/mcp-session-context.md index 8a6bfefa6..8b16b37df 100644 --- 
a/docs/reference/mcp-session-context.md +++ b/docs/reference/mcp-session-context.md @@ -1,5 +1,5 @@ --- -id: 7.R.02 +id: 07.006.R domain: ui mode: reference --- diff --git a/docs/reference/mcp.md b/docs/reference/mcp.md index 395e5a3a3..0caf1b7c2 100644 --- a/docs/reference/mcp.md +++ b/docs/reference/mcp.md @@ -1,5 +1,5 @@ --- -id: 7.R.04 +id: 07.007.R domain: ui mode: reference --- diff --git a/docs/reference/schema.md b/docs/reference/schema.md index 095d9daf6..c05bea611 100644 --- a/docs/reference/schema.md +++ b/docs/reference/schema.md @@ -1,5 +1,5 @@ --- -id: 2.R.01 +id: 02.002.R domain: db mode: reference --- diff --git a/docs/scripts/doc b/docs/scripts/doc index fad297ff7..1dda31b2b 100755 --- a/docs/scripts/doc +++ b/docs/scripts/doc @@ -2,12 +2,13 @@ """ doc — a librarian for the documentation catalog. -Presents the catalog defined by ADR-087 (frontmatter `id`/`domain`/`mode`) over -the ADR-900 domain bands: a domain×mode coverage matrix, page listings, and a -gap report that weighs doc coverage against ADR count per domain. +Presents the catalog (frontmatter `id`/`domain`/`mode`, ADR-908) over the shared +domain bands: a domain×mode coverage matrix, page listings, and a gap report that +weighs doc coverage against ADR count per domain. -This is the *diagnostic* front-end. The *test* is `doclint.py` (the CI linter); +This is the *diagnostic* front-end. The *test* is `doclint` (the CI linter); `doc lint` invokes it. Sibling of the `adr` tool — same `adr.yaml` config. +Generalized from the knowledge-graph-system reference implementation. 
Usage: doc # coverage matrix (default) @@ -43,14 +44,20 @@ except ImportError: # ============================================================================ def _cfg() -> dict: - """Load adr.yaml (domains, ranges, names).""" - with open(doclint.ADR_YAML) as f: - return yaml.safe_load(f) + return doclint.load_config() + + +def _digits() -> dict: + return doclint.domain_digits(_cfg()) def _title(path: Path) -> str: """Return a page's first H1 (its display title), or '(untitled)'.""" - for line in path.read_text().split("\n"): + try: + text = path.read_text() + except (OSError, UnicodeDecodeError): + return "(unreadable)" + for line in text.split("\n"): m = re.match(r"^#\s+(.+)", line) if m: return m.group(1).strip() @@ -58,11 +65,17 @@ def _title(path: Path) -> str: def load_pages() -> list: - """Parse every catalog page into a doclint Node, annotated with its title.""" - digits = doclint.load_domain_digits() + """Parse every catalog page into a doclint Node, annotated with its title. + + Honors doclint's opt-in membership: non-catalog prose (no id/domain/mode) + yields None and is skipped. + """ + digits = _digits() pages = [] for p in doclint.iter_catalog_pages(): node = doclint.build_doc_node(p, digits) + if node is None: + continue node.title = _title(p) pages.append(node) return pages @@ -89,11 +102,7 @@ def adr_counts() -> Counter: # ============================================================================ def render_table(headers, rows, aligns=None, rule_before_last=False) -> str: - """Render a box-drawn grid (Unicode borders). - - aligns is a per-column list of 'l'/'r'; rule_before_last draws a separator - above the final row (for a totals line). 
- """ + """Render a box-drawn grid (Unicode borders).""" cols = len(headers) aligns = aligns or ["l"] * cols widths = [ @@ -125,14 +134,13 @@ def render_table(headers, rows, aligns=None, rule_before_last=False) -> str: # ============================================================================ def cmd_coverage(args) -> int: - """Render the domain×mode coverage matrix with doc and ADR totals.""" cfg = _cfg() pages = load_pages() adrs = adr_counts() - modes = list(doclint.MODE_LETTER) # tutorial..operations, in order + modes = list(doclint.MODE_LETTER) letters = [doclint.MODE_LETTER[m] for m in modes] grid = Counter((n.domain, n.mode) for n in pages if n.domain and n.mode) - domains = list(cfg["domains"]) # adr.yaml order = octet order + domains = list(cfg["domains"]) headers = ["domain"] + letters + ["docs", "ADRs"] rows = [] @@ -152,7 +160,6 @@ def cmd_coverage(args) -> int: def cmd_list(args) -> int: - """List catalog pages (optionally filtered by domain/mode).""" pages = load_pages() if args.domain: pages = [p for p in pages if p.domain == args.domain] @@ -162,13 +169,13 @@ def cmd_list(args) -> int: keymap = { "id": lambda p: (p.key,), "path": lambda p: (p.rel,), - "title": lambda p: (p.title.lower(),), + "title": lambda p: (getattr(p, "title", "").lower(),), } pages.sort(key=keymap[args.sort]) headers = ["id", "domain", "mode", "title"] rows = [[p.key or "?", p.domain or "?", p.mode or "?", - p.title + (" !!" if p.issues else "")] for p in pages] + getattr(p, "title", "") + (" !!" 
if p.issues else "")] for p in pages] if rows: print() print(render_table(headers, rows)) @@ -178,7 +185,6 @@ def cmd_list(args) -> int: def cmd_gaps(args) -> int: - """Report empty (domain×mode) cells and domains where docs lag ADRs.""" cfg = _cfg() pages = load_pages() adrs = adr_counts() @@ -191,14 +197,13 @@ def cmd_gaps(args) -> int: for dom in domains: missing = [doclint.MODE_LETTER[m] for m in modes if not grid.get((dom, m))] if missing: - print(f" {dom:9} no {', '.join(missing)}") + print(f" {dom:12} no {', '.join(missing)}") print("\nDoc/ADR imbalance (domains with many decisions, few docs):") - rows = sorted(domains, key=lambda d: (per_domain.get(d, 0) - adrs.get(d, 0))) - for dom in rows: + for dom in sorted(domains, key=lambda d: (per_domain.get(d, 0) - adrs.get(d, 0))): nd, na = per_domain.get(dom, 0), adrs.get(dom, 0) if na and nd <= max(2, na // 4): - print(f" {dom:9} {na:>3} ADRs → {nd:>2} docs") + print(f" {dom:12} {na:>3} ADRs → {nd:>2} docs") issues = [p for p in pages if p.issues] if issues: @@ -209,17 +214,16 @@ def cmd_gaps(args) -> int: def cmd_domains(args) -> int: - """Print the domain bands (mirrors `adr domains`, catalog-flavored).""" cfg = _cfg() print("\nCatalog domains (shared first octet with ADRs)\n") for key, d in cfg["domains"].items(): lo, hi = d["range"] - print(f" {lo // 100} {key:8} {lo}-{hi} {d['name']}") + print(f" {lo // 100} {key:12} {lo}-{hi} {d['name']}") return 0 def cmd_lint(args) -> int: - """Delegate to doclint.py (the catalog test).""" + """Delegate to the doclint linter (the catalog test).""" cmd = [sys.executable, str(HERE / "doclint.py"), "--check"] if args.strict: cmd.append("--enforce-adrs") diff --git a/docs/scripts/doclint.py b/docs/scripts/doclint.py index b3750ef1b..fc7f47d7d 100755 --- a/docs/scripts/doclint.py +++ b/docs/scripts/doclint.py @@ -2,35 +2,46 @@ """ doclint — graph-aware linter for the documentation catalog. 
-Extends the ADR linter's approach (`docs/scripts/adr lint`) from ADRs to the -whole `docs/` tree, treating docs and ADRs as a single *decision graph*: nodes -are records, edges are `related`/`supersedes` references. See ADR-908 -(documentation catalog) and ADR-900 (numbering domain system). +Extends the ADR linter (`docs/scripts/adr lint`) from ADRs to the whole `docs/` +tree, treating docs and ADRs as a single *decision graph*: nodes are records, +edges are `related`/`supersedes` references. See ADR-302 (unified documentation +model); generalized from the knowledge-graph-system reference implementation +(its ADR-908/ADR-900). -It checks three things: +It checks: 1. Frontmatter validity — every catalog page carries a well-formed - `id`/`domain`/`mode`, and the ID's domain digit + mode letter agree with the + `id`/`domain`/`mode`; the id's domain band and mode pole agree with the `domain`/`mode` fields. Domains come from `adr.yaml` (single source of truth). 2. Reference graph — every `related`/`supersedes` target resolves (no dangling - reference), no supersede cycles, and no catalog page is orphaned from the - mkdocs nav. + reference), no supersede cycles, and (when a site nav exists) no catalog page + is orphaned from it. 3. Coverage matrix — which `(domain, mode)` cells hold pages, surfacing gaps. -Catalog pages (docs/ outside architecture/) are ENFORCED: their issues are -errors. ADRs (docs/architecture/) WARN by default; `--enforce-adrs` promotes -them to errors once the ADR frontmatter sweep lands. +Portability (this is the canonical, multi-repo tool, not a single project's copy): + +- **Catalog membership is opt-in.** A `docs/` page is a catalog node only if it + declares catalog frontmatter (`id`/`domain`/`mode`). Un-declared prose is + ignored, so a repo can adopt the catalog gradually instead of all-at-once. +- **mkdocs nav is optional.** No `mkdocs.yml` → the orphan check is skipped. 
+- **The retired-range guard is opt-in.** Set `legacy: {retired: true}` in + `adr.yaml` to fail on references into a vacated pre-domain range (the + ADR-900 move). Repos that still use their legacy range leave it off (default). +- **Project root is discovered** (git, then walking up for + `docs/architecture/adr.yaml`), so the script works whether it is a symlink + into the ways corpus (agent-ways dogfooding) or a vendored copy in a project. Usage: - doclint.py [--check] [--enforce-adrs] [--quiet] + doclint [--check] [--enforce-adrs] [--quiet] --check exit 1 if any errors (CI mode) --enforce-adrs treat ADR issues as errors, not warnings - --quiet suppress the coverage matrix and per-file OK lines + --quiet suppress the coverage matrix """ import argparse import re +import subprocess import sys from dataclasses import dataclass, field from pathlib import Path @@ -41,53 +52,76 @@ print("Error: PyYAML is required (pip install pyyaml).", file=sys.stderr) sys.exit(1) -DOCS = Path(__file__).resolve().parent.parent # docs/ -REPO = DOCS.parent + +# ============================================================================ +# Project root discovery (works under symlink or vendored copy) +# ============================================================================ + +def get_project_root() -> Path: + """Find the project root via git, then by walking up to docs/architecture/.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, timeout=5) + if result.returncode == 0: + root = Path(result.stdout.strip()) + if (root / "docs" / "architecture" / "adr.yaml").exists(): + return root + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + candidate = Path.cwd() + for _ in range(10): + if (candidate / "docs" / "architecture" / "adr.yaml").exists(): + return candidate + if candidate.parent == candidate: + break + candidate = candidate.parent + return Path.cwd() + + +REPO = get_project_root() +DOCS = REPO / 
"docs" ADR_YAML = DOCS / "architecture" / "adr.yaml" MKDOCS_YML = REPO / "mkdocs.yml" -# Retired-range guard (ADR-900): scan these trees for references into the -# retired pre-domain number range and fail the build. -RETIRED_SCAN_DIRS = ["docs", "specs", "api", "cli", "fuse", "schema", - "web/src", "operator", "scripts"] +MODE_LETTER = {"tutorial": "T", "how-to": "H", "reference": "R", "explanation": "E"} + +ID_RE = re.compile(r"^(\d{2})\.(\d{3})\.([A-Z])$") +ADR_REF_RE = re.compile(r"^ADR-(\d+(?:\.\d+)?)$") +ADR_FILE_RE = re.compile(r"^ADR-(\d+(?:\.\d+)?)") +WIKILINK_RE = re.compile(r"^\[\[(.+?)\]\]$") + +# Catalog pages live in docs/ but not these subtrees. +SKIP_DIR_PARTS = {"architecture", "scripts"} + +# Retired-range scan (opt-in via adr.yaml legacy.retired). RETIRED_SCAN_EXTS = {".md", ".py", ".ts", ".tsx", ".js", ".mjs", ".rs", ".sh", ".yml", ".yaml", ".json"} RETIRED_SKIP_PARTS = {"node_modules", "dist", "site", ".git"} -# Files that define the retired range and may legitimately name it. RETIRED_EXEMPT_NAMES = {"adr.yaml"} RETIRED_EXEMPT_PREFIXES = ("ADR-900-",) RETIRED_ALLOW_MARKER = "doclint-allow-retired" ADR_ANYREF_RE = re.compile(r"\bADR-0*(\d+)(\.\d+)?\b") -MODE_LETTER = { - "tutorial": "T", "how-to": "H", "reference": "R", - "explanation": "E", "operations": "O", -} -LETTER_MODE = {v: k for k, v in MODE_LETTER.items()} - -ID_RE = re.compile(r"^(\d)\.([A-Z])\.(\d{2})$") -ADR_REF_RE = re.compile(r"^ADR-(\d+(?:\.\d+)?)$") -ADR_FILE_RE = re.compile(r"^ADR-(\d+(?:\.\d+)?)") - -# Catalog pages live in docs/ but not these subtrees. Generated per-item stubs -# under reference/{cli,mcp,fuse}/ are excluded from the published site -# (mkdocs.yml exclude_docs) and are not catalog pages. 
-SKIP_DIR_PARTS = {"architecture", "security"} -SKIP_PREFIXES = ("reference/cli/", "reference/mcp/", "reference/fuse/") - # ============================================================================ # Config # ============================================================================ -def load_domain_digits() -> dict: - """Map domain key -> leading digit, derived from adr.yaml ranges.""" +def load_config() -> dict: + if not ADR_YAML.exists(): + print(f"Error: {ADR_YAML} not found (run from a repo with ADR tooling).", + file=sys.stderr) + sys.exit(1) with open(ADR_YAML) as f: - cfg = yaml.safe_load(f) + return yaml.safe_load(f) or {} + + +def domain_digits(cfg: dict) -> dict: + """Map domain key -> leading digit, derived from adr.yaml ranges.""" digits = {} for key, dcfg in cfg.get("domains", {}).items(): - lo = dcfg["range"][0] - digits[key] = lo // 100 + digits[key] = dcfg["range"][0] // 100 return digits @@ -97,20 +131,21 @@ def load_domain_digits() -> dict: @dataclass class Node: - """A record in the decision graph (a catalog page or an ADR).""" kind: str # 'doc' | 'adr' - key: str # catalog id ('4.O.01') or 'ADR-411' + key: str # catalog id ('04.001.H') or 'ADR-411' path: Path - rel: str # display path, relative to repo root + rel: str domain: str = None mode: str = None - refs: list = field(default_factory=list) # (field_name, target_string) + refs: list = field(default_factory=list) # (field_name, target_string) issues: list = field(default_factory=list) # (severity, message) def parse_frontmatter(path: Path) -> dict: - """Return the YAML frontmatter as a dict, or {} if absent/empty.""" - text = path.read_text() + try: + text = path.read_text() + except (OSError, UnicodeDecodeError): + return {} # skip unreadable/non-UTF-8 files rather than abort the run if not text.startswith("---"): return {} lines = text.split("\n") @@ -128,52 +163,58 @@ def parse_frontmatter(path: Path) -> dict: def _as_ref_list(value) -> list: - """Coerce a related/supersedes 
frontmatter value into a list of strings.""" + """Coerce a related/supersedes value into target strings, stripping wikilinks. + + ADR-302 edges are Obsidian `[[wikilinks]]`; the inside is the catalog id or + ADR reference. The aliased form `[[target|alias]]` keeps only `target`. Bare + strings are accepted too (pre-wikilink ADRs). + """ if not value: return [] - if isinstance(value, str): - return [value] - if isinstance(value, list): - return [str(v) for v in value] - return [] + items = [value] if isinstance(value, str) else value if isinstance(value, list) else [] + out = [] + for v in items: + s = str(v).strip() + m = WIKILINK_RE.match(s) + if m: + s = m.group(1).split("|", 1)[0].strip() # [[target|alias]] -> target + out.append(s) + return out def iter_catalog_pages(): - """Yield catalog page paths (docs/ outside architecture/ and security/).""" + """Yield candidate catalog page paths (docs/ outside skipped subtrees).""" + if not DOCS.exists(): + return for p in sorted(DOCS.rglob("*.md")): - parts = set(p.relative_to(DOCS).parts) - if parts & SKIP_DIR_PARTS: - continue - rel = str(p.relative_to(DOCS)) - if rel.startswith(SKIP_PREFIXES): + if set(p.relative_to(DOCS).parts) & SKIP_DIR_PARTS: continue yield p def iter_adrs(): - """Yield ADR file paths under docs/architecture/.""" - yield from sorted((DOCS / "architecture").rglob("ADR-*.md")) + arch = DOCS / "architecture" + if arch.exists(): + yield from sorted(arch.rglob("ADR-*.md")) -# ============================================================================ -# Building the graph -# ============================================================================ +def build_doc_node(path: Path, digits: dict): + """Parse a docs/ page into a catalog Node, or None if it is not a catalog page. 
-def build_doc_node(path: Path, digits: dict) -> Node: - """Parse a catalog page and validate its frontmatter into a Node.""" - rel = str(path.relative_to(DOCS.parent)) + Opt-in membership: a page is only a catalog node if it declares at least one + of id/domain/mode. That lets a repo adopt the catalog gradually. + """ fm = parse_frontmatter(path) - node = Node(kind="doc", key=fm.get("id") or f"?{rel}", path=path, rel=rel) - cid, domain, mode = fm.get("id"), fm.get("domain"), fm.get("mode") - node.domain, node.mode = domain, mode + if not (cid or domain or mode): + return None # ordinary prose, not (yet) a catalog page - if not fm: - node.issues.append(("error", "missing frontmatter (need id/domain/mode)")) - return node + rel = str(path.relative_to(REPO)) + node = Node(kind="doc", key=cid or f"?{rel}", path=path, rel=rel, + domain=domain, mode=mode) - for fname in ("id", "domain", "mode"): - if not fm.get(fname): + for fname, val in (("id", cid), ("domain", domain), ("mode", mode)): + if not val: node.issues.append(("error", f"missing frontmatter key: {fname}")) if domain and domain not in digits: @@ -185,17 +226,17 @@ def build_doc_node(path: Path, digits: dict) -> Node: if cid: m = ID_RE.match(cid) if not m: - node.issues.append(("error", f"malformed id: {cid} (want ..)")) + node.issues.append(("error", f"malformed id: {cid} (want
..)")) else: - digit, letter, _ = m.group(1), m.group(2), m.group(3) - if domain in digits and int(digit) != digits[domain]: + band, _serial, letter = m.groups() + if domain in digits and int(band) != digits[domain]: node.issues.append( - ("error", - f"id domain digit {digit} != domain '{domain}' (expected {digits[domain]})")) + ("error", f"id domain band {band} != domain '{domain}' " + f"(expected {digits[domain]:02d})")) if mode in MODE_LETTER and letter != MODE_LETTER[mode]: node.issues.append( - ("error", - f"id mode letter {letter} != mode '{mode}' (expected {MODE_LETTER[mode]})")) + ("error", f"id pole {letter} != mode '{mode}' " + f"(expected {MODE_LETTER[mode]})")) node.refs += [("related", r) for r in _as_ref_list(fm.get("related"))] node.refs += [("supersedes", r) for r in _as_ref_list(fm.get("supersedes"))] @@ -203,8 +244,7 @@ def build_doc_node(path: Path, digits: dict) -> Node: def build_adr_node(path: Path) -> Node: - """Parse an ADR into a Node (key from filename, refs from frontmatter).""" - rel = str(path.relative_to(DOCS.parent)) + rel = str(path.relative_to(REPO)) m = ADR_FILE_RE.match(path.name) key = f"ADR-{m.group(1)}" if m else path.stem fm = parse_frontmatter(path) @@ -218,11 +258,13 @@ def build_adr_node(path: Path) -> Node: # Graph checks # ============================================================================ -def collect_nav_pages() -> set: - """Return the set of doc paths (relative to docs/) referenced by mkdocs nav.""" +def collect_nav_pages(): + """Doc paths (relative to docs/) referenced by mkdocs nav, or None if no nav.""" + if not MKDOCS_YML.exists(): + return None + class _Loader(yaml.SafeLoader): pass - _Loader.add_multi_constructor( "tag:yaml.org,2002:python/", lambda loader, suffix, node: None) with open(MKDOCS_YML) as f: @@ -246,21 +288,10 @@ def walk(node): def check_references(nodes: list): - """Flag related/supersedes targets that resolve to no known node. 
- - Decimal-ADR convention (ADR-900): a decision may be split into parts - (ADR-603.1, ADR-603.2). A bare base reference (`ADR-603`) is the family - identifier and is satisfied by any of its parts — references cite the - decision, not a specific part. An exact part reference (`ADR-603.2`) must - match exactly. - """ + """Flag related/supersedes targets that resolve to no known node.""" keys = {n.key for n in nodes} - base_parts = set() - for k in keys: - m = re.match(r"^(ADR-\d+)\.\d+$", k) - if m: - base_parts.add(m.group(1)) - + base_parts = {m.group(1) for k in keys + if (m := re.match(r"^(ADR-\d+)\.\d+$", k))} for node in nodes: for fname, target in node.refs: t = target.strip() @@ -269,54 +300,18 @@ def check_references(nodes: list): if t in keys: continue if "." not in t and t in base_parts: - continue # base reference satisfied by a part (ADR-603 -> ADR-603.1) + continue # base ref satisfied by a part (ADR-603 -> ADR-603.1) node.issues.append( ("error", f"dangling {fname} reference: {t} (no such record)")) -def check_retired_refs(lo: int, hi: int): - """Scan docs + source for references into the retired number range (ADR-900). - - Returns a list of (relpath, lineno, ref). The files that define the range - (this ADR, adr.yaml) are exempt, as is any line carrying the allow-marker. 
- """ - hits = [] - for d in RETIRED_SCAN_DIRS: - base = REPO / d - if not base.exists(): - continue - for f in base.rglob("*"): - if not f.is_file() or f.suffix not in RETIRED_SCAN_EXTS: - continue - if RETIRED_SKIP_PARTS & set(f.parts): - continue - if f.name in RETIRED_EXEMPT_NAMES or f.name.startswith(RETIRED_EXEMPT_PREFIXES): - continue - try: - text = f.read_text() - except (OSError, UnicodeDecodeError): - continue - if "ADR-" not in text: - continue - rel = str(f.relative_to(REPO)) - for ln, line in enumerate(text.split("\n"), 1): - if RETIRED_ALLOW_MARKER in line: - continue - for m in ADR_ANYREF_RE.finditer(line): - if lo <= int(m.group(1)) <= hi: - hits.append((rel, ln, m.group(0))) - return hits - - def check_supersede_cycles(nodes: list): - """Detect cycles in the supersedes relation.""" edges = {} for n in nodes: edges.setdefault(n.key, []) for fname, target in n.refs: if fname == "supersedes": edges[n.key].append(target.strip()) - WHITE, GRAY, BLACK = 0, 1, 2 color = {k: WHITE for k in edges} by_key = {n.key: n for n in nodes} @@ -327,52 +322,109 @@ def visit(k, stack): if nxt not in color: continue if color[nxt] == GRAY: - cycle = " -> ".join(stack + [nxt]) if k in by_key: - by_key[k].issues.append(("error", f"supersede cycle: {cycle}")) + by_key[k].issues.append( + ("error", f"supersede cycle: {' -> '.join(stack + [nxt])}")) elif color[nxt] == WHITE: visit(nxt, stack + [nxt]) color[k] = BLACK - for k in edges: + for k in list(edges): if color[k] == WHITE: visit(k, [k]) -def check_orphans(doc_nodes: list, nav_pages: set): - """Flag catalog pages on disk that the mkdocs nav never references.""" +def check_orphans(doc_nodes: list, nav_pages): + if nav_pages is None: + return for n in doc_nodes: - rel_to_docs = str(n.path.relative_to(DOCS)) - if rel_to_docs not in nav_pages: + if str(n.path.relative_to(DOCS)) not in nav_pages: n.issues.append(("warning", "orphan: not referenced by mkdocs nav")) +def check_duplicate_ids(doc_nodes: list): + by_id = {} + 
for n in doc_nodes: + if n.key and not n.key.startswith("?"): + by_id.setdefault(n.key, []).append(n) + for cid, group in by_id.items(): + if len(group) > 1: + for n in group: + mates = ", ".join(sorted(g.rel for g in group if g is not n)) + n.issues.append(("error", f"duplicate catalog id {cid} (also on: {mates})")) + + +def _retired_scan_files(): + """Yield candidate files for the retired-range scan, respecting .gitignore. + + Enumerate via `git ls-files` (tracked + untracked-but-not-ignored) so a + gitignored corpus, build tree, or scratch dir is never walked — without it, + the scan reads every ignored file in the repo (e.g. a 500MB private corpus). + Falls back to rglob for non-git checkouts, preserving portability. + """ + try: + out = subprocess.run( + ["git", "-C", str(REPO), "ls-files", "--cached", "--others", + "--exclude-standard", "-z"], + capture_output=True, timeout=30) + if out.returncode == 0: + for raw in out.stdout.split(b"\x00"): + if raw: + yield REPO / raw.decode("utf-8", "surrogateescape") + return + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + yield from REPO.rglob("*") # non-git fallback + + +def check_retired_refs(lo: int, hi: int): + """Scan repo for references into a vacated pre-domain range (opt-in).""" + hits = [] + for f in _retired_scan_files(): + if not f.is_file() or f.suffix not in RETIRED_SCAN_EXTS: + continue + if RETIRED_SKIP_PARTS & set(f.parts): + continue + if f.name in RETIRED_EXEMPT_NAMES or f.name.startswith(RETIRED_EXEMPT_PREFIXES): + continue + try: + text = f.read_text() + except (OSError, UnicodeDecodeError): + continue + if "ADR-" not in text: + continue + rel = str(f.relative_to(REPO)) + for ln, line in enumerate(text.split("\n"), 1): + if RETIRED_ALLOW_MARKER in line: + continue + for m in ADR_ANYREF_RE.finditer(line): + if lo <= int(m.group(1)) <= hi: + hits.append((rel, ln, m.group(0))) + return hits + + # ============================================================================ # Coverage # 
============================================================================ def print_coverage(doc_nodes: list, digits: dict): - """Print a domain x mode matrix of catalog page counts.""" modes = list(MODE_LETTER) grid = {} for n in doc_nodes: if n.domain and n.mode: grid[(n.domain, n.mode)] = grid.get((n.domain, n.mode), 0) + 1 - domains = sorted(digits, key=lambda d: digits[d]) - header = f"{'domain':9} " + " ".join(f"{MODE_LETTER[m]:>3}" for m in modes) + " tot" + header = f"{'domain':12} " + " ".join(f"{MODE_LETTER[m]:>3}" for m in modes) + " tot" print("\nCoverage matrix (catalog pages per domain x mode):") print(header) print("-" * len(header)) for d in domains: cells = [grid.get((d, m), 0) for m in modes] - row = f"{d:9} " + " ".join(f"{c or '.':>3}" for c in cells) - print(f"{row} {sum(cells):>3}") - total = sum(grid.values()) + print(f"{d:12} " + " ".join(f"{c or '.':>3}" for c in cells) + f" {sum(cells):>3}") print("-" * len(header)) - print(f"{'total':9} " + " ".join( - f"{sum(grid.get((d, m), 0) for d in domains):>3}" for m in modes) - + f" {total:>3}") + total = sum(grid.values()) + print(f"{'total':12} " + " ".join( + f"{sum(grid.get((d, m), 0) for d in domains):>3}" for m in modes) + f" {total:>3}") # ============================================================================ @@ -384,53 +436,51 @@ def main(): parser.add_argument("--check", action="store_true", help="exit 1 on errors (CI mode)") parser.add_argument("--enforce-adrs", action="store_true", help="treat ADR issues as errors, not warnings") - parser.add_argument("--quiet", action="store_true", - help="suppress coverage matrix and OK lines") + parser.add_argument("--quiet", action="store_true", help="suppress coverage matrix") args = parser.parse_args() - digits = load_domain_digits() + cfg = load_config() + digits = domain_digits(cfg) nav_pages = collect_nav_pages() - with open(ADR_YAML) as f: - retired_lo, retired_hi = (int(x) for x in - yaml.safe_load(f).get("legacy", {}).get("range", 
[1, 99])) - doc_nodes = [build_doc_node(p, digits) for p in iter_catalog_pages()] + doc_nodes = [n for p in iter_catalog_pages() + if (n := build_doc_node(p, digits)) is not None] adr_nodes = [build_adr_node(p) for p in iter_adrs()] all_nodes = doc_nodes + adr_nodes check_references(all_nodes) check_supersede_cycles(all_nodes) check_orphans(doc_nodes, nav_pages) + check_duplicate_ids(doc_nodes) - # ADR issues are warnings unless --enforce-adrs; doc issues are always errors. def effective(node, severity): if node.kind == "adr" and not args.enforce_adrs and severity == "error": return "warning" return severity errors = warnings = 0 - flagged = [n for n in all_nodes if n.issues] - for node in sorted(flagged, key=lambda n: n.rel): + for node in sorted((n for n in all_nodes if n.issues), key=lambda n: n.rel): print(f"\n{node.rel} [{node.key}]") for severity, msg in node.issues: sev = effective(node, severity) - icon = "ERROR" if sev == "error" else "warn " - print(f" {icon} {msg}") + print(f" {'ERROR' if sev == 'error' else 'warn '} {msg}") if sev == "error": errors += 1 else: warnings += 1 - # Retired-range guard: references into the vacated pre-domain range (ADR-900). - retired_hits = check_retired_refs(retired_lo, retired_hi) - if retired_hits: - print(f"\nRetired-range references (ADR-{retired_lo}..{retired_hi} are " - f"renumbered; see ADR-900):") - for rel, ln, ref in sorted(retired_hits): - print(f" ERROR {rel}:{ln} {ref}") - errors += len(retired_hits) - - if not args.quiet: + # Retired-range guard (opt-in): legacy: {retired: true} in adr.yaml. 
+ legacy = cfg.get("legacy", {}) or {} + if legacy.get("retired"): + lo, hi = (int(x) for x in legacy.get("range", [1, 99])) + retired_hits = check_retired_refs(lo, hi) + if retired_hits: + print(f"\nRetired-range references (ADR-{lo}..{hi} are vacated):") + for rel, ln, ref in sorted(retired_hits): + print(f" ERROR {rel}:{ln} {ref}") + errors += len(retired_hits) + + if not args.quiet and doc_nodes: print_coverage(doc_nodes, digits) print(f"\n{'='*60}") @@ -438,9 +488,7 @@ def effective(node, severity): print(f"Summary: {errors} errors, {warnings} warnings") print(f"{'='*60}") - if args.check and errors > 0: - return 1 - return 0 + return 1 if (args.check and errors > 0) else 0 if __name__ == "__main__": diff --git a/docs/self-host/appliance-libvirt.md b/docs/self-host/appliance-libvirt.md index 515a7dd0a..fd4daf155 100644 --- a/docs/self-host/appliance-libvirt.md +++ b/docs/self-host/appliance-libvirt.md @@ -1,7 +1,7 @@ --- -id: 1.O.10 +id: 01.003.H domain: infra -mode: operations +mode: how-to --- # Appliance on libvirt/KVM diff --git a/docs/self-host/backup-restore.md b/docs/self-host/backup-restore.md index 5be3f5dc7..022f2e4e1 100644 --- a/docs/self-host/backup-restore.md +++ b/docs/self-host/backup-restore.md @@ -1,7 +1,7 @@ --- -id: 1.O.01 +id: 01.004.H domain: infra -mode: operations +mode: how-to --- # Backup and Restore diff --git a/docs/self-host/configuration.md b/docs/self-host/configuration.md index c0c91bc9c..7063b9ba0 100644 --- a/docs/self-host/configuration.md +++ b/docs/self-host/configuration.md @@ -1,7 +1,7 @@ --- -id: 1.O.02 +id: 01.005.R domain: infra -mode: operations +mode: reference --- # Configuration Reference diff --git a/docs/self-host/macvlan.md b/docs/self-host/macvlan.md index fe5caa28c..eead6b5f7 100644 --- a/docs/self-host/macvlan.md +++ b/docs/self-host/macvlan.md @@ -1,7 +1,7 @@ --- -id: 1.O.03 +id: 01.006.H domain: infra -mode: operations +mode: how-to --- # Dedicated IP with macvlan diff --git a/docs/self-host/production.md 
b/docs/self-host/production.md index bca08345f..f9d8d6400 100644 --- a/docs/self-host/production.md +++ b/docs/self-host/production.md @@ -1,7 +1,7 @@ --- -id: 1.O.04 +id: 01.007.H domain: infra -mode: operations +mode: how-to --- # Production Deployment diff --git a/docs/self-host/quick-start.md b/docs/self-host/quick-start.md index a268c679a..a6510ea17 100644 --- a/docs/self-host/quick-start.md +++ b/docs/self-host/quick-start.md @@ -1,7 +1,7 @@ --- -id: 1.O.05 +id: 01.008.H domain: infra -mode: operations +mode: how-to --- # Quick Start diff --git a/docs/self-host/scheduled-jobs.md b/docs/self-host/scheduled-jobs.md index 006bb715e..b025a5dd1 100644 --- a/docs/self-host/scheduled-jobs.md +++ b/docs/self-host/scheduled-jobs.md @@ -1,7 +1,7 @@ --- -id: 1.O.06 +id: 01.009.E domain: infra -mode: operations +mode: explanation --- # Scheduled Jobs diff --git a/docs/self-host/security.md b/docs/self-host/security.md index f1ff55ce4..2b4969928 100644 --- a/docs/self-host/security.md +++ b/docs/self-host/security.md @@ -1,7 +1,7 @@ --- -id: 4.O.01 +id: 04.001.H domain: auth -mode: operations +mode: how-to --- # Security and Access diff --git a/docs/self-host/tls.md b/docs/self-host/tls.md index 6b5a39770..689e5270a 100644 --- a/docs/self-host/tls.md +++ b/docs/self-host/tls.md @@ -1,7 +1,7 @@ --- -id: 1.O.07 +id: 01.010.H domain: infra -mode: operations +mode: how-to --- # TLS and Certificates diff --git a/docs/self-host/troubleshooting.md b/docs/self-host/troubleshooting.md index 791515bff..e633fc618 100644 --- a/docs/self-host/troubleshooting.md +++ b/docs/self-host/troubleshooting.md @@ -1,7 +1,7 @@ --- -id: 1.O.08 +id: 01.011.H domain: infra -mode: operations +mode: how-to --- # Troubleshooting diff --git a/docs/self-host/upgrading.md b/docs/self-host/upgrading.md index 679a6222e..6076280be 100644 --- a/docs/self-host/upgrading.md +++ b/docs/self-host/upgrading.md @@ -1,7 +1,7 @@ --- -id: 1.O.09 +id: 01.012.H domain: infra -mode: operations +mode: how-to --- # 
Upgrading diff --git a/fuse/scripts/generate-fuse-docs.py b/fuse/scripts/generate-fuse-docs.py index 1ffb7d054..e21b5f7ec 100644 --- a/fuse/scripts/generate-fuse-docs.py +++ b/fuse/scripts/generate-fuse-docs.py @@ -107,7 +107,7 @@ def generate_markdown(modules: list[dict]) -> str: # because this page is overwritten on every docs build. domain=ui (FUSE), # mode=reference. Stripped from GitHub Pages (mkdocs ignores unknown keys). lines.append("---") - lines.append("id: 7.R.05") + lines.append("id: 07.005.R") lines.append("domain: ui") lines.append("mode: reference") lines.append("---") diff --git a/schema/scripts/generate-schema-docs.py b/schema/scripts/generate-schema-docs.py index 45ba6d67e..6174e7d89 100755 --- a/schema/scripts/generate-schema-docs.py +++ b/schema/scripts/generate-schema-docs.py @@ -363,7 +363,7 @@ def render(tables, table_comments, column_comments, migrations): # because this page is overwritten on every docs build. domain=db (schema), # mode=reference. Stripped from GitHub Pages (mkdocs ignores unknown keys). out.append("---") - out.append("id: 2.R.01") + out.append("id: 02.002.R") out.append("domain: db") out.append("mode: reference") out.append("---")