diff --git a/.gitbook-branch-readme.md b/.gitbook-branch-readme.md new file mode 100644 index 00000000..ff51b886 --- /dev/null +++ b/.gitbook-branch-readme.md @@ -0,0 +1,14 @@ +# GitBook Documentation Branch + +The `gitbook-docs` branch contains **generated** GitBook-compatible documentation, +automatically updated by GitHub Actions on every push to `main`. + +**Do not edit this branch manually** — all changes will be overwritten. + +## How it works + +1. `scripts/prepare_gitbook_site.py` copies `docs/` into `site/`, maps root + files (`README.md`, `CONTRIBUTING.md`, `DEVELOPMENT.md`) into the site, and + expands any `{{#include ...}}` markers +2. The contents of `site/` are pushed to this branch +3. GitBook syncs from this branch diff --git a/.gitbook.yaml b/.gitbook.yaml new file mode 100644 index 00000000..590cd4da --- /dev/null +++ b/.gitbook.yaml @@ -0,0 +1,5 @@ +root: ./ + +structure: + readme: index.md + summary: SUMMARY.md diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..b1fa95be --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,83 @@ +name: Documentation + +on: + push: + branches: [main] + paths: + - "docs/**" + - "README.md" + - "CONTRIBUTING.md" + - "DEVELOPMENT.md" + - "LICENSE" + - "SECURITY.md" + - "CONTRIBUTOR_AGREEMENT.md" + - "cli/**" + - "sdk/**" + - "server/backend/README.md" + - "scripts/prepare_gitbook_site.py" + - "scripts/check_docs.py" + - ".gitbook.yaml" + - ".gitbook-branch-readme.md" + - ".github/workflows/docs.yml" + pull_request: + paths: + - "docs/**" + - "README.md" + - "CONTRIBUTING.md" + - "DEVELOPMENT.md" + - "LICENSE" + - "SECURITY.md" + - "CONTRIBUTOR_AGREEMENT.md" + - "cli/**" + - "sdk/**" + - "server/backend/README.md" + - "scripts/prepare_gitbook_site.py" + - "scripts/check_docs.py" + - ".gitbook.yaml" + - ".gitbook-branch-readme.md" + - ".github/workflows/docs.yml" + workflow_dispatch: + +jobs: + docs: + permissions: + contents: write + runs-on: ubuntu-latest + 
steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Check documentation links + run: python scripts/check_docs.py + + - name: Build GitBook site + run: python scripts/prepare_gitbook_site.py + + - name: Deploy to gitbook-docs branch + if: ${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/main' }} + run: | + git config user.name 'github-actions[bot]' + git config user.email 'github-actions[bot]@users.noreply.github.com' + + mv site/ /tmp/gitbook-site/ + + git fetch origin gitbook-docs || true + if git rev-parse --verify origin/gitbook-docs >/dev/null 2>&1; then + git checkout gitbook-docs + else + git checkout --orphan gitbook-docs + git rm -rf . + fi + + rsync -a --delete --exclude='.git' /tmp/gitbook-site/ . + git add -A + if ! git diff --cached --quiet; then + git commit -m "docs: update GitBook documentation from ${{ github.sha }}" + git push origin gitbook-docs + fi diff --git a/.gitignore b/.gitignore index 28f22fdf..119d459e 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,4 @@ plugins/cq/bin/ # Generated by `make sync-schema`; canonical sources live in schema/*.json. 
schema/python/src/cq_schema/_data/ +site/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b25b9c76..a350850f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,13 @@ repos: - repo: local hooks: + - id: check-docs + name: check docs links + entry: python scripts/check_docs.py + language: system + pass_filenames: false + files: '^docs/|^README\.md$|^CONTRIBUTING\.md$|^DEVELOPMENT\.md$|^LICENSE$|^SECURITY\.md$|^CONTRIBUTOR_AGREEMENT\.md$|^cli/|^sdk/|^server/backend/README\.md$' + - id: ty-check-install name: ty (scripts/install) entry: bash -c 'cd scripts/install && uvx ty check src/cq_install --python .venv' diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 80a1791a..278888ac 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -218,7 +218,7 @@ Data-plane reads remain open: Exploratory — this is a `0.x.x` project. Expect breaking changes to the database format and SDK interfaces before v1. We'll provide migration scripts where possible so your knowledge units survive upgrades. -See [`docs/`](docs/) for the proposal and PoC design. +See the [proposal](docs/CQ-Proposal.md) and [architecture overview](docs/architecture.md) for the design. ### Migrating from earlier releases diff --git a/cli/README.md b/cli/README.md index 57b0bd75..177a10b9 100644 --- a/cli/README.md +++ b/cli/README.md @@ -8,6 +8,9 @@ frameworks. ## Installation ```bash +# Homebrew. +brew install --cask mozilla-ai/tap/cq + # Go install. go install github.com/mozilla-ai/cq/cli@latest @@ -129,7 +132,7 @@ Knowledge units live in one of three tiers: With `CQ_ADDR` set, `cq propose` sends the unit straight to the remote as `private` (falling back to local if the remote is unreachable). With no remote, everything stays local. `cq status` shows the count in each tier. -See the [top-level README](../README.md#knowledge-tiers) for the full description. +See the [top-level README](../README.md) for the full description. 
## Development diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 00000000..b9320a5a --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,26 @@ +# Table of Contents + +* [Introduction](index.md) + +## Guides + +* [Architecture](architecture.md) +* [Development](DEVELOPMENT.md) + +## Components + +* [CLI](cli/README.md) + * [CLI Development](cli/DEVELOPMENT.md) +* [Go SDK](sdk/go/README.md) + * [Go SDK Development](sdk/go/DEVELOPMENT.md) +* [Python SDK](sdk/python/README.md) + * [Python SDK Development](sdk/python/DEVELOPMENT.md) +* [Server](server/README.md) + +## Reference + +* [Proposal](CQ-Proposal.md) + +## Community + +* [Contributing](CONTRIBUTING.md) diff --git a/scripts/check_docs.py b/scripts/check_docs.py new file mode 100644 index 00000000..24586a3f --- /dev/null +++ b/scripts/check_docs.py @@ -0,0 +1,204 @@ +"""Validate checked-in docs before publishing. + +Checks all source files that will be published to the GitBook site for broken +internal links. External links are skipped so the checker stays fast and works +offline. + +Usage: + python scripts/check_docs.py +""" + +from __future__ import annotations + +import re +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS_DIR = REPO_ROOT / "docs" + +# Source files outside docs/ that are published to the site. +# Must stay in sync with ROOT_FILES in prepare_gitbook_site.py. +PUBLISHED_ROOT_FILES: tuple[Path, ...] 
= ( + REPO_ROOT / "README.md", + REPO_ROOT / "CONTRIBUTING.md", + REPO_ROOT / "DEVELOPMENT.md", + REPO_ROOT / "LICENSE", + REPO_ROOT / "SECURITY.md", + REPO_ROOT / "CONTRIBUTOR_AGREEMENT.md", + REPO_ROOT / "cli" / "README.md", + REPO_ROOT / "cli" / "DEVELOPMENT.md", + REPO_ROOT / "sdk" / "go" / "README.md", + REPO_ROOT / "sdk" / "go" / "DEVELOPMENT.md", + REPO_ROOT / "sdk" / "python" / "README.md", + REPO_ROOT / "sdk" / "python" / "DEVELOPMENT.md", + REPO_ROOT / "server" / "backend" / "README.md", +) + +# SUMMARY.md uses site-relative paths by design (GitBook navigation file). +# Source-relative resolution would produce false positives, so skip it. +SKIP_LINK_CHECK: frozenset[Path] = frozenset({(DOCS_DIR / "SUMMARY.md").resolve()}) + +LINK_RE = re.compile(r"!\[[^\]]*\]\(([^)\n]+)\)|(? set[Path]: + """Return resolved paths of every source file that will appear in the site.""" + sources = {p.resolve() for p in PUBLISHED_ROOT_FILES if p.exists()} + sources.update(p.resolve() for p in DOCS_DIR.rglob("*") if p.is_file()) + return sources + + +def strip_code_blocks(text: str) -> str: + """Remove fenced code blocks so code samples are not linted as page links.""" + output: list[str] = [] + in_fence = False + + for line in text.splitlines(): + if CODE_FENCE_RE.match(line): + in_fence = not in_fence + output.append("") + continue + output.append("" if in_fence else line) + + return "\n".join(output) + + +def slugify_heading(raw_heading: str) -> str: + """Approximate the anchor slugs used by common Markdown site generators.""" + heading = re.sub(r"`([^`]*)`", r"\1", raw_heading.strip().lower()) + heading = re.sub(r"[^\w\s-]", "", heading) + heading = re.sub(r"\s+", "-", heading) + heading = re.sub(r"-{2,}", "-", heading) + return heading.strip("-") + + +def extract_anchors(path: Path) -> set[str]: + """Collect heading anchors from a Markdown document.""" + anchors: set[str] = set() + for line in path.read_text(encoding="utf-8").splitlines(): + match = HEADER_RE.match(line) + 
if match: + anchors.add(slugify_heading(match.group(2))) + return anchors + + +def split_target(raw_target: str) -> tuple[str, str]: + """Split a Markdown link target into path and optional anchor.""" + target = raw_target.strip() + if target.startswith("<") and target.endswith(">"): + target = target[1:-1] + if " " in target and not target.startswith("#"): + target = target.split(" ", 1)[0] + if "#" in target: + path_part, anchor = target.split("#", 1) + return path_part, anchor + return target, "" + + +def resolve_target(source_path: Path, target_path: str) -> Path | None: + """Resolve a relative link target from a source file. + + Never returns None in practice: a directory target resolves to its README.md or + index.md when present; otherwise the resolved path is returned for the caller to check. + """ + base = source_path.parent + resolved = (base / target_path).resolve() + + if resolved.is_dir(): + for name in ("README.md", "index.md"): + candidate = resolved / name + if candidate.exists(): + return candidate + return resolved # Directory with no index; caller will flag as unpublished + + if resolved.exists(): + return resolved + + if resolved.suffix == "": + md = resolved.with_suffix(".md") + if md.exists(): + return md + + return resolved # May not exist; caller checks + + +def validate_summary(errors: list[str]) -> None: + """Ensure docs/SUMMARY.md exists.""" + if not (DOCS_DIR / "SUMMARY.md").exists(): + errors.append("docs/SUMMARY.md is missing") + + +def iter_link_targets(text: str) -> list[str]: + """Extract raw link targets from Markdown text (code blocks already stripped).""" + targets: list[str] = [] + for m in LINK_RE.finditer(text): + raw = m.group(1) or m.group(3) + if raw: + targets.append(raw) + return targets + + +def main() -> int: + """Validate docs links and anchors. 
Returns a process exit code.""" + errors: list[str] = [] + published = all_published_sources() + + anchors_by_file: dict[Path, set[str]] = {} + for path in published: + if path.suffix == ".md": + anchors_by_file[path] = extract_anchors(path) + + validate_summary(errors) + + sources_to_check = [ + p for p in sorted(published) + if p.suffix == ".md" and p not in SKIP_LINK_CHECK + ] + + for source_path in sources_to_check: + text = strip_code_blocks(source_path.read_text(encoding="utf-8")) + + for raw_target in iter_link_targets(text): + if raw_target.startswith(SKIPPED_PREFIXES): + continue + + target_path, anchor = split_target(raw_target) + + if target_path == "": + target_file = source_path + else: + target_file = resolve_target(source_path, target_path) + if not target_file.exists(): + errors.append( + f"{source_path.relative_to(REPO_ROOT)} -> missing target `{target_path}`" + ) + continue + if target_file.resolve() not in published: + errors.append( + f"{source_path.relative_to(REPO_ROOT)} -> `{target_path}` exists but is not published to the site" + ) + continue + + if anchor and target_file.suffix == ".md": + target_anchors = anchors_by_file.get(target_file.resolve()) + if target_anchors is not None and slugify_heading(anchor) not in target_anchors: + errors.append( + f"{source_path.relative_to(REPO_ROOT)} -> missing anchor `#{anchor}` in " + f"{target_file.relative_to(REPO_ROOT)}" + ) + + if errors: + print("Documentation checks failed:\n") + for error in errors: + print(f"- {error}") + return 1 + + print(f"Documentation checks passed ({len(sources_to_check)} files checked).") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/prepare_gitbook_site.py b/scripts/prepare_gitbook_site.py new file mode 100644 index 00000000..eb86fdbf --- /dev/null +++ b/scripts/prepare_gitbook_site.py @@ -0,0 +1,169 @@ +"""Prepare a deployable GitBook site in site/. 
+ +Copies docs/ into site/ and maps root files (README.md → index.md, +CONTRIBUTING.md, DEVELOPMENT.md, LICENSE, SECURITY.md) and component docs +(cli/, sdk/, server/) into the site. Rewrites relative links in every +published file so they resolve correctly in the new site structure. +Also expands {{#include path/to/file}} markers. + +Usage: + python scripts/prepare_gitbook_site.py +""" + +from __future__ import annotations + +import os +import re +import shutil +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS_DIR = REPO_ROOT / "docs" +SITE_DIR = REPO_ROOT / "site" +INCLUDE_PREFIX = "{{#include " +INCLUDE_SUFFIX = "}}" +ROOT_FILES = { + REPO_ROOT / ".gitbook.yaml": SITE_DIR / ".gitbook.yaml", + REPO_ROOT / ".gitbook-branch-readme.md": SITE_DIR / "README-BRANCH.md", + REPO_ROOT / "README.md": SITE_DIR / "index.md", + REPO_ROOT / "CONTRIBUTING.md": SITE_DIR / "CONTRIBUTING.md", + REPO_ROOT / "DEVELOPMENT.md": SITE_DIR / "DEVELOPMENT.md", + REPO_ROOT / "LICENSE": SITE_DIR / "LICENSE", + REPO_ROOT / "SECURITY.md": SITE_DIR / "SECURITY.md", + REPO_ROOT / "CONTRIBUTOR_AGREEMENT.md": SITE_DIR / "CONTRIBUTOR_AGREEMENT.md", + REPO_ROOT / "cli" / "README.md": SITE_DIR / "cli" / "README.md", + REPO_ROOT / "cli" / "DEVELOPMENT.md": SITE_DIR / "cli" / "DEVELOPMENT.md", + REPO_ROOT / "sdk" / "go" / "README.md": SITE_DIR / "sdk" / "go" / "README.md", + REPO_ROOT / "sdk" / "go" / "DEVELOPMENT.md": SITE_DIR / "sdk" / "go" / "DEVELOPMENT.md", + REPO_ROOT / "sdk" / "python" / "README.md": SITE_DIR / "sdk" / "python" / "README.md", + REPO_ROOT / "sdk" / "python" / "DEVELOPMENT.md": SITE_DIR / "sdk" / "python" / "DEVELOPMENT.md", + REPO_ROOT / "server" / "backend" / "README.md": SITE_DIR / "server" / "README.md", +} +IGNORE_PATTERNS = shutil.ignore_patterns(".DS_Store", "__pycache__") + +LINK_RE = re.compile(r"(!?\[[^\]]*\])\(([^)\n]+)\)") +CODE_FENCE_RE = re.compile(r"^```") +SKIPPED_PREFIXES = ("http://", "https://", "mailto:", "tel:", "data:", 
"#") + + +def build_path_map() -> dict[Path, Path]: + """Return a mapping of resolved source paths to resolved site paths.""" + path_map: dict[Path, Path] = {} + for src in DOCS_DIR.rglob("*"): + rel = src.relative_to(DOCS_DIR) + path_map[src.resolve()] = (SITE_DIR / rel).resolve() + for src, dest in ROOT_FILES.items(): + if src.exists(): + path_map[src.resolve()] = dest.resolve() + return path_map + + +def rewrite_links(site_file: Path, source_file: Path, path_map: dict[Path, Path]) -> None: + """Rewrite relative links so they resolve correctly within the published site.""" + lines = site_file.read_text(encoding="utf-8").splitlines() + output_lines: list[str] = [] + in_fence = False + + for line in lines: + if CODE_FENCE_RE.match(line.strip()): + in_fence = not in_fence + if in_fence: + output_lines.append(line) + continue + + def replace(m: re.Match, _src: Path = source_file, _site: Path = site_file) -> str: + label = m.group(1) + raw = m.group(2).strip() + + if raw.startswith(SKIPPED_PREFIXES): + return m.group(0) + + anchor = "" + target_str = raw + if not raw.startswith("#") and "#" in raw: + target_str, frag = raw.split("#", 1) + anchor = f"#{frag}" + elif raw.startswith("#"): + return m.group(0) + + if not target_str: + return m.group(0) + + resolved = (_src.parent / target_str).resolve() + if resolved.is_dir(): + # Try common index files for directory references + for name in ("README.md", "index.md"): + candidate = (resolved / name).resolve() + if candidate in path_map: + resolved = candidate + break + else: + return m.group(0) # Source dir without a publishable index; leave as-is + + site_target = path_map.get(resolved) + if site_target is None: + return m.group(0) # Target not in published set; leave as-is + + rel = os.path.relpath(site_target, _site.parent) + return f"{label}({rel}{anchor})" + + output_lines.append(LINK_RE.sub(replace, line)) + + site_file.write_text("\n".join(output_lines) + "\n", encoding="utf-8") + + +def expand_includes(path: Path) 
-> None: + """Replace explicit include markers with canonical file contents. + + The marker syntax is intentionally narrow and line-based: + + {{#include path/to/file}} + + The included file path is resolved relative to the repository root. + """ + output_lines: list[str] = [] + + for line in path.read_text(encoding="utf-8").splitlines(): + stripped = line.strip() + if stripped.startswith(INCLUDE_PREFIX) and stripped.endswith(INCLUDE_SUFFIX): + include_path = stripped[len(INCLUDE_PREFIX) : -len(INCLUDE_SUFFIX)].strip() + source_path = REPO_ROOT / include_path + if not source_path.exists(): + raise FileNotFoundError( + f"Missing include `{include_path}` referenced from {path.relative_to(REPO_ROOT)}" + ) + + output_lines.extend(source_path.read_text(encoding="utf-8").splitlines()) + continue + + output_lines.append(line) + + path.write_text("\n".join(output_lines) + "\n", encoding="utf-8") + + +def main() -> None: + """Rebuild the GitBook publication directory from checked-in docs.""" + if SITE_DIR.exists(): + shutil.rmtree(SITE_DIR) + + shutil.copytree(DOCS_DIR, SITE_DIR, ignore=IGNORE_PATTERNS) + + for src, dest in ROOT_FILES.items(): + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dest) + + path_map = build_path_map() + site_to_src = {site: src for src, site in path_map.items()} + + for md_file in sorted(SITE_DIR.rglob("*.md")): + expand_includes(md_file) + source = site_to_src.get(md_file.resolve()) + if source is not None: + rewrite_links(md_file, source, path_map) + + md_files = sorted(SITE_DIR.rglob("*.md")) + print(f"Prepared {len(md_files)} markdown files in {SITE_DIR}/") + + +if __name__ == "__main__": + main() diff --git a/sdk/go/README.md b/sdk/go/README.md index cd294ff2..95877107 100644 --- a/sdk/go/README.md +++ b/sdk/go/README.md @@ -75,13 +75,13 @@ Every knowledge unit has a tier: `cq.Local` (on-disk SQLite, never leaves the ma With a remote configured, `Propose` sends the unit to the remote and returns it tagged `cq.Private`; 
with no remote, or if the remote is unreachable, it writes the unit locally as `cq.Local`. -See the [top-level README](../../README.md#knowledge-tiers) for the full description. +See the [top-level README](../../README.md) for the full description. ## Storage Format Knowledge units are stored as JSON in SQLite. The database schema is shared with the [cq Python SDK](../../sdk/python/) — both SDKs read and write the -same `local.db` file. The [JSON Schema definitions](../../schema/) are the +same `local.db` file. The [JSON Schema definitions](https://github.com/mozilla-ai/cq/tree/main/schema) are the source of truth. ## Development diff --git a/sdk/python/README.md b/sdk/python/README.md index 372bade6..d8257d19 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -70,7 +70,7 @@ Every knowledge unit has a tier: `local` (on-disk SQLite, never leaves the machi With a remote configured, `cq.propose(...)` sends the unit to the remote and returns it tagged `private`; with no remote, or if the remote is unreachable, it writes the unit locally as `local`. -See the [top-level README](../../README.md#knowledge-tiers) for the full description. +See the [top-level README](../../README.md) for the full description. ## Dev Setup