From c18c530f668077cb8c927ecc8091c912adbd6ee9 Mon Sep 17 00:00:00 2001 From: latentloop07 <54039495+latentloop07@users.noreply.github.com> Date: Sun, 24 May 2026 20:55:48 +0530 Subject: [PATCH 1/8] feat(claude-ai): self-hosted claude.ai connector with bidirectional sync End-to-end browser-extension-based integration that syncs SkillNote skills to and from users' claude.ai accounts without routing through any SkillNote-project infrastructure (data isolation preserved). Backend (FastAPI) - 3 alembic migrations: integrations + ops queue + skill links (0019), audit log + pair-attempt rate limit + per-skill toggle (0020), cookie_expired audit event (0021). - Endpoints under /v1/integrations/claude-ai: extension pair/approve/ status/operations/complete/skill-bundle/known-skill-ids/imported-skill/ status/telemetry; UI integrations CRUD; conflicts + preview + resolve; activity feed (paginated, date-windowed) + CSV export; analytics + sparkline; health; queue; diagnostic (8-check audit). - Defenses: token-hash storage, with_for_update redemption lock, YAML safe_dump for SKILL.md frontmatter, pair-endpoint rate limit per source IP, telemetry payload validation, integration-scoped tokens, auth_expired flow flips status + writes cookie_expired audit row. Browser extension (Manifest V3) - Service worker (alarms-based), options + popup UIs, vitest tests. - SkillNote client: timeouts, AbortController, non-JSON guard, SkillNoteAuthError vs SkillNoteNetworkError categorization, write- queue mutex on chrome.storage, releaseInFlightOps on claude.ai outages, fetchSelfStatus for popup counters. - claude.ai client: REST surface (org skills CRUD), session-cookie watcher, endpoint-changed detection. Frontend (Next.js) - Connect page surfaces claude.ai as a first-class agent card. - /settings/integrations/claude-ai dedicated page with: - 4-step interactive setup stepper (browser-aware: Chrome/Edge/ Brave/Arc/Firefox; Safari gets an unsupported panel), progress counter, troubleshoot section, per-backend localStorage ack flag, "Mark step done" + "Reset stepper" affordances. - Connected-browsers list with status pills, conflict-policy switcher, optimistic disconnect dialog, cookie-expired re-sign-in CTA. - Live sync-queue panel (5s poll, stale-queue warning). - 7-day analytics panel (sparkline, top-5 synced, per-browser table). - Recent-activity preview + dedicated activity page with date pickers, skill filter, CSV export, search, pagination. - Conflicts section with bulk Resolve-all menu, per-row diff preview (last-pushed vs current SkillNote version + claude.ai metadata). - One-click diagnostic modal with 8 pass/warn/fail checks. - Health card (failed_ops counter, stale-data indicator). - Per-skill SkillSyncBadge on skill detail pages. - Pair-approval page validates the 6-char code shape client-side, emits a /v1/setup/installs telemetry ping on success. CLI - skillnote connect claude-ai prints unpacked-install instructions (Web Store / AMO listings are pending review). Tests - 223 backend tests (integration + unit) covering every endpoint, contract, rate limit, audit event, conflict flow, security boundary. - 51 extension vitest tests (client, storage, telemetry, resilience). - 100 frontend e2e (Playwright) covering discovery, stepper, pair flow, cookie-expired, conflict policy + diff, queue, analytics, activity export, diagnostic modal, accessibility. Docs - docs/claude-ai-integration.md (architecture + sequence diagrams) - docs/claude-ai-admin-runbook.md (operator playbook) - docs/claude-ai-endpoints.md (REST surface) - docs/claude-ai-user-guide.md (end-user flow) - README section under "Wire up your AI agent" Co-Authored-By: Claude Opus 4.7 --- .gitignore | 5 + README.md | 14 + RELEASE-NOTES-0.5.3.md | 84 + .../versions/0019_claude_ai_integration.py | 305 +++ .../alembic/versions/0020_claude_ai_polish.py | 157 ++ .../versions/0021_audit_cookie_expired.py | 78 + backend/app/api/claude_ai.py | 1973 ++++++++++++++++ backend/app/api/setup.py | 84 +- backend/app/api/skills.py | 51 +- backend/app/db/models/__init__.py | 14 + backend/app/db/models/claude_ai.py | 213 ++ backend/app/db/models/claude_ai_polish.py | 85 + backend/app/db/models/skill.py | 7 + backend/app/main.py | 57 + backend/app/schemas/claude_ai.py | 400 ++++ backend/app/schemas/skill.py | 4 + backend/app/services/claude_ai_sync.py | 617 +++++ backend/pyproject.toml | 3 + backend/tests/conftest.py | 16 +- .../test_claude_ai_activity_export.py | 155 ++ .../test_claude_ai_activity_pagination.py | 104 + .../integration/test_claude_ai_analytics.py | 117 + .../test_claude_ai_bundle_escaping.py | 212 ++ .../test_claude_ai_conflict_preview.py | 89 + .../test_claude_ai_conflicts_flow.py | 288 +++ .../integration/test_claude_ai_constraints.py | 222 ++ .../test_claude_ai_cookie_expired.py | 200 ++ .../integration/test_claude_ai_diagnostic.py | 110 + .../integration/test_claude_ai_e2e_flow.py | 236 ++ .../test_claude_ai_extension_status.py | 151 ++ .../test_claude_ai_inbound_ingestion.py | 479 ++++ .../integration/test_claude_ai_ops_queue.py | 201 ++ .../integration/test_claude_ai_pairing.py | 156 ++ .../integration/test_claude_ai_polish_api.py | 304 +++ .../tests/integration/test_claude_ai_queue.py | 213 ++ .../test_claude_ai_security_hardening.py | 442 ++++ backend/tests/unit/test_claude_ai_perf.py | 116 + backend/tests/unit/test_claude_ai_polish.py | 262 ++ backend/tests/unit/test_claude_ai_schemas.py | 125 + backend/tests/unit/test_claude_ai_service.py | 506 ++++ cli/src/__tests__/connect-claude-ai.test.ts | 29 + cli/src/commands/connect.ts | 14 +- docs/claude-ai-admin-runbook.md | 217 ++ docs/claude-ai-endpoints.md | 198 ++ docs/claude-ai-integration.md | 551 +++++ docs/claude-ai-user-guide.md | 139 ++ e2e/claude-ai-activity-pagination.spec.ts | 158 ++ e2e/claude-ai-analytics.spec.ts | 211 ++ e2e/claude-ai-conflict-policy.spec.ts | 318 +++ e2e/claude-ai-cookie-expired.spec.ts | 114 + e2e/claude-ai-diagnostic.spec.ts | 137 ++ e2e/claude-ai-discovery.spec.ts | 235 ++ e2e/claude-ai-health-card.spec.ts | 156 ++ e2e/claude-ai-integration.spec.ts | 290 +++ e2e/claude-ai-journey.spec.ts | 317 +++ e2e/claude-ai-pair-validation.spec.ts | 111 + e2e/claude-ai-polish.spec.ts | 467 ++++ e2e/claude-ai-stepper-a11y.spec.ts | 148 ++ e2e/claude-ai-stepper-bugs.spec.ts | 211 ++ e2e/claude-ai-stepper-edge.spec.ts | 95 + e2e/claude-ai-stepper-polish.spec.ts | 195 ++ e2e/claude-ai-sync-queue.spec.ts | 211 ++ extensions/claude-ai/.gitignore | 16 + extensions/claude-ai/LICENSE | 21 + extensions/claude-ai/PRIVACY.md | 69 + extensions/claude-ai/README.md | 64 + extensions/claude-ai/STORE_LISTING.md | 161 ++ extensions/claude-ai/firefox-manifest.json | 48 + extensions/claude-ai/manifest.json | 42 + extensions/claude-ai/package-lock.json | 2098 +++++++++++++++++ extensions/claude-ai/package.json | 25 + extensions/claude-ai/public/icons/128.png | Bin 0 -> 2099 bytes extensions/claude-ai/public/icons/16.png | Bin 0 -> 227 bytes extensions/claude-ai/public/icons/48.png | Bin 0 -> 862 bytes .../claude-ai/public/icons/promo-440x280.png | Bin 0 -> 4007 bytes extensions/claude-ai/scripts/README.md | 49 + .../claude-ai/scripts/capture-endpoints.mjs | 127 + .../src/__tests__/claude-ai-client.test.ts | 168 ++ extensions/claude-ai/src/__tests__/setup.ts | 162 ++ .../skillnote-client-resilience.test.ts | 86 + .../skillnote-client-self-status.test.ts | 138 ++ .../src/__tests__/skillnote-client.test.ts | 172 ++ .../src/__tests__/storage-concurrency.test.ts | 74 + .../claude-ai/src/__tests__/storage.test.ts | 52 + .../claude-ai/src/__tests__/telemetry.test.ts | 67 + extensions/claude-ai/src/background.ts | 400 ++++ .../claude-ai/src/lib/claude-ai-client.ts | 163 ++ .../claude-ai/src/lib/skillnote-client.ts | 247 ++ extensions/claude-ai/src/lib/storage.ts | 56 + extensions/claude-ai/src/lib/telemetry.ts | 40 + extensions/claude-ai/src/lib/types.ts | 72 + extensions/claude-ai/src/options.html | 90 + extensions/claude-ai/src/options.ts | 162 ++ extensions/claude-ai/src/popup.html | 86 + extensions/claude-ai/src/popup.ts | 114 + extensions/claude-ai/tsconfig.json | 18 + extensions/claude-ai/vite.config.ts | 80 + extensions/claude-ai/vitest.config.ts | 19 + package-lock.json | 27 +- package.json | 2 +- src/app/(app)/integrations/page.tsx | 50 +- .../integrations/claude-ai/activity/page.tsx | 74 + .../settings/integrations/claude-ai/page.tsx | 806 +++++++ .../integrations/claude-ai/pair/page.tsx | 207 ++ src/app/(app)/settings/page.tsx | 22 +- src/components/integrations/agent-marks.tsx | 18 + .../integrations/claude-ai-card.tsx | 143 ++ .../integrations/claude-ai/activity-feed.tsx | 370 +++ .../claude-ai/analytics-panel.tsx | 329 +++ .../claude-ai/confirm-disconnect-dialog.tsx | 95 + .../claude-ai/diagnostic-button.tsx | 256 ++ .../integrations/claude-ai/health-card.tsx | 134 ++ .../integrations/claude-ai/setup-stepper.tsx | 789 +++++++ .../integrations/claude-ai/skeleton.tsx | 56 + .../claude-ai/skill-sync-badge.tsx | 90 + .../integrations/claude-ai/sync-queue.tsx | 269 +++ src/components/skills/skill-detail.tsx | 7 + src/lib/api/claude-ai.ts | 290 +++ src/lib/browser-detect.ts | 82 + src/lib/mock-data.ts | 10 + 120 files changed, 22662 insertions(+), 27 deletions(-) create mode 100644 RELEASE-NOTES-0.5.3.md create mode 100644 backend/alembic/versions/0019_claude_ai_integration.py create mode 100644 backend/alembic/versions/0020_claude_ai_polish.py create mode 100644 backend/alembic/versions/0021_audit_cookie_expired.py create mode 100644 backend/app/api/claude_ai.py create mode 100644 backend/app/db/models/claude_ai.py create mode 100644 backend/app/db/models/claude_ai_polish.py create mode 100644 backend/app/schemas/claude_ai.py create mode 100644 backend/app/services/claude_ai_sync.py create mode 100644 backend/tests/integration/test_claude_ai_activity_export.py create mode 100644 backend/tests/integration/test_claude_ai_activity_pagination.py create mode 100644 backend/tests/integration/test_claude_ai_analytics.py create mode 100644 backend/tests/integration/test_claude_ai_bundle_escaping.py create mode 100644 backend/tests/integration/test_claude_ai_conflict_preview.py create mode 100644 backend/tests/integration/test_claude_ai_conflicts_flow.py create mode 100644 backend/tests/integration/test_claude_ai_constraints.py create mode 100644 backend/tests/integration/test_claude_ai_cookie_expired.py create mode 100644 backend/tests/integration/test_claude_ai_diagnostic.py create mode 100644 backend/tests/integration/test_claude_ai_e2e_flow.py create mode 100644 backend/tests/integration/test_claude_ai_extension_status.py create mode 100644 backend/tests/integration/test_claude_ai_inbound_ingestion.py create mode 100644 backend/tests/integration/test_claude_ai_ops_queue.py create mode 100644 backend/tests/integration/test_claude_ai_pairing.py create mode 100644 backend/tests/integration/test_claude_ai_polish_api.py create mode 100644 backend/tests/integration/test_claude_ai_queue.py create mode 100644 backend/tests/integration/test_claude_ai_security_hardening.py create mode 100644 backend/tests/unit/test_claude_ai_perf.py create mode 100644 backend/tests/unit/test_claude_ai_polish.py create mode 100644 backend/tests/unit/test_claude_ai_schemas.py create mode 100644 backend/tests/unit/test_claude_ai_service.py create mode 100644 cli/src/__tests__/connect-claude-ai.test.ts create mode 100644 docs/claude-ai-admin-runbook.md create mode 100644 docs/claude-ai-endpoints.md create mode 100644 docs/claude-ai-integration.md create mode 100644 docs/claude-ai-user-guide.md create mode 100644 e2e/claude-ai-activity-pagination.spec.ts create mode 100644 e2e/claude-ai-analytics.spec.ts create mode 100644 e2e/claude-ai-conflict-policy.spec.ts create mode 100644 e2e/claude-ai-cookie-expired.spec.ts create mode 100644 e2e/claude-ai-diagnostic.spec.ts create mode 100644 e2e/claude-ai-discovery.spec.ts create mode 100644 e2e/claude-ai-health-card.spec.ts create mode 100644 e2e/claude-ai-integration.spec.ts create mode 100644 e2e/claude-ai-journey.spec.ts create mode 100644 e2e/claude-ai-pair-validation.spec.ts create mode 100644 e2e/claude-ai-polish.spec.ts create mode 100644 e2e/claude-ai-stepper-a11y.spec.ts create mode 100644 e2e/claude-ai-stepper-bugs.spec.ts create mode 100644 e2e/claude-ai-stepper-edge.spec.ts create mode 100644 e2e/claude-ai-stepper-polish.spec.ts create mode 100644 e2e/claude-ai-sync-queue.spec.ts create mode 100644 extensions/claude-ai/.gitignore create mode 100644 extensions/claude-ai/LICENSE create mode 100644 extensions/claude-ai/PRIVACY.md create mode 100644 extensions/claude-ai/README.md create mode 100644 extensions/claude-ai/STORE_LISTING.md create mode 100644 extensions/claude-ai/firefox-manifest.json create mode 100644 extensions/claude-ai/manifest.json create mode 100644 extensions/claude-ai/package-lock.json create mode 100644 extensions/claude-ai/package.json create mode 100644 extensions/claude-ai/public/icons/128.png create mode 100644 extensions/claude-ai/public/icons/16.png create mode 100644 extensions/claude-ai/public/icons/48.png create mode 100644 extensions/claude-ai/public/icons/promo-440x280.png create mode 100644 extensions/claude-ai/scripts/README.md create mode 100644 extensions/claude-ai/scripts/capture-endpoints.mjs create mode 100644 extensions/claude-ai/src/__tests__/claude-ai-client.test.ts create mode 100644 extensions/claude-ai/src/__tests__/setup.ts create mode 100644 extensions/claude-ai/src/__tests__/skillnote-client-resilience.test.ts create mode 100644 extensions/claude-ai/src/__tests__/skillnote-client-self-status.test.ts create mode 100644 extensions/claude-ai/src/__tests__/skillnote-client.test.ts create mode 100644 extensions/claude-ai/src/__tests__/storage-concurrency.test.ts create mode 100644 extensions/claude-ai/src/__tests__/storage.test.ts create mode 100644 extensions/claude-ai/src/__tests__/telemetry.test.ts create mode 100644 extensions/claude-ai/src/background.ts create mode 100644 extensions/claude-ai/src/lib/claude-ai-client.ts create mode 100644 extensions/claude-ai/src/lib/skillnote-client.ts create mode 100644 extensions/claude-ai/src/lib/storage.ts create mode 100644 extensions/claude-ai/src/lib/telemetry.ts create mode 100644 extensions/claude-ai/src/lib/types.ts create mode 100644 extensions/claude-ai/src/options.html create mode 100644 extensions/claude-ai/src/options.ts create mode 100644 extensions/claude-ai/src/popup.html create mode 100644 extensions/claude-ai/src/popup.ts create mode 100644 extensions/claude-ai/tsconfig.json create mode 100644 extensions/claude-ai/vite.config.ts create mode 100644 extensions/claude-ai/vitest.config.ts create mode 100644 src/app/(app)/settings/integrations/claude-ai/activity/page.tsx create mode 100644 src/app/(app)/settings/integrations/claude-ai/page.tsx create mode 100644 src/app/(app)/settings/integrations/claude-ai/pair/page.tsx create mode 100644 src/components/integrations/claude-ai-card.tsx create mode 100644 src/components/integrations/claude-ai/activity-feed.tsx create mode 100644 src/components/integrations/claude-ai/analytics-panel.tsx create mode 100644 src/components/integrations/claude-ai/confirm-disconnect-dialog.tsx create mode 100644 src/components/integrations/claude-ai/diagnostic-button.tsx create mode 100644 src/components/integrations/claude-ai/health-card.tsx create mode 100644 src/components/integrations/claude-ai/setup-stepper.tsx create mode 100644 src/components/integrations/claude-ai/skeleton.tsx create mode 100644 src/components/integrations/claude-ai/skill-sync-badge.tsx create mode 100644 src/components/integrations/claude-ai/sync-queue.tsx create mode 100644 src/lib/api/claude-ai.ts create mode 100644 src/lib/browser-detect.ts diff --git a/.gitignore b/.gitignore index 578251bb..656682a2 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,8 @@ test-results/ # SkillNote per-project state (per-machine; pins active collection) .skillnote.json .skillnote/ + +# Personal scratch (marketing posts, draft tweets, release-notes outlines) +skillnote-release-posts.md +x-replies-paste.md +extensions/claude-ai/scripts/captured-endpoints.md diff --git a/README.md b/README.md index 7dda5d7d..1a2f60e7 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,20 @@ npx skillnote connect openclaw +### claude.ai (web app) + +Two-way sync between SkillNote and your claude.ai account through a small browser extension. Skills pushed in SkillNote appear in claude.ai; skills authored in claude.ai are imported back into SkillNote — no copy-paste either way. + +```bash +# Install the extension unpacked (Web Store / AMO listings pending review) +cd extensions/claude-ai && npm install && npm run build +# Then in chrome://extensions, enable Developer mode → Load unpacked → dist/ +``` + +Open the extension, paste your SkillNote URL, and approve the pairing code at `/settings/integrations/claude-ai`. Sync runs every minute while you're signed in to claude.ai. The extension reads your claude.ai session cookies locally — they never leave your machine. + +Connector docs: [`docs/claude-ai-integration.md`](docs/claude-ai-integration.md) · admin runbook: [`docs/claude-ai-admin-runbook.md`](docs/claude-ai-admin-runbook.md). + > Cursor, Codex, Antigravity, and OpenHands are on the roadmap. [Open an issue](https://github.com/luna-prompts/skillnote/issues) if you want to help build an adapter. --- diff --git a/RELEASE-NOTES-0.5.3.md b/RELEASE-NOTES-0.5.3.md new file mode 100644 index 00000000..757721f5 --- /dev/null +++ b/RELEASE-NOTES-0.5.3.md @@ -0,0 +1,84 @@ +# SkillNote 0.5.3 + +A polish and positioning release. Nothing dramatic on the API side, no new commands, no breaking changes, but every part of the front door got a careful pass. The sidebar information architecture is cleaner, the README leads with the problem Claude Code users actually feel, and the install paths now include Homebrew alongside npm. + +## Homebrew is in + +You can now install SkillNote with Homebrew on macOS or Linux: + +```bash +brew install luna-prompts/tap/skillnote +skillnote start +``` + +The formula pulls the same `skillnote` package from npm but lets `brew` manage the binary. Node 20+ comes in as a Homebrew dependency, so you don't need a pre-existing Node install. + +Other install paths are unchanged: `npx skillnote start`, raw Docker Compose, or `clawhub install skillnote` for the OpenClaw side. + +## The sidebar got fixed + +Two complaints we'd been hearing: + +- Analytics felt buried under Connect, when it's really a view of your skill data, not part of the agent setup flow. +- The Connect group label was repeating its only item ("CONNECT > Connect"). + +Both are fixed. The sidebar is now: + +``` +WORKSPACE INTEGRATIONS + Skills Connect + Collections + Analytics + Marketplace +``` + +Analytics and Marketplace live with the rest of your skill-management surface. The agent wire-up page sits in its own clearly-named INTEGRATIONS group. No more orphan items between the two sections. + +## The README leads with the problem + +The README has been fully rewritten. It now opens with the **8,000-character Claude Code skill truncation** issue (the pain new SkillNote users actually feel) instead of a feature tour. Down from 659 to ~495 lines. + +Two new pieces worth pointing out: + +1. **Five community skill registries are linked one click away.** `anthropics/skills`, `ComposioHQ/awesome-claude-skills` (800+ skills), `alirezarezvani/claude-skills` (600+), `garrytan/gstack` (50+), `obra/superpowers`. New installs aren't staring at an empty Skills page anymore. They have a clear next step. + +2. **Four LLM-search-friendly FAQ entries** sit at the top of the FAQ: *"What is SkillNote?"*, *"How is SkillNote different from MCP?"*, *"How do I share Claude Code skills across my team?"*, *"Is SkillNote free?"*. Phrased the way people actually ask ChatGPT or Claude about a project, so SkillNote is more likely to surface when someone asks an AI assistant for help. + +## PWA dock icon is finally black + +If you'd installed SkillNote as a PWA on macOS or Android, you were seeing a teal frame around the black LP logo. That was a bug in the maskable icon PNG, not the manifest theme color (the manifest was already correct after 0.5.2). Fixed in 0.5.3. + +**Existing PWA users:** browsers cache the dock icon. To pick up the new all-black icon, uninstall the SkillNote PWA from your dock or home screen, then reinstall it via Chrome's address bar ("Install SkillNote") or `⋮ → Cast/Save/Share → Install SkillNote`. + +## Upgrading + +If you're on the npm path: + +```bash +npx skillnote restart +``` + +That pulls the new images. No data migration. No config changes. The Postgres volume is preserved across the restart. + +If you're on the raw Docker Compose path: + +```bash +curl -fsSL https://raw.githubusercontent.com/luna-prompts/skillnote/cli-v0.5.3/deploy/docker-compose.yml -o docker-compose.yml +docker compose up -d +``` + +The OpenClaw skill bundle gets updated automatically on the next `sync.sh` run, which happens every 60 seconds and on each Claude session start. + +## What's next + +A few items already in motion for the next minor release: + +- **Phase 2C deprecation** of the legacy v0.4 file-push commands (`login`, `add`, `update`, `remove`, `check`, `doctor`) in favor of the lifecycle CLI. Tracked in issue [#40](https://github.com/luna-prompts/skillnote/issues/40). +- **API authentication** for non-localhost deployments. Currently the API is open to anything that can reach `:8082`. The roadmap is a pluggable auth layer so SkillNote is safe behind a reverse proxy without bolt-on hacks. +- **Cursor and Codex CLI native plugins.** OpenHands and Antigravity are further out. Open an issue if you'd like to help with any of them. + +--- + +**Links:** Full changelog in [`CHANGELOG.md`](CHANGELOG.md) · GitHub Release [`cli-v0.5.3`](https://github.com/luna-prompts/skillnote/releases/tag/cli-v0.5.3) · npm: `skillnote@0.5.3` · Docker: `ghcr.io/luna-prompts/skillnote-{api,web}:0.5.3` · clawhub: `skillnote@0.5.3` + +**Help wanted:** join us on [Discord](https://discord.gg/GazU4amU6H) or [open an issue](https://github.com/luna-prompts/skillnote/issues). diff --git a/backend/alembic/versions/0019_claude_ai_integration.py b/backend/alembic/versions/0019_claude_ai_integration.py new file mode 100644 index 00000000..90c23d0b --- /dev/null +++ b/backend/alembic/versions/0019_claude_ai_integration.py @@ -0,0 +1,305 @@ +"""0019 claude_ai_integration — tables for the claude.ai connector + +Adds three tables that power the browser-extension-driven sync between +self-hosted SkillNote and a user's claude.ai account: + + * claude_ai_integrations — one row per paired browser/extension + * claude_ai_skill_links — mapping SkillNote skill <-> claude.ai skill + * claude_ai_sync_operations — work queue the extension drains + +Enum-like columns use Text + CHECK constraints (matching the project's +convention seen in agent_install, skill_usage_events, etc.) rather than +PostgreSQL ENUM types. This keeps schema migrations cheap when we add a +new state — no ALTER TYPE dance, just update the CHECK constraint. + +See docs/claude-ai-integration.md for the full design. + +Revision ID: 0019_claude_ai_integration +Revises: 0018_agent_disconnects +Create Date: 2026-05-24 +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB, UUID + + +revision = "0019_claude_ai_integration" +down_revision = "0018_agent_disconnects" +branch_labels = None +depends_on = None + + +# Valid values for each enum-like column. Mirrors the Pydantic Literal[] +# unions in app/schemas/claude_ai.py — keep them in sync. +INTEGRATION_STATUS_VALUES = ( + "pending_approval", "active", "cookie_expired", "disconnected", "error", +) +INTEGRATION_SCOPE_VALUES = ("personal", "organization", "both") +INTEGRATION_CONFLICT_POLICY_VALUES = ("ask", "skillnote_wins", "claude_ai_wins") +LINK_DIRECTION_VALUES = ("outbound", "inbound", "both") +LINK_CONFLICT_VALUES = ("none", "diverged", "resolved") +OP_KIND_VALUES = ("upload", "update", "delete", "list", "fetch_one") +OP_STATUS_VALUES = ("pending", "in_progress", "completed", "failed") + + +def _check_in(column: str, values: tuple[str, ...]) -> str: + """Build a SQL CHECK clause for `column IN (...)`.""" + quoted = ", ".join(f"'{v}'" for v in values) + return f"{column} IN ({quoted})" + + +def upgrade() -> None: + # ── claude_ai_integrations ──────────────────────────────────────────────── + # One row per paired browser. Tokens are stored hashed (sha256 hex digest); + # the raw values only ever live on the extension side. + op.create_table( + "claude_ai_integrations", + sa.Column( + "id", + UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("gen_random_uuid()"), + ), + # FK reserved for when ACL ships; nullable today because skillnote + # currently has no auth (see CLAUDE.md). Indexed so per-user lookups + # remain cheap when populated. + sa.Column("user_id", UUID(as_uuid=True), nullable=True), + sa.Column("status", sa.Text(), nullable=False), + sa.Column("scope", sa.Text(), nullable=False, server_default="both"), + # Discovered from claude.ai on the first successful sync via the + # extension. Nullable until that first round-trip completes. + sa.Column("claude_ai_org_id", sa.Text(), nullable=True), + # Human-readable label the extension supplies at pair time + # (e.g. "Chrome on MacBook Pro"). Used in the connected-browsers list. + sa.Column("browser_label", sa.Text(), nullable=True), + # Pairing handshake — short human code shown in extension, opaque + # polling token held by the extension. Both nulled out once redeemed. + sa.Column("pairing_code", sa.Text(), nullable=True), + sa.Column("pairing_token_hash", sa.Text(), nullable=True), + sa.Column("pairing_expires_at", sa.DateTime(timezone=True), nullable=True), + # Set by the user-approval call; consumed by the extension's next + # /pair/status poll. The presence of this timestamp (with status + # still `pending_approval`) means "approved but the extension hasn't + # picked up its token yet." + sa.Column("pairing_approved_at", sa.DateTime(timezone=True), nullable=True), + # Long-lived bearer the extension sends on every request after pairing. + # Stored hashed; raw value never persisted server-side after issuance. + sa.Column("extension_token_hash", sa.Text(), nullable=True), + sa.Column("last_sync_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("last_error", sa.Text(), nullable=True), + sa.Column("conflict_policy", sa.Text(), nullable=False, server_default="ask"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.CheckConstraint( + _check_in("status", INTEGRATION_STATUS_VALUES), + name="ck_claude_ai_integrations_status", + ), + sa.CheckConstraint( + _check_in("scope", INTEGRATION_SCOPE_VALUES), + name="ck_claude_ai_integrations_scope", + ), + sa.CheckConstraint( + _check_in("conflict_policy", INTEGRATION_CONFLICT_POLICY_VALUES), + name="ck_claude_ai_integrations_conflict_policy", + ), + ) + # Token lookups happen on every extension request — hot path. + op.create_index( + "ix_claude_ai_integrations_extension_token_hash", + "claude_ai_integrations", + ["extension_token_hash"], + unique=True, + postgresql_where=sa.text("extension_token_hash IS NOT NULL"), + ) + op.create_index( + "ix_claude_ai_integrations_pairing_token_hash", + "claude_ai_integrations", + ["pairing_token_hash"], + unique=True, + postgresql_where=sa.text("pairing_token_hash IS NOT NULL"), + ) + # SkillNote UI lists integrations by status + last_sync_at; index supports + # the per-user filtered list view efficiently. + op.create_index( + "ix_claude_ai_integrations_user_id_status", + "claude_ai_integrations", + ["user_id", "status"], + ) + + # ── claude_ai_skill_links ───────────────────────────────────────────────── + # The mapping table. One row per (integration, skill) pair that's been + # observed on either side. Skill_id is nullable because a claude.ai-authored + # skill may exist as a link before the import op creates the SkillNote row. + op.create_table( + "claude_ai_skill_links", + sa.Column( + "id", + UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("gen_random_uuid()"), + ), + sa.Column( + "integration_id", + UUID(as_uuid=True), + sa.ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "skillnote_skill_id", + UUID(as_uuid=True), + sa.ForeignKey("skills.id", ondelete="CASCADE"), + nullable=True, + ), + # Last version we successfully pushed. SET NULL on version delete + # because version pruning shouldn't break the link (the latest version + # will repopulate on the next sync tick). + sa.Column( + "skillnote_version_id", + UUID(as_uuid=True), + sa.ForeignKey("skill_content_versions.id", ondelete="SET NULL"), + nullable=True, + ), + sa.Column("claude_ai_skill_id", sa.Text(), nullable=False), + sa.Column("claude_ai_version", sa.Text(), nullable=True), + sa.Column("last_seen_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("direction", sa.Text(), nullable=False, server_default="both"), + sa.Column( + "conflict_state", sa.Text(), nullable=False, server_default="none" + ), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + # A given claude.ai skill ID can only be linked once per integration. + # Without this, retries or list-then-upload races could double-insert. + sa.UniqueConstraint( + "integration_id", + "claude_ai_skill_id", + name="uq_claude_ai_skill_links_integration_claude_skill", + ), + sa.CheckConstraint( + _check_in("direction", LINK_DIRECTION_VALUES), + name="ck_claude_ai_skill_links_direction", + ), + sa.CheckConstraint( + _check_in("conflict_state", LINK_CONFLICT_VALUES), + name="ck_claude_ai_skill_links_conflict_state", + ), + ) + # Lookup by skillnote_skill_id is used when enqueueing sync ops on + # skill publish — needs to fan out to every linked integration. + op.create_index( + "ix_claude_ai_skill_links_skillnote_skill_id", + "claude_ai_skill_links", + ["skillnote_skill_id"], + ) + op.create_index( + "ix_claude_ai_skill_links_integration_id_conflict", + "claude_ai_skill_links", + ["integration_id", "conflict_state"], + ) + + # ── claude_ai_sync_operations ───────────────────────────────────────────── + # Append-only queue. Extension polls /extension/operations for pending ops, + # executes them, then calls /complete to set status. Failed ops can be + # retried via attempts counter; >N attempts means surface to the user. + op.create_table( + "claude_ai_sync_operations", + sa.Column( + "id", + UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("gen_random_uuid()"), + ), + sa.Column( + "integration_id", + UUID(as_uuid=True), + sa.ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("kind", sa.Text(), nullable=False), + sa.Column( + "skill_id", + UUID(as_uuid=True), + sa.ForeignKey("skills.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column("payload", JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")), + sa.Column("status", sa.Text(), nullable=False, server_default="pending"), + sa.Column("attempts", sa.Integer(), nullable=False, server_default="0"), + sa.Column("last_error", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.CheckConstraint( + _check_in("kind", OP_KIND_VALUES), + name="ck_claude_ai_sync_operations_kind", + ), + sa.CheckConstraint( + _check_in("status", OP_STATUS_VALUES), + name="ck_claude_ai_sync_operations_status", + ), + ) + # The extension's poll query is `WHERE integration_id = ? AND status = + # 'pending' ORDER BY created_at LIMIT n` — this composite covers it. + op.create_index( + "ix_claude_ai_sync_operations_integration_status_created", + "claude_ai_sync_operations", + ["integration_id", "status", "created_at"], + ) + + +def downgrade() -> None: + op.drop_index( + "ix_claude_ai_sync_operations_integration_status_created", + table_name="claude_ai_sync_operations", + ) + op.drop_table("claude_ai_sync_operations") + + op.drop_index( + "ix_claude_ai_skill_links_integration_id_conflict", + table_name="claude_ai_skill_links", + ) + op.drop_index( + "ix_claude_ai_skill_links_skillnote_skill_id", + table_name="claude_ai_skill_links", + ) + op.drop_table("claude_ai_skill_links") + + op.drop_index( + "ix_claude_ai_integrations_user_id_status", + table_name="claude_ai_integrations", + ) + op.drop_index( + "ix_claude_ai_integrations_pairing_token_hash", + table_name="claude_ai_integrations", + ) + op.drop_index( + "ix_claude_ai_integrations_extension_token_hash", + table_name="claude_ai_integrations", + ) + op.drop_table("claude_ai_integrations") diff --git a/backend/alembic/versions/0020_claude_ai_polish.py b/backend/alembic/versions/0020_claude_ai_polish.py new file mode 100644 index 00000000..014da241 --- /dev/null +++ b/backend/alembic/versions/0020_claude_ai_polish.py @@ -0,0 +1,157 @@ +"""0020 claude_ai_polish — audit log + per-skill sync toggle + rate-limit table + +Adds the polish layer on top of 0019: + + * claude_ai_audit_log — append-only event feed (who did what when) + * claude_ai_pair_attempts — rate-limit tracking for pair endpoint + * skills.claude_ai_sync_enabled — per-skill opt-in toggle (default TRUE + so existing skills sync, but UI surfaces it for granular control) + +Revision ID: 0020_claude_ai_polish +Revises: 0019_claude_ai_integration +Create Date: 2026-05-24 +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB, UUID, INET + + +revision = "0020_claude_ai_polish" +down_revision = "0019_claude_ai_integration" +branch_labels = None +depends_on = None + + +AUDIT_EVENT_VALUES = ( + "pair_started", + "pair_approved", + "pair_redeemed", + "pair_expired", + "integration_disconnected", + "integration_updated", + "skill_pushed", + "skill_imported", + "skill_delete_pushed", + "op_failed", + "conflict_detected", + "conflict_resolved", + "endpoint_changed", + "token_revoked", +) + + +def upgrade() -> None: + # ── claude_ai_audit_log ─────────────────────────────────────────────────── + # Append-only. Drives the in-product activity feed and gives admins a + # forensic trail for "who synced what when" questions. Indexed for + # the common case: "show me the last N events for this integration." + op.create_table( + "claude_ai_audit_log", + sa.Column( + "id", + UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("gen_random_uuid()"), + ), + sa.Column( + "integration_id", + UUID(as_uuid=True), + sa.ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column("event", sa.Text(), nullable=False), + sa.Column( + "skill_id", + UUID(as_uuid=True), + sa.ForeignKey("skills.id", ondelete="SET NULL"), + nullable=True, + ), + # Free-form details — exact shape depends on event type. The + # activity feed renders these via a small switch in the UI. + sa.Column("detail", JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")), + # IPs help admins distinguish "expected pairing from office network" + # vs "someone tried to pair from a coffee shop IP." Captured at the + # boundary; nullable for events that don't have an originating IP. + sa.Column("source_ip", INET, nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.CheckConstraint( + "event IN (" + ", ".join(f"'{v}'" for v in AUDIT_EVENT_VALUES) + ")", + name="ck_claude_ai_audit_log_event", + ), + ) + # Per-integration feed (the hot query for the activity page). + op.create_index( + "ix_claude_ai_audit_log_integration_created", + "claude_ai_audit_log", + ["integration_id", "created_at"], + ) + # Global feed sort. + op.create_index( + "ix_claude_ai_audit_log_created_at", + "claude_ai_audit_log", + ["created_at"], + ) + + # ── claude_ai_pair_attempts ─────────────────────────────────────────────── + # Records every POST /pair to enforce rate limits. A simple sliding window + # over the most recent row count per IP is enough for the threat we're + # defending against: brute-force enumeration of pairing codes. + op.create_table( + "claude_ai_pair_attempts", + sa.Column( + "id", + UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("gen_random_uuid()"), + ), + sa.Column("source_ip", INET, nullable=True), + sa.Column("endpoint", sa.Text(), nullable=False), # 'pair' | 'approve' | 'status' + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + ) + op.create_index( + "ix_claude_ai_pair_attempts_ip_created", + "claude_ai_pair_attempts", + ["source_ip", "created_at"], + ) + op.create_index( + "ix_claude_ai_pair_attempts_created_at", + "claude_ai_pair_attempts", + ["created_at"], + ) + + # ── skills.claude_ai_sync_enabled ───────────────────────────────────────── + # Per-skill opt-in. Defaults to TRUE so the new connector immediately + # syncs all existing skills (no surprise gap during rollout); the UI + # surfaces a toggle for users who want to keep specific skills local. + op.add_column( + "skills", + sa.Column( + "claude_ai_sync_enabled", + sa.Boolean(), + nullable=False, + server_default=sa.true(), + ), + ) + + +def downgrade() -> None: + op.drop_column("skills", "claude_ai_sync_enabled") + + op.drop_index("ix_claude_ai_pair_attempts_created_at", table_name="claude_ai_pair_attempts") + op.drop_index("ix_claude_ai_pair_attempts_ip_created", table_name="claude_ai_pair_attempts") + op.drop_table("claude_ai_pair_attempts") + + op.drop_index("ix_claude_ai_audit_log_created_at", table_name="claude_ai_audit_log") + op.drop_index("ix_claude_ai_audit_log_integration_created", table_name="claude_ai_audit_log") + op.drop_table("claude_ai_audit_log") diff --git a/backend/alembic/versions/0021_audit_cookie_expired.py b/backend/alembic/versions/0021_audit_cookie_expired.py new file mode 100644 index 00000000..4b9577c7 --- /dev/null +++ b/backend/alembic/versions/0021_audit_cookie_expired.py @@ -0,0 +1,78 @@ +"""Add 'cookie_expired' to the claude_ai_audit_log event CHECK constraint. + +Migration 0020 hard-coded the legal `event` values into the table's CHECK +constraint. Round 12 added a new event kind (`cookie_expired`) that the +backend writes when an extension reports `auth_expired=true` on a +complete_operation call. Without this migration, the INSERT fails with a +psycopg CheckViolation and the operation completion returns 500. + +This migration rebuilds the CHECK constraint with the expanded value set. +Downgrade restores the original 14-value set; any `cookie_expired` rows +written between upgrade and downgrade would block the downgrade — a +clean-up step before downgrading is left to operators. + +Revision ID: 0021_claude_ai_cookie_expired_event +Revises: 0020_claude_ai_polish +Create Date: 2026-05-24 +""" + +from alembic import op + + +revision = "0021_audit_cookie_expired" +down_revision = "0020_claude_ai_polish" +branch_labels = None +depends_on = None + + +# Mirrors backend/app/api/claude_ai.py _VALID_AUDIT_EVENTS. Keep these +# two lists in lockstep when adding/removing event kinds. +_AUDIT_EVENTS_NEW = ( + "pair_started", + "pair_approved", + "pair_redeemed", + "pair_expired", + "integration_disconnected", + "integration_updated", + "skill_pushed", + "skill_imported", + "skill_delete_pushed", + "op_failed", + "conflict_detected", + "conflict_resolved", + "endpoint_changed", + "token_revoked", + "cookie_expired", # new in this migration +) + +_AUDIT_EVENTS_OLD = tuple(v for v in _AUDIT_EVENTS_NEW if v != "cookie_expired") + + +def _check_expression(values: tuple[str, ...]) -> str: + return "event IN (" + ", ".join(f"'{v}'" for v in values) + ")" + + +def upgrade() -> None: + op.drop_constraint( + "ck_claude_ai_audit_log_event", + "claude_ai_audit_log", + type_="check", + ) + op.create_check_constraint( + "ck_claude_ai_audit_log_event", + "claude_ai_audit_log", + _check_expression(_AUDIT_EVENTS_NEW), + ) + + +def downgrade() -> None: + op.drop_constraint( + "ck_claude_ai_audit_log_event", + "claude_ai_audit_log", + type_="check", + ) + op.create_check_constraint( + "ck_claude_ai_audit_log_event", + "claude_ai_audit_log", + _check_expression(_AUDIT_EVENTS_OLD), + ) diff --git a/backend/app/api/claude_ai.py b/backend/app/api/claude_ai.py new file mode 100644 index 00000000..ade967a1 --- /dev/null +++ b/backend/app/api/claude_ai.py @@ -0,0 +1,1973 @@ +"""Claude.ai connector API endpoints. + +Two audiences hit this module: + + 1. The SkillNote frontend (browser, authenticated as the user) — the + pairing-approval page, the settings list of paired browsers, and the + conflict resolution UI. + + 2. The Chrome extension (no user session, bearer extension_token in the + Authorization header) — the sync ops queue, the imported-skill push, + and the skill-bundle fetch. + +Both audiences hit `/v1/integrations/claude-ai/...`. Endpoints documented +inline; full design in docs/claude-ai-integration.md. +""" + +import io +import logging +import zipfile +from datetime import datetime +from typing import Optional +from uuid import UUID + +from fastapi import ( + APIRouter, + Depends, + File, + Form, + Header, + Query, + Request, + UploadFile, + status, +) +from fastapi.responses import Response +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.errors import api_error +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) +from app.db.models.claude_ai_polish import ClaudeAIPairAttempt +from app.db.models import Skill, SkillContentVersion +from app.db.session import get_db +from app.schemas.claude_ai import ( + AuditEventOut, + ConflictListItem, + ConflictResolveRequest, + HealthMetricsResponse, + ImportedSkillResponse, + IntegrationPatchRequest, + IntegrationStatusResponse, + KnownSkillIdsResponse, + PairingApproveRequest, + AnalyticsResponse, + ConflictPreviewResponse, + DiagnosticCheck, + DiagnosticResponse, + ExtensionSelfStatusResponse, + IntegrationActivityStat, + PairingStartRequest, + PairingStartResponse, + PairingStatusResponse, + SparklinePoint, + SyncOperationCompleteRequest, + SyncOperationOut, + SyncQueueItem, + SyncQueueResponse, + TelemetryEvent, + TopSkillStat, +) +from app.services.claude_ai_sync import ( + PairRateLimitExceeded, + bulk_integration_counters, + find_integration_by_extension_token, + find_pending_pairing_by_code, + find_pending_pairing_by_token, + generate_pairing_code, + generate_token, + hash_token, + integration_counters, + pairing_expiry, + query_audit, + record_pair_attempt, + write_audit, +) + +router = APIRouter(prefix="/v1/integrations/claude-ai", tags=["claude-ai"]) +_log = logging.getLogger("skillnote.claude_ai") + +# ── Auth dependency for extension calls ─────────────────────────────────────── + + +def require_extension( + authorization: Optional[str] = Header(default=None), + db: Session = Depends(get_db), +) -> ClaudeAIIntegration: + """Resolve `Authorization: Bearer ` to an integration. + + Used by every endpoint the Chrome extension calls. The frontend's pairing + and settings endpoints DON'T use this — they are user-session based + (currently auth-less, see CLAUDE.md, until ACL lands). + """ + if not authorization or not authorization.lower().startswith("bearer "): + raise api_error(401, "MISSING_BEARER_TOKEN", "Authorization: Bearer required") + raw = authorization[len("Bearer "):].strip() + integ = find_integration_by_extension_token(db, raw) + if integ is None: + raise api_error(401, "INVALID_EXTENSION_TOKEN", "Token not recognized or revoked") + if integ.status == "disconnected": + raise api_error(403, "INTEGRATION_DISCONNECTED", "This integration has been disconnected") + return integ + + +# ── Pairing flow ────────────────────────────────────────────────────────────── + + +def _client_ip(request: Request) -> Optional[str]: + """Extract the originating IP from X-Forwarded-For (production) or + request.client (local dev). Production deploys MUST set the trusted + proxy chain; otherwise an attacker could spoof X-Forwarded-For.""" + xff = request.headers.get("x-forwarded-for") + if xff: + # First IP in the comma list is the original client per RFC 7239. + return xff.split(",")[0].strip() + return request.client.host if request.client else None + + +@router.post("/extension/pair", response_model=PairingStartResponse, status_code=201) +def start_pairing( + body: PairingStartRequest, + request: Request, + db: Session = Depends(get_db), +) -> PairingStartResponse: + """Step 1 of pairing — extension requests a code. + + Creates a row in `pending_approval` with both a human-readable + pairing_code (shown in the extension's options page so the user can + confirm in SkillNote) and an opaque pairing_token (the extension polls + /pair/status with this). + + Rate-limited per source IP to defeat brute-force pairing-code + enumeration. Audit-logged so admins can correlate failed pairings + with suspicious activity. + """ + source_ip = _client_ip(request) + try: + record_pair_attempt(db, source_ip=source_ip, endpoint="pair") + except PairRateLimitExceeded as e: + raise api_error(429, "RATE_LIMITED", str(e)) + + pairing_code = generate_pairing_code() + pairing_token = generate_token() + + # In the unlikely case of a code collision among currently-pending rows, + # retry once before giving up. Six chars over 31 unambiguous glyphs gives + # 31^6 ≈ 887M codes — a collision needs ~30k concurrent pending pairings + # to register, so this loop almost always exits on the first iteration. + for _attempt in range(3): + existing = db.execute( + select(ClaudeAIIntegration.id).where( + ClaudeAIIntegration.pairing_code == pairing_code, + ClaudeAIIntegration.status == "pending_approval", + ) + ).first() + if existing is None: + break + pairing_code = generate_pairing_code() + else: + raise api_error(503, "PAIRING_CODE_EXHAUSTED", "Could not allocate pairing code; retry") + + integ = ClaudeAIIntegration( + status="pending_approval", + scope="both", + browser_label=body.browser_label, + pairing_code=pairing_code, + pairing_token_hash=hash_token(pairing_token), + pairing_expires_at=pairing_expiry(), + conflict_policy="ask", + ) + db.add(integ) + db.flush() + write_audit( + db, + event="pair_started", + integration_id=integ.id, + detail={"browser_label": body.browser_label or ""}, + source_ip=source_ip, + ) + db.commit() + db.refresh(integ) + + # Build the redemption URL the extension opens in a new tab. Uses the + # request's host so it works on both dev and prod without env vars. + host = request.headers.get("host", "localhost:3000") + scheme = request.headers.get("x-forwarded-proto", "http") + # Web URL is typically a different port from the API; trust the + # SKILLNOTE_WEB_URL env if set, otherwise fall back to swapping port. + import os + web_url = os.environ.get("SKILLNOTE_WEB_URL") + if web_url: + base = web_url.rstrip("/") + else: + # Naive port swap: 8082 (API) -> 3000 (Next). Good enough for the + # docker-compose dev story; prod always sets SKILLNOTE_WEB_URL. + host_only = host.split(":")[0] + base = f"{scheme}://{host_only}:3000" + redemption_url = f"{base}/settings/integrations/claude-ai/pair?code={pairing_code}" + + return PairingStartResponse( + integration_id=integ.id, + pairing_code=pairing_code, + pairing_token=pairing_token, + redemption_url=redemption_url, + expires_at=integ.pairing_expires_at, + ) + + +@router.post("/pair/approve", status_code=204) +def approve_pairing( + body: PairingApproveRequest, + db: Session = Depends(get_db), +) -> Response: + """Step 2 — user-side approval (SkillNote frontend posts here). + + Sets `pairing_approved_at` on the integration row. Does NOT issue the + extension token — that happens at the extension's next /pair/status + poll. Two reasons for the separation: + + 1. A shoulder-surfer watching the approval click never sees the + token; it only travels to the extension that holds the matching + pairing_token (which is opaque and never displayed). + 2. The token lifecycle stays atomic with the row-state transition, + eliminating the need to stash raw tokens anywhere. + """ + integ = find_pending_pairing_by_code(db, body.pairing_code) + if integ is None: + raise api_error(404, "PAIRING_NOT_FOUND", "Pairing code not recognized or already used") + + from datetime import datetime, timezone + if integ.pairing_expires_at and integ.pairing_expires_at < datetime.now(timezone.utc): + raise api_error(410, "PAIRING_EXPIRED", "Pairing code has expired; restart from the extension") + if integ.pairing_approved_at is not None: + # Idempotent — approving twice is harmless, the extension's next + # poll still redeems the token. + return Response(status_code=204) + + integ.pairing_approved_at = datetime.now(timezone.utc) + write_audit(db, event="pair_approved", integration_id=integ.id) + db.commit() + return Response(status_code=204) + + +@router.get("/extension/pair/status", response_model=PairingStatusResponse) +def pairing_status( + pairing_token: str, + db: Session = Depends(get_db), +) -> PairingStatusResponse: + """Step 3 — extension polls until approved. + + Three return shapes: + - approved=False, no token: user hasn't clicked Approve yet + - approved=True, with token: this poll redeems the token; happens once + - 404/410 error: pairing token unknown or already consumed + + On token issuance this atomically: + 1. Generates a fresh extension_token (32 random url-safe bytes) + 2. Stores only its sha256 hash + 3. Clears the pairing handshake fields + 4. Flips status to `active` + All inside one db.commit() so a crash mid-transaction can't leave a + partially-paired row. + """ + # Look up the row WITH a row-level lock so concurrent polls can't both + # try to issue tokens. Without this, an extension retry storm could + # generate two tokens for the same pairing — one ends up in the DB, + # the other gets returned to the second poll but is dead. + token_hash = hash_token(pairing_token) + integ = db.execute( + select(ClaudeAIIntegration) + .where(ClaudeAIIntegration.pairing_token_hash == token_hash) + .where(ClaudeAIIntegration.status == "pending_approval") + .with_for_update() + ).scalar_one_or_none() + + if integ is None: + # Not pending — either the extension is polling with a bogus token + # or the row was already activated and the handshake fields were + # cleared. Either way, this poll cannot succeed. + raise api_error( + 404, + "PAIRING_TOKEN_UNKNOWN", + "Pairing token expired or already consumed; restart the pairing flow", + ) + + from datetime import datetime, timezone + if integ.pairing_expires_at and integ.pairing_expires_at < datetime.now(timezone.utc): + raise api_error(410, "PAIRING_EXPIRED", "Pairing code has expired; restart from the extension") + + if integ.pairing_approved_at is None: + # User hasn't approved yet. Extension keeps polling. + return PairingStatusResponse(approved=False, extension_token=None) + + # Approved — atomically issue the token + clear the handshake state. + # The row is locked above, so a concurrent poll waits for our COMMIT + # before observing the cleared state and returning 404. + raw_extension_token = generate_token() + integ.extension_token_hash = hash_token(raw_extension_token) + integ.status = "active" + integ.pairing_code = None + integ.pairing_token_hash = None + integ.pairing_approved_at = None + integ.pairing_expires_at = None + + write_audit(db, event="pair_redeemed", integration_id=integ.id) + db.commit() + return PairingStatusResponse(approved=True, extension_token=raw_extension_token) + + +# ── Integration management (frontend) ───────────────────────────────────────── + + +@router.get("/integrations", response_model=list[IntegrationStatusResponse]) +def list_integrations(db: Session = Depends(get_db)) -> list[IntegrationStatusResponse]: + """All paired browsers for the current user. + + No user filter today because there's no auth (see CLAUDE.md). When ACL + lands, filter by user_id from the session. + """ + rows = ( + db.execute( + select(ClaudeAIIntegration) + .where(ClaudeAIIntegration.status != "pending_approval") + .order_by(ClaudeAIIntegration.created_at.desc()) + ) + .scalars() + .all() + ) + # N+1-free counters: one batched call instead of 3*N queries. + counters_by_id = bulk_integration_counters(db, [r.id for r in rows]) + out: list[IntegrationStatusResponse] = [] + for row in rows: + out.append( + IntegrationStatusResponse( + id=row.id, + browser_label=row.browser_label, + status=row.status, # type: ignore[arg-type] + scope=row.scope, # type: ignore[arg-type] + claude_ai_org_id=row.claude_ai_org_id, + last_sync_at=row.last_sync_at, + last_error=row.last_error, + conflict_policy=row.conflict_policy, # type: ignore[arg-type] + **counters_by_id.get( + row.id, + {"pending_op_count": 0, "failed_op_count": 0, "linked_skill_count": 0}, + ), + ) + ) + return out + + +@router.patch("/integrations/{integration_id}", response_model=IntegrationStatusResponse) +def patch_integration( + integration_id: UUID, + body: IntegrationPatchRequest, + db: Session = Depends(get_db), +) -> IntegrationStatusResponse: + """Update a single integration's scope / conflict policy / label.""" + integ = db.get(ClaudeAIIntegration, integration_id) + if integ is None: + raise api_error(404, "INTEGRATION_NOT_FOUND", f"Integration {integration_id} not found") + changes: dict[str, str] = {} + if body.scope is not None and body.scope != integ.scope: + changes["scope"] = f"{integ.scope}→{body.scope}" + integ.scope = body.scope + if body.conflict_policy is not None and body.conflict_policy != integ.conflict_policy: + changes["conflict_policy"] = f"{integ.conflict_policy}→{body.conflict_policy}" + integ.conflict_policy = body.conflict_policy + if body.browser_label is not None and body.browser_label != integ.browser_label: + changes["browser_label"] = "updated" + integ.browser_label = body.browser_label + if changes: + write_audit( + db, + event="integration_updated", + integration_id=integ.id, + detail=changes, + ) + db.commit() + db.refresh(integ) + counters = integration_counters(db, integ.id) + return IntegrationStatusResponse( + id=integ.id, + browser_label=integ.browser_label, + status=integ.status, # type: ignore[arg-type] + scope=integ.scope, # type: ignore[arg-type] + claude_ai_org_id=integ.claude_ai_org_id, + last_sync_at=integ.last_sync_at, + last_error=integ.last_error, + conflict_policy=integ.conflict_policy, # type: ignore[arg-type] + **counters, + ) + + +@router.delete("/integrations/{integration_id}", status_code=204) +def disconnect_integration( + integration_id: UUID, + db: Session = Depends(get_db), +) -> Response: + """Soft-disconnect — flips status, leaves the link/operation history. + + Hard-delete would orphan claude.ai skills (we never delete them on the + claude.ai side as part of disconnect; the user must do that manually + if they want). The disconnected state stops the extension from + receiving new ops; it does NOT revoke skills already pushed. + """ + integ = db.get(ClaudeAIIntegration, integration_id) + if integ is None: + raise api_error(404, "INTEGRATION_NOT_FOUND", f"Integration {integration_id} not found") + integ.status = "disconnected" + integ.extension_token_hash = None # revoke the bearer + + # Mark any pending or in-flight sync ops as failed — they can never + # complete now that the extension is revoked. Without this, the queue + # would accumulate orphan rows forever, polluting the failed_ops_total + # metric and confusing operators. + db.execute( + ClaudeAISyncOperation.__table__.update() + .where(ClaudeAISyncOperation.integration_id == integ.id) + .where(ClaudeAISyncOperation.status.in_(("pending", "in_progress"))) + .values( + status="failed", + last_error="Integration disconnected before completion", + ) + ) + + write_audit( + db, + event="integration_disconnected", + integration_id=integ.id, + detail={"browser_label": integ.browser_label or ""}, + ) + db.commit() + return Response(status_code=204) + + +# ── Sync ops queue (extension) ──────────────────────────────────────────────── + + +@router.get("/extension/operations", response_model=list[SyncOperationOut]) +def fetch_operations( + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), + limit: int = 20, +) -> list[SyncOperationOut]: + """Return the next batch of pending ops for this integration. + + Marks each fetched op `in_progress` atomically so two extension instances + paired to the same SkillNote don't both try to execute the same op. + (Edge case — same person on two browsers — but cheap to defend against.) + """ + limit = max(1, min(limit, 100)) + rows = ( + db.execute( + select(ClaudeAISyncOperation) + .where( + ClaudeAISyncOperation.integration_id == integ.id, + ClaudeAISyncOperation.status == "pending", + ) + .order_by(ClaudeAISyncOperation.created_at) + .with_for_update(skip_locked=True) + .limit(limit) + ) + .scalars() + .all() + ) + + from datetime import datetime, timezone + now = datetime.now(timezone.utc) + for row in rows: + row.status = "in_progress" + row.started_at = now + row.attempts += 1 + db.commit() + return [SyncOperationOut.model_validate(r) for r in rows] + + +@router.post("/extension/operations/{op_id}/complete", status_code=204) +def complete_operation( + op_id: UUID, + body: SyncOperationCompleteRequest, + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), +) -> Response: + """Extension reports the outcome. + + On success for upload/update ops: also upserts the corresponding + ClaudeAISkillLink row so subsequent syncs of the same skill reuse the + claude.ai skill ID instead of creating duplicates. + + On failure: writes last_error, sets status=failed if attempts>=3, + otherwise re-queues as pending so a transient failure auto-retries. + """ + op = db.get(ClaudeAISyncOperation, op_id) + if op is None or op.integration_id != integ.id: + raise api_error(404, "OPERATION_NOT_FOUND", "Operation not found for this integration") + if op.status not in ("in_progress", "pending"): + raise api_error( + 409, + "OPERATION_ALREADY_FINAL", + f"Operation is already in terminal state '{op.status}'", + ) + + from datetime import datetime, timezone + now = datetime.now(timezone.utc) + + if body.claude_ai_org_id and not integ.claude_ai_org_id: + # First time we've seen this user's org — cache it on the integration. + integ.claude_ai_org_id = body.claude_ai_org_id + + if body.success: + op.status = "completed" + op.completed_at = now + op.last_error = None + integ.last_sync_at = now + # Upsert link row for upload/update outcomes. + if op.kind in ("upload", "update") and op.skill_id is not None and body.result: + ca_skill_id = body.result.get("claude_ai_skill_id") + ca_version = body.result.get("claude_ai_version") + if ca_skill_id: + link = db.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.integration_id == integ.id, + ClaudeAISkillLink.skillnote_skill_id == op.skill_id, + ) + ).scalar_one_or_none() + if link is None: + link = ClaudeAISkillLink( + integration_id=integ.id, + skillnote_skill_id=op.skill_id, + claude_ai_skill_id=ca_skill_id, + claude_ai_version=ca_version, + last_seen_at=now, + direction="outbound", + ) + db.add(link) + else: + link.claude_ai_skill_id = ca_skill_id + link.claude_ai_version = ca_version + link.last_seen_at = now + # Use the version_id from the op payload (the version we just + # pushed); not the skill's current latest, which may have + # advanced again since the op was enqueued. + version_id = op.payload.get("version_id") if op.payload else None + if version_id: + try: + link.skillnote_version_id = UUID(version_id) + except ValueError: + # Bad payload — log but don't fail the completion; + # the link is still useful with skill_id alone. + _log.warning( + "claude_ai complete_operation: invalid version_id payload %r on op %s", + version_id, op.id, + ) + elif op.kind == "delete" and op.skill_id is not None: + # Drop the link — the claude.ai skill no longer exists. + db.execute( + ClaudeAISkillLink.__table__.delete().where( + ClaudeAISkillLink.integration_id == integ.id, + ClaudeAISkillLink.skillnote_skill_id == op.skill_id, + ) + ) + # Audit log the successful op outcome. Includes the op kind so the + # activity feed can render a meaningful row. + write_audit( + db, + event=( + "skill_pushed" if op.kind in ("upload", "update") + else "skill_delete_pushed" if op.kind == "delete" + else "skill_imported" if op.kind == "list" + else "skill_pushed" + ), + integration_id=integ.id, + skill_id=op.skill_id, + detail={"op_kind": op.kind, "result": body.result or {}}, + ) + else: + op.last_error = body.error or "unknown error" + # Retry budget: 3 attempts total. The fetch path increments attempts + # at dispatch time, so attempts==3 here means we've used all 3. + if op.attempts >= 3: + op.status = "failed" + op.completed_at = now + integ.last_error = op.last_error + write_audit( + db, + event="op_failed", + integration_id=integ.id, + skill_id=op.skill_id, + detail={"op_kind": op.kind, "attempts": op.attempts, "error": op.last_error or ""}, + ) + else: + op.status = "pending" + op.started_at = None + + # Auth-expired signal flips the integration status BEFORE returning + # so the UI's next /integrations poll surfaces the "Sign in to + # claude.ai" CTA. Also emit a dedicated audit event so the activity + # feed shows a single legible "Browser session expired" row rather + # than an opaque op_failed cascade. + if body.auth_expired and integ.status != "cookie_expired": + integ.status = "cookie_expired" + write_audit( + db, + event="cookie_expired", + integration_id=integ.id, + detail={"op_kind": op.kind, "error": (body.error or "")[:200]}, + ) + + db.commit() + return Response(status_code=204) + + +# ── Skill bundle fetch (extension) ──────────────────────────────────────────── + + +@router.get("/extension/skill-bundle") +def get_skill_bundle( + skill_id: UUID, + version_id: UUID, + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), +) -> Response: + """Return the ZIP for a specific skill version. + + Phase 1 implementation builds the ZIP in-memory from the version row's + content_md + frontmatter. Phase 2 will route through the existing + LocalBundleStorage to support skills with bundled scripts/assets. + """ + version = db.get(SkillContentVersion, version_id) + if version is None or version.skill_id != skill_id: + raise api_error(404, "VERSION_NOT_FOUND", "Version not found for that skill") + + skill = db.get(Skill, skill_id) + if skill is None: + raise api_error(404, "SKILL_NOT_FOUND", "Skill not found") + + # Compose SKILL.md from frontmatter + content_md. Use yaml.safe_dump + # so a description containing newlines, quotes, or yaml-special chars + # (---, : at start) doesn't break the frontmatter parser on the + # consuming side. Manual string interpolation here was a CVE waiting + # to happen — a malicious description could inject arbitrary YAML + # keys that the claude.ai upload handler would then misinterpret. + import yaml as _yaml + frontmatter_doc = _yaml.safe_dump( + {"name": skill.slug, "description": version.description}, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + skill_md = f"---\n{frontmatter_doc}---\n\n" + (version.content_md or "") + + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr(f"{skill.slug}/SKILL.md", skill_md) + buf.seek(0) + return Response( + content=buf.read(), + media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="{skill.slug}-v{version.version}.zip"', + }, + ) + + +# ── Reverse sync: imported skills + known IDs (extension) ───────────────────── + + +@router.get("/extension/status", response_model=ExtensionSelfStatusResponse) +def extension_self_status( + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), +) -> ExtensionSelfStatusResponse: + """Compact snapshot for the extension popup. + + Returns *only this integration's* counters. Authenticated by the + extension's bearer token, so it never leaks across integrations. + """ + counters = integration_counters(db, integ.id) + return ExtensionSelfStatusResponse( + integration_id=integ.id, + browser_label=integ.browser_label, + status=integ.status, # type: ignore[arg-type] + linked_skill_count=counters["linked_skill_count"], + pending_op_count=counters["pending_op_count"], + failed_op_count=counters["failed_op_count"], + last_sync_at=integ.last_sync_at, + last_error=integ.last_error, + ) + + +@router.get("/extension/known-skill-ids", response_model=KnownSkillIdsResponse) +def list_known_skill_ids( + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), +) -> KnownSkillIdsResponse: + """Return all claude.ai skill IDs this integration already has linked. + + The extension uses this to skip re-importing already-synced skills + during reverse-sync list operations. + """ + rows = db.execute( + select(ClaudeAISkillLink.claude_ai_skill_id).where( + ClaudeAISkillLink.integration_id == integ.id + ) + ).all() + return KnownSkillIdsResponse( + claude_ai_skill_ids=[r[0] for r in rows], + ) + + +# ── Conflict resolution (frontend) ──────────────────────────────────────────── + + +@router.get("/conflicts", response_model=list[ConflictListItem]) +def list_conflicts(db: Session = Depends(get_db)) -> list[ConflictListItem]: + """All currently-diverged links across every integration. + + Pulled by the SkillNote conflict-resolution UI. Joins skill metadata so + one query gives the UI everything it needs to render the row. + """ + rows = db.execute( + select( + ClaudeAISkillLink.id, + ClaudeAISkillLink.integration_id, + ClaudeAIIntegration.browser_label, + ClaudeAISkillLink.skillnote_skill_id, + Skill.slug, + Skill.name, + ClaudeAISkillLink.claude_ai_skill_id, + ClaudeAISkillLink.claude_ai_version, + ClaudeAISkillLink.last_seen_at, + ) + .join( + ClaudeAIIntegration, + ClaudeAIIntegration.id == ClaudeAISkillLink.integration_id, + ) + .outerjoin(Skill, Skill.id == ClaudeAISkillLink.skillnote_skill_id) + .where(ClaudeAISkillLink.conflict_state == "diverged") + .order_by(ClaudeAISkillLink.last_seen_at.desc().nullslast()) + ).all() + return [ + ConflictListItem( + link_id=r.id, + integration_id=r.integration_id, + integration_label=r.browser_label, + skillnote_skill_id=r.skillnote_skill_id, + skillnote_skill_slug=r.slug, + skillnote_skill_name=r.name, + claude_ai_skill_id=r.claude_ai_skill_id, + claude_ai_version=r.claude_ai_version, + last_seen_at=r.last_seen_at, + ) + for r in rows + ] + + +@router.get( + "/conflicts/{link_id}/preview", response_model=ConflictPreviewResponse +) +def preview_conflict( + link_id: UUID, + db: Session = Depends(get_db), +) -> ConflictPreviewResponse: + """Side-by-side preview data for the Keep-SkillNote / Keep-claude.ai + decision. + + Returns: + - The last version we pushed to claude.ai (`last_pushed_*`) + - The current SkillNote-side latest version (`current_*`) + - A `local_changed` flag — True iff the local content changed since + the last push, i.e. picking "Keep claude.ai" would overwrite real + local edits. + + We can't return the claude.ai-side content (it lives in the user's + browser, not on the server). The UI surfaces version metadata for + the remote side and trusts the user's domain knowledge there. + """ + link = db.get(ClaudeAISkillLink, link_id) + if link is None: + raise api_error(404, "LINK_NOT_FOUND", f"Link {link_id} not found") + + integ = db.get(ClaudeAIIntegration, link.integration_id) + integ_label = integ.browser_label if integ else None + + skill: Optional[Skill] = ( + db.get(Skill, link.skillnote_skill_id) if link.skillnote_skill_id else None + ) + skill_slug = skill.slug if skill else None + skill_name = skill.name if skill else None + + last_pushed: Optional[SkillContentVersion] = None + if link.skillnote_version_id is not None: + last_pushed = db.get(SkillContentVersion, link.skillnote_version_id) + + current: Optional[SkillContentVersion] = None + if skill is not None: + current = db.execute( + select(SkillContentVersion) + .where(SkillContentVersion.skill_id == skill.id) + .where(SkillContentVersion.is_latest.is_(True)) + ).scalar_one_or_none() + + local_changed = bool( + current + and last_pushed + and current.id != last_pushed.id + and (current.content_md or "") != (last_pushed.content_md or "") + ) + # If we have a `current` but no `last_pushed`, that's also "local + # changed" — the link was created without ever pushing successfully. + if current is not None and last_pushed is None: + local_changed = True + + return ConflictPreviewResponse( + link_id=link.id, + integration_id=link.integration_id, + integration_label=integ_label, + skill_id=link.skillnote_skill_id, + skill_slug=skill_slug, + skill_name=skill_name, + last_pushed_version_id=last_pushed.id if last_pushed else None, + last_pushed_version_number=( + last_pushed.version if last_pushed else None + ), + last_pushed_content_md=(last_pushed.content_md if last_pushed else None), + current_version_id=current.id if current else None, + current_version_number=( + current.version if current else None + ), + current_content_md=(current.content_md if current else None), + local_changed=local_changed, + claude_ai_skill_id=link.claude_ai_skill_id, + claude_ai_version=link.claude_ai_version, + claude_ai_last_seen_at=link.last_seen_at, + ) + + +@router.post("/conflicts/{link_id}/resolve", status_code=204) +def resolve_conflict( + link_id: UUID, + body: ConflictResolveRequest, + db: Session = Depends(get_db), +) -> Response: + """User picks a winner. Marks the conflict resolved and (for keep_*) + enqueues a sync op that will overwrite the loser on the next tick. + """ + link = db.get(ClaudeAISkillLink, link_id) + if link is None: + raise api_error(404, "LINK_NOT_FOUND", f"Link {link_id} not found") + if link.conflict_state != "diverged": + raise api_error( + 409, + "LINK_NOT_IN_CONFLICT", + f"Link is not in conflict (state={link.conflict_state})", + ) + + from app.services.claude_ai_sync import ( + enqueue_skill_upload, + active_integrations_for_sync, + ) + + if body.resolution == "skip": + # Nothing to do — just clear the flag. Conflict may re-appear on + # next divergence event. + link.conflict_state = "resolved" + elif body.resolution == "keep_skillnote": + # Re-upload the SkillNote-side version to claude.ai. The link's + # outbound op will overwrite the claude.ai-side change. + if link.skillnote_skill_id is None: + raise api_error( + 422, + "NO_SKILLNOTE_SIDE", + "This conflict has no SkillNote-side skill (inbound-only); keep_skillnote not applicable", + ) + skill = db.get(Skill, link.skillnote_skill_id) + if skill is None: + raise api_error(404, "SKILL_NOT_FOUND", "Linked skill was deleted") + # Find the integration to scope the enqueue. + integ = db.get(ClaudeAIIntegration, link.integration_id) + if integ is None or integ.status == "disconnected": + raise api_error( + 409, + "INTEGRATION_INACTIVE", + "Cannot push — integration is disconnected", + ) + # Get current latest version_id. + latest = db.execute( + select(SkillContentVersion.id) + .where(SkillContentVersion.skill_id == skill.id) + .where(SkillContentVersion.is_latest.is_(True)) + ).scalar_one_or_none() + if latest is None: + raise api_error(409, "NO_LATEST_VERSION", "Skill has no current version to push") + enqueue_skill_upload( + db, + skill_id=skill.id, + version_id=latest, + name=skill.name, + description=skill.description, + integrations=[integ], + ) + link.conflict_state = "resolved" + elif body.resolution == "keep_claude_ai": + # Trigger a fetch_one op so the extension pulls the current claude.ai + # contents and the next inbound import overwrites the SkillNote side. + from app.db.models.claude_ai import ClaudeAISyncOperation + op = ClaudeAISyncOperation( + integration_id=link.integration_id, + kind="fetch_one", + skill_id=link.skillnote_skill_id, + payload={"claude_ai_skill_id": link.claude_ai_skill_id, "overwrite": True}, + ) + db.add(op) + link.conflict_state = "resolved" + + db.commit() + return Response(status_code=204) + + +_VALID_AUDIT_EVENTS = frozenset( + { + "pair_started", + "pair_approved", + "pair_redeemed", + "pair_expired", + "integration_disconnected", + "integration_updated", + "skill_pushed", + "skill_imported", + "skill_delete_pushed", + "op_failed", + "conflict_detected", + "conflict_resolved", + "endpoint_changed", + "token_revoked", + # Emitted when the extension reports `auth_expired=true` on a + # complete_operation call; integration.status transitions to + # cookie_expired and this row explains why in the activity feed. + "cookie_expired", + } +) + + +@router.get("/queue", response_model=SyncQueueResponse) +def list_sync_queue( + db: Session = Depends(get_db), + integration_id: Optional[UUID] = None, + limit: int = Query(default=50, ge=1, le=200), +) -> SyncQueueResponse: + """Live snapshot of pending + in-progress sync operations. + + Drives the "Sync activity" panel on the settings page. Eager-joins + skill metadata and integration label in one query to avoid N+1. + Sorted oldest-first so the user sees the FIFO order. + + Excludes `completed` and `failed` ops — those belong in the activity + feed, not the queue. + """ + from sqlalchemy import desc as _desc, func as _func, or_ as _or + from app.db.models.skill import Skill + + base_q = ( + select(ClaudeAISyncOperation) + .where(ClaudeAISyncOperation.status.in_(("pending", "in_progress"))) + .order_by(ClaudeAISyncOperation.created_at.asc()) + ) + if integration_id is not None: + base_q = base_q.where(ClaudeAISyncOperation.integration_id == integration_id) + + # Pull the bounded slice + total counters in two queries (cheap because + # of the partial index ix_claude_ai_sync_operations_integration_status_created). + rows: list[ClaudeAISyncOperation] = list( + db.execute(base_q.limit(limit)).scalars().all() + ) + + total_q = select(_func.count(ClaudeAISyncOperation.id)).where( + ClaudeAISyncOperation.status.in_(("pending", "in_progress")) + ) + if integration_id is not None: + total_q = total_q.where(ClaudeAISyncOperation.integration_id == integration_id) + total = int(db.execute(total_q).scalar_one()) + + by_status_q = ( + select(ClaudeAISyncOperation.status, _func.count(ClaudeAISyncOperation.id)) + .where(ClaudeAISyncOperation.status.in_(("pending", "in_progress"))) + .group_by(ClaudeAISyncOperation.status) + ) + if integration_id is not None: + by_status_q = by_status_q.where( + ClaudeAISyncOperation.integration_id == integration_id + ) + counts = {s: int(c) for s, c in db.execute(by_status_q).all()} + + oldest_q = select(_func.min(ClaudeAISyncOperation.created_at)).where( + ClaudeAISyncOperation.status.in_(("pending", "in_progress")) + ) + if integration_id is not None: + oldest_q = oldest_q.where( + ClaudeAISyncOperation.integration_id == integration_id + ) + oldest_at: Optional[datetime] = db.execute(oldest_q).scalar_one_or_none() + + # Bulk-load skill + integration metadata for the visible rows. One + # query each — cheaper than per-row eager-load on small page sizes. + skill_ids = {r.skill_id for r in rows if r.skill_id is not None} + integ_ids = {r.integration_id for r in rows} + skills_by_id: dict[UUID, Skill] = {} + if skill_ids: + skills_by_id = { + s.id: s + for s in db.execute( + select(Skill).where(Skill.id.in_(skill_ids)) + ).scalars() + } + integ_labels: dict[UUID, Optional[str]] = {} + if integ_ids: + integ_labels = { + i.id: i.browser_label + for i in db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.id.in_(integ_ids) + ) + ).scalars() + } + + items = [ + SyncQueueItem( + id=r.id, + kind=r.kind, # type: ignore[arg-type] + status=r.status, # type: ignore[arg-type] + attempts=r.attempts, + last_error=r.last_error, + created_at=r.created_at, + started_at=r.started_at, + integration_id=r.integration_id, + integration_label=integ_labels.get(r.integration_id), + skill_id=r.skill_id, + skill_slug=(skills_by_id.get(r.skill_id).slug if r.skill_id and skills_by_id.get(r.skill_id) else None), + skill_name=(skills_by_id.get(r.skill_id).name if r.skill_id and skills_by_id.get(r.skill_id) else None), + ) + for r in rows + ] + + oldest_age: Optional[float] = None + if oldest_at is not None: + from datetime import timezone as _tz + oldest_age = (datetime.now(_tz.utc) - oldest_at).total_seconds() + + return SyncQueueResponse( + items=items, + total=total, + pending_count=counts.get("pending", 0), + in_progress_count=counts.get("in_progress", 0), + oldest_age_seconds=oldest_age, + ) + + +@router.get("/activity", response_model=list[AuditEventOut]) +def list_activity( + db: Session = Depends(get_db), + integration_id: Optional[UUID] = None, + event: Optional[str] = None, + skill_id: Optional[UUID] = None, + since: Optional[datetime] = None, + until: Optional[datetime] = None, + limit: int = Query(default=50, ge=1, le=500), + before: Optional[datetime] = None, +) -> list[AuditEventOut]: + """Audit feed — most recent first. + + Drives the Linear-style activity page in the SkillNote frontend. + Filterable by integration and event kind for noise reduction. + + ``before`` enables cursor-based pagination: pass the ``created_at`` of + the last row from the previous page to fetch older events. ``since`` + / ``until`` define an inclusive date window (compliance queries). + ``skill_id`` scopes to a specific skill's history. ``limit`` is + bounded [1, 500] so a misbehaving client can't request unbounded + rows. ``event`` is whitelisted against the canonical set so a typo + returns a 422 instead of silently zero-matching. + """ + if event is not None and event not in _VALID_AUDIT_EVENTS: + raise api_error( + 422, + "INVALID_EVENT", + f"Unknown event kind: {event!r}. " + f"Valid: {sorted(_VALID_AUDIT_EVENTS)}", + ) + if since is not None and until is not None and since > until: + raise api_error( + 422, + "INVALID_DATE_RANGE", + "`since` must be earlier than `until`", + ) + rows = query_audit( + db, + integration_id=integration_id, + event=event, + skill_id=skill_id, + since=since, + until=until, + limit=limit, + before=before, + ) + return [AuditEventOut.model_validate(r) for r in rows] + + +@router.get("/activity/export.csv") +def export_activity_csv( + db: Session = Depends(get_db), + integration_id: Optional[UUID] = None, + event: Optional[str] = None, + skill_id: Optional[UUID] = None, + since: Optional[datetime] = None, + until: Optional[datetime] = None, + limit: int = Query(default=10_000, ge=1, le=50_000), +) -> Response: + """CSV download of the audit log — for compliance + offline review. + + Same filter contract as /activity but with a higher row ceiling + (50k) suitable for an export. Streams the CSV inline so the browser + triggers a download. + """ + import csv + import io as _io + import json as _json + + if event is not None and event not in _VALID_AUDIT_EVENTS: + raise api_error( + 422, + "INVALID_EVENT", + f"Unknown event kind: {event!r}. " + f"Valid: {sorted(_VALID_AUDIT_EVENTS)}", + ) + if since is not None and until is not None and since > until: + raise api_error( + 422, + "INVALID_DATE_RANGE", + "`since` must be earlier than `until`", + ) + + rows = query_audit( + db, + integration_id=integration_id, + event=event, + skill_id=skill_id, + since=since, + until=until, + limit=limit, + ) + + buf = _io.StringIO() + writer = csv.writer(buf, quoting=csv.QUOTE_MINIMAL) + writer.writerow(["created_at", "event", "integration_id", "skill_id", "detail"]) + for r in rows: + writer.writerow([ + r.created_at.isoformat(), + r.event, + str(r.integration_id) if r.integration_id else "", + str(r.skill_id) if r.skill_id else "", + _json.dumps(r.detail, ensure_ascii=False, sort_keys=True), + ]) + + filename = "claude-ai-activity.csv" + return Response( + content=buf.getvalue(), + media_type="text/csv; charset=utf-8", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + # Disable caching so a re-export reflects fresh state. + "Cache-Control": "no-store", + }, + ) + + +@router.get("/analytics", response_model=AnalyticsResponse) +def connector_analytics(db: Session = Depends(get_db)) -> AnalyticsResponse: + """Sync-throughput + per-integration rollup for the analytics panel. + + All windows are UTC and computed against now. Only terminal ops + (completed, failed) are counted toward throughput; pending/in_progress + are queue depth (already covered by /queue and /health). + """ + from datetime import timedelta as _td, timezone as _tz + from sqlalchemy import case, cast, Date, func as _func + from app.db.models.skill import Skill + + now = datetime.now(_tz.utc) + cutoff_24h = now - _td(hours=24) + cutoff_7d = now - _td(days=7) + + op = ClaudeAISyncOperation + integ = ClaudeAIIntegration + + completed_filter = (op.status == "completed") & (op.completed_at != None) # noqa: E711 + failed_filter = (op.status == "failed") & (op.completed_at != None) # noqa: E711 + + counts_24h = db.execute( + select( + _func.coalesce( + _func.sum(case((completed_filter, 1), else_=0)), 0 + ), + _func.coalesce( + _func.sum(case((failed_filter, 1), else_=0)), 0 + ), + ).where(op.completed_at >= cutoff_24h) + ).one() + syncs_24h, failed_24h = int(counts_24h[0]), int(counts_24h[1]) + + counts_7d = db.execute( + select( + _func.coalesce( + _func.sum(case((completed_filter, 1), else_=0)), 0 + ), + _func.coalesce( + _func.sum(case((failed_filter, 1), else_=0)), 0 + ), + _func.coalesce(_func.avg(op.attempts), 0.0), + ).where(op.completed_at >= cutoff_7d) + ).one() + syncs_7d = int(counts_7d[0]) + failed_7d = int(counts_7d[1]) + avg_attempts_7d = float(counts_7d[2]) + total_7d = syncs_7d + failed_7d + success_rate_7d = 1.0 if total_7d == 0 else syncs_7d / total_7d + + # Top 5 most-synced skills over 7d. Joins Skill so we return + # human-readable name/slug. Skips ops with NULL skill_id (list ops). + top_rows = db.execute( + select( + Skill.id, + Skill.slug, + Skill.name, + _func.count(op.id).label("sync_count"), + ) + .join(op, op.skill_id == Skill.id) + .where(completed_filter) + .where(op.completed_at >= cutoff_7d) + .group_by(Skill.id, Skill.slug, Skill.name) + .order_by(_func.count(op.id).desc()) + .limit(5) + ).all() + top_skills = [ + TopSkillStat( + skill_id=row[0], + skill_slug=row[1], + skill_name=row[2], + sync_count=int(row[3]), + ) + for row in top_rows + ] + + # Per-integration 24h activity. LEFT JOIN keeps integrations with zero + # activity in the result so the UI can show them as "quiet" instead of + # silently dropping them. + integ_rows = db.execute( + select( + integ.id, + integ.browser_label, + integ.last_sync_at, + _func.coalesce( + _func.sum( + case( + ( + (op.completed_at >= cutoff_24h) + & (op.status == "completed"), + 1, + ), + else_=0, + ) + ), + 0, + ), + _func.coalesce( + _func.sum( + case( + ( + (op.completed_at >= cutoff_24h) + & (op.status == "failed"), + 1, + ), + else_=0, + ) + ), + 0, + ), + ) + .select_from(integ) + .outerjoin(op, op.integration_id == integ.id) + .where(integ.status != "disconnected") + .group_by(integ.id, integ.browser_label, integ.last_sync_at) + .order_by(integ.last_sync_at.desc().nullslast()) + ).all() + per_integration = [ + IntegrationActivityStat( + integration_id=row[0], + integration_label=row[1], + last_sync_at=row[2], + syncs_24h=int(row[3]), + failed_24h=int(row[4]), + ) + for row in integ_rows + ] + + # Sparkline: 7 daily buckets, oldest first, with explicit zeros for + # days that had no activity. Casting to date in the GROUP BY keeps + # the row count bounded at 7. + spark_rows = db.execute( + select( + cast(op.completed_at, Date).label("d"), + _func.coalesce( + _func.sum(case((completed_filter, 1), else_=0)), 0 + ), + _func.coalesce( + _func.sum(case((failed_filter, 1), else_=0)), 0 + ), + ) + .where(op.completed_at >= cutoff_7d) + .group_by(cast(op.completed_at, Date)) + ).all() + spark_by_date = {str(row[0]): (int(row[1]), int(row[2])) for row in spark_rows} + sparkline: list[SparklinePoint] = [] + for i in range(6, -1, -1): + day = (now - _td(days=i)).date() + s, f = spark_by_date.get(str(day), (0, 0)) + sparkline.append(SparklinePoint(date=str(day), syncs=s, failed=f)) + + return AnalyticsResponse( + skills_synced_24h=syncs_24h, + skills_synced_7d=syncs_7d, + failed_24h=failed_24h, + failed_7d=failed_7d, + sync_success_rate_7d=round(success_rate_7d, 4), + avg_attempts_per_sync_7d=round(avg_attempts_7d, 2), + top_skills_7d=top_skills, + per_integration=per_integration, + sparkline_7d=sparkline, + ) + + +@router.get("/diagnostic", response_model=DiagnosticResponse) +def run_diagnostic(db: Session = Depends(get_db)) -> DiagnosticResponse: + """Run an end-to-end health sweep and return a structured pass/warn/fail. + + Each check is independent and idempotent — calling /diagnostic ten + times in a row never changes state, only reports it. The UI surfaces + the verdict as a single "Run diagnostic" button so non-tech users + have a one-click answer to "is everything OK?" + + Checks (current set, additive over time): + backend_db — can we round-trip a SELECT 1 + schema_migrated — alembic head matches the expected revision + integrations_paired — at least one active or cookie_expired integration + no_cookie_expired — no integrations need re-sign-in + no_stuck_in_progress — no ops have been in_progress for > 5 minutes + sync_recent — at least one integration synced in the last hour + conflicts_low — diverged_links_total < 20 + pair_attempts_quiet — fewer than 30 pair attempts in the last hour + """ + from datetime import timedelta as _td, timezone as _tz + from sqlalchemy import func as _func, text as _sql_text + + now = datetime.now(_tz.utc) + checks: list[DiagnosticCheck] = [] + + # 1. backend_db + try: + db.execute(_sql_text("SELECT 1")).scalar_one() + checks.append( + DiagnosticCheck( + id="backend_db", + label="Backend database reachable", + status="pass", + detail="SkillNote can reach its own database.", + ) + ) + except Exception as e: # pragma: no cover - hard to reach in tests + checks.append( + DiagnosticCheck( + id="backend_db", + label="Backend database reachable", + status="fail", + detail=f"DB unreachable: {str(e)[:200]}", + ) + ) + + # 2. schema_migrated — read alembic_version current head. + EXPECTED_HEAD = "0021_audit_cookie_expired" + try: + head = db.execute( + _sql_text("SELECT version_num FROM alembic_version") + ).scalar_one_or_none() + if head == EXPECTED_HEAD: + checks.append( + DiagnosticCheck( + id="schema_migrated", + label="Database schema up to date", + status="pass", + detail=f"Schema at expected head: {EXPECTED_HEAD}.", + ) + ) + else: + checks.append( + DiagnosticCheck( + id="schema_migrated", + label="Database schema up to date", + status="warn", + detail=( + f"Schema head is {head!r}, expected " + f"{EXPECTED_HEAD!r}. Run `alembic upgrade head`." + ), + ) + ) + except Exception as e: # pragma: no cover + checks.append( + DiagnosticCheck( + id="schema_migrated", + label="Database schema up to date", + status="fail", + detail=f"Could not read alembic_version: {e}", + ) + ) + + # 3. integrations_paired + integ_rows = db.execute( + select( + ClaudeAIIntegration.status, + _func.count(ClaudeAIIntegration.id), + ) + .where( + ClaudeAIIntegration.status.in_( + ("active", "cookie_expired", "pending_approval", "error") + ) + ) + .group_by(ClaudeAIIntegration.status) + ).all() + counts_by_status = {s: int(c) for s, c in integ_rows} + active_or_expired = counts_by_status.get("active", 0) + counts_by_status.get( + "cookie_expired", 0 + ) + if active_or_expired > 0: + checks.append( + DiagnosticCheck( + id="integrations_paired", + label="At least one browser is paired", + status="pass", + detail=f"{active_or_expired} integration(s) paired.", + ) + ) + else: + checks.append( + DiagnosticCheck( + id="integrations_paired", + label="At least one browser is paired", + status="warn", + detail=( + "No paired browsers yet. Follow the 4-step setup on the " + "claude.ai settings page to add one." + ), + ) + ) + + # 4. no_cookie_expired + cookie_expired_count = counts_by_status.get("cookie_expired", 0) + if cookie_expired_count == 0: + checks.append( + DiagnosticCheck( + id="no_cookie_expired", + label="All paired browsers are signed in", + status="pass", + detail="No browsers need re-sign-in to claude.ai.", + ) + ) + else: + checks.append( + DiagnosticCheck( + id="no_cookie_expired", + label="All paired browsers are signed in", + status="fail", + detail=( + f"{cookie_expired_count} browser(s) need re-sign-in to " + "claude.ai. Sync will resume after sign-in." + ), + ) + ) + + # 5. no_stuck_in_progress + stuck_cutoff = now - _td(minutes=5) + stuck_count = int( + db.execute( + select(_func.count(ClaudeAISyncOperation.id)) + .where(ClaudeAISyncOperation.status == "in_progress") + .where(ClaudeAISyncOperation.started_at < stuck_cutoff) + ).scalar_one() + ) + if stuck_count == 0: + checks.append( + DiagnosticCheck( + id="no_stuck_in_progress", + label="No stuck sync operations", + status="pass", + detail="Every in-flight op has been picked up recently.", + ) + ) + else: + checks.append( + DiagnosticCheck( + id="no_stuck_in_progress", + label="No stuck sync operations", + status="warn", + detail=( + f"{stuck_count} op(s) have been in_progress for > 5 " + "minutes. The extension may have died mid-sync. They " + "auto-release on the next extension tick." + ), + ) + ) + + # 6. sync_recent (only meaningful when at least one integration is paired) + if active_or_expired > 0: + recent_cutoff = now - _td(hours=1) + recent_count = int( + db.execute( + select(_func.count(ClaudeAIIntegration.id)).where( + ClaudeAIIntegration.last_sync_at >= recent_cutoff + ) + ).scalar_one() + ) + if recent_count > 0: + checks.append( + DiagnosticCheck( + id="sync_recent", + label="Recent sync activity", + status="pass", + detail=( + f"{recent_count} integration(s) synced in the last hour." + ), + ) + ) + else: + checks.append( + DiagnosticCheck( + id="sync_recent", + label="Recent sync activity", + status="warn", + detail=( + "No syncs in the last hour. The extension runs once a " + "minute when claude.ai is open — check that you're " + "signed in there." + ), + ) + ) + + # 7. conflicts_low + conflict_count = int( + db.execute( + select(_func.count(ClaudeAISkillLink.id)).where( + ClaudeAISkillLink.conflict_state == "diverged" + ) + ).scalar_one() + ) + if conflict_count < 20: + checks.append( + DiagnosticCheck( + id="conflicts_low", + label="Conflicts manageable", + status="pass", + detail=( + f"{conflict_count} unresolved conflict(s) — within " + "the normal range." + ), + ) + ) + else: + checks.append( + DiagnosticCheck( + id="conflicts_low", + label="Conflicts manageable", + status="warn", + detail=( + f"{conflict_count} unresolved conflicts. Use the " + "Resolve all menu to apply a single policy." + ), + ) + ) + + # 8. pair_attempts_quiet — burst-detect potential brute force. + pair_cutoff = now - _td(hours=1) + pair_attempts_1h = int( + db.execute( + select(_func.count(ClaudeAIPairAttempt.id)).where( + ClaudeAIPairAttempt.created_at >= pair_cutoff + ) + ).scalar_one() + ) + if pair_attempts_1h < 30: + checks.append( + DiagnosticCheck( + id="pair_attempts_quiet", + label="No suspicious pair traffic", + status="pass", + detail=f"{pair_attempts_1h} pair attempt(s) in the last hour.", + ) + ) + else: + checks.append( + DiagnosticCheck( + id="pair_attempts_quiet", + label="No suspicious pair traffic", + status="warn", + detail=( + f"{pair_attempts_1h} pair attempts in the last hour. " + "Check your access logs if you didn't expect this." + ), + ) + ) + + # Overall verdict — fail dominates warn dominates pass. + statuses = {c.status for c in checks} + overall: str + if "fail" in statuses: + overall = "fail" + elif "warn" in statuses: + overall = "warn" + else: + overall = "pass" + + return DiagnosticResponse( + overall=overall, # type: ignore[arg-type] + checks=checks, + generated_at=now, + ) + + +@router.get("/health", response_model=HealthMetricsResponse) +def connector_health(db: Session = Depends(get_db)) -> HealthMetricsResponse: + """Connector subsystem health metrics. + + Backs both the SkillNote settings page's "Connector health" card and + any external monitoring hooked into this endpoint. + """ + from sqlalchemy import func as _func, text as _text + + active = db.execute( + select(_func.count()) + .select_from(ClaudeAIIntegration) + .where(ClaudeAIIntegration.status == "active") + ).scalar_one() + errors = db.execute( + select(_func.count()) + .select_from(ClaudeAIIntegration) + .where(ClaudeAIIntegration.status == "error") + ).scalar_one() + pending = db.execute( + select(_func.count()) + .select_from(ClaudeAISyncOperation) + .where(ClaudeAISyncOperation.status.in_(("pending", "in_progress"))) + ).scalar_one() + failed = db.execute( + select(_func.count()) + .select_from(ClaudeAISyncOperation) + .where(ClaudeAISyncOperation.status == "failed") + ).scalar_one() + diverged = db.execute( + select(_func.count()) + .select_from(ClaudeAISkillLink) + .where(ClaudeAISkillLink.conflict_state == "diverged") + ).scalar_one() + last_audit = db.execute( + select(_func.max(_text("created_at"))).select_from( + _text("claude_ai_audit_log") + ) + ).scalar() + head = db.execute(_text("SELECT version_num FROM alembic_version")).scalar() + + return HealthMetricsResponse( + integrations_active=int(active), + integrations_with_errors=int(errors), + pending_ops_total=int(pending), + failed_ops_total=int(failed), + diverged_links_total=int(diverged), + last_audit_at=last_audit, + schema_version=str(head) if head else "unknown", + ) + + +# ── Per-skill sync toggle (frontend) ────────────────────────────────────────── + + +from pydantic import BaseModel as _BaseModel # local import — small surface + + +class _SkillSyncToggleRequest(_BaseModel): + enabled: bool + + +@router.patch("/skills/{skill_id}/sync", status_code=204) +def toggle_skill_sync( + skill_id: UUID, + body: _SkillSyncToggleRequest, + db: Session = Depends(get_db), +) -> Response: + """Flip the per-skill claude.ai sync toggle. + + Used by the skill detail page to exclude specific skills (e.g. local + dev experiments, sensitive content) from the connector. Disabling a + skill that's already synced does NOT delete it from claude.ai — that + requires an explicit delete. Future uploads simply stop firing. + """ + skill = db.get(Skill, skill_id) + if skill is None: + raise api_error(404, "SKILL_NOT_FOUND", f"Skill {skill_id} not found") + skill.claude_ai_sync_enabled = body.enabled + db.commit() + return Response(status_code=204) + + +@router.post("/extension/telemetry", status_code=204) +def post_telemetry( + body: TelemetryEvent, + integ: ClaudeAIIntegration = Depends(require_extension), +) -> Response: + """Anonymous failure telemetry from the extension. + + Logged server-side; not persisted to the DB because most operators + don't want the extra storage churn. If you want to keep history, + add a `claude_ai_telemetry_events` table in a later migration. + + Inputs are length-capped and pattern-validated via the TelemetryEvent + schema so a malicious bearer can't dump a 10MB blob into the log + pipeline. + """ + _log.info( + "claude-ai extension telemetry: integration=%s category=%s ext_version=%s detail=%s", + integ.id, + body.category, + body.ext_version, + body.detail, + ) + return Response(status_code=204) + + +@router.post( + "/extension/imported-skill", + response_model=ImportedSkillResponse, + status_code=201, +) +def import_skill_from_claude_ai( + claude_ai_skill_id: str = Form(..., max_length=128), + name: str = Form(..., max_length=64), + description: str = Form(..., max_length=1024), + claude_ai_version: Optional[str] = Form(default=None, max_length=64), + bundle: UploadFile = File(...), + integ: ClaudeAIIntegration = Depends(require_extension), + db: Session = Depends(get_db), +) -> ImportedSkillResponse: + """Inbound: extension pushes a claude.ai-authored skill. + + Full bundle ingestion: parses SKILL.md, creates or updates the + SkillNote skill, adds a fresh SkillContentVersion, and upserts the + integration link. Conflict-aware: if both sides changed since the + last sync, the link is marked diverged and surfaced in the UI. + """ + # 1. Validate the ZIP shape + extract metadata via the existing + # bundle_validator. This runs the same security checks (symlinks, + # path traversal, size caps, frontmatter validity) that the manual + # upload path uses, so claude.ai-authored skills can't smuggle + # anything past the validators that local uploads can't. + import tempfile + from app.validators.bundle_validator import ( + FRONTMATTER_RE, + slugify, + validate_zip_and_extract_metadata, + ) + + raw = bundle.file.read() + if not raw: + raise api_error(422, "EMPTY_BUNDLE", "Bundle upload is empty") + + try: + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp: + tmp.write(raw) + tmp_path = tmp.name + try: + parsed_name, parsed_slug, parsed_description = ( + validate_zip_and_extract_metadata(tmp_path) + ) + except ValueError as e: + raise api_error(422, "INVALID_BUNDLE", str(e)) + finally: + import os as _os + try: + _os.unlink(tmp_path) + except OSError: + pass + except zipfile.BadZipFile: + raise api_error(422, "INVALID_ZIP", "Uploaded file is not a valid ZIP archive") + + # If the form metadata disagrees with the bundle's SKILL.md, the + # bundle wins (it's the source of truth). Log a warning so admins + # can investigate extension bugs that drift the two. + if parsed_name != name: + _log.warning( + "claude-ai import: form name %r != bundle name %r; using bundle", + name, parsed_name, + ) + + # 2. Extract content_md (the body of SKILL.md after frontmatter) so + # the SkillContentVersion captures the actual skill instructions. + content_md = "" + try: + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + skill_path = next( + (n for n in zf.namelist() if n.endswith("SKILL.md")), None + ) + if skill_path: + raw_content = zf.read(skill_path).decode("utf-8", errors="ignore") + m = FRONTMATTER_RE.match(raw_content) + content_md = raw_content[m.end():] if m else raw_content + except Exception: # noqa: BLE001 + # Content extraction is best-effort; the validator already + # confirmed SKILL.md exists. + pass + + # 3. Upsert the SkillNote skill. Match on slug first (canonical + # identifier) — if found, link to it; otherwise create. + from datetime import datetime, timezone + import uuid as _uuid + from app.db.models import Skill, SkillContentVersion + + now = datetime.now(timezone.utc) + skill = db.execute( + select(Skill).where(Skill.slug == parsed_slug) + ).scalar_one_or_none() + created_new_skill = False + + if skill is None: + skill = Skill( + id=_uuid.uuid4(), + name=parsed_name, + slug=parsed_slug, + description=parsed_description, + content_md=content_md, + collections=[], + current_version=0, + # Imported from claude.ai — flag so the frontend can show provenance. + claude_ai_sync_enabled=True, + ) + db.add(skill) + db.flush() + created_new_skill = True + + # Capture the local "latest version BEFORE this import" so we can + # distinguish "local was changed by user between syncs" from "local + # change was caused by this import itself." Used by the conflict + # detector below. + pre_import_latest_id = db.execute( + select(SkillContentVersion.id) + .where(SkillContentVersion.skill_id == skill.id) + .where(SkillContentVersion.is_latest.is_(True)) + ).scalar_one_or_none() + + if not created_new_skill: + # Refresh in-place. Bumps current_version via _create_content_version below. + skill.name = parsed_name + skill.description = parsed_description + skill.content_md = content_md + + # 4. Create a SkillContentVersion snapshot. Cannot call + # skills.py's _create_content_version helper directly because it + # would re-trigger enqueue_skill_upload — instead we mark the + # incoming content as the new latest manually and avoid the + # outbound-op echo (the data already lives on claude.ai). + next_ver = (skill.current_version or 0) + 1 + db.execute( + SkillContentVersion.__table__.update() + .where(SkillContentVersion.skill_id == skill.id) + .where(SkillContentVersion.is_latest.is_(True)) + .values(is_latest=False) + ) + new_version = SkillContentVersion( + id=_uuid.uuid4(), + skill_id=skill.id, + version=next_ver, + title=skill.name, + description=skill.description, + content_md=skill.content_md or "", + collections=skill.collections or [], + is_latest=True, + ) + db.add(new_version) + skill.current_version = next_ver + # Flush so the link's FK to skill_content_versions.id resolves. + db.flush() + + # 5. Upsert the link + run conflict detection. + from app.services.claude_ai_sync import detect_link_divergence + + existing_link = db.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.integration_id == integ.id, + ClaudeAISkillLink.claude_ai_skill_id == claude_ai_skill_id, + ) + ).scalar_one_or_none() + + if existing_link is None: + link = ClaudeAISkillLink( + integration_id=integ.id, + skillnote_skill_id=skill.id, + skillnote_version_id=new_version.id, + claude_ai_skill_id=claude_ai_skill_id, + claude_ai_version=claude_ai_version, + last_seen_at=now, + direction="inbound", + ) + db.add(link) + else: + # Conflict check BEFORE we update the link's recorded version. + # Use the PRE-IMPORT local latest (captured above) — not the + # version we're about to create — so we don't false-positive on + # the import itself being a local change. + detect_link_divergence( + db, + link=existing_link, + incoming_claude_ai_version=claude_ai_version, + skillnote_version_id=pre_import_latest_id, + ) + existing_link.skillnote_skill_id = skill.id + existing_link.skillnote_version_id = new_version.id + existing_link.claude_ai_version = claude_ai_version + existing_link.last_seen_at = now + existing_link.direction = "both" + + from app.services.claude_ai_sync import write_audit + write_audit( + db, + event="skill_imported", + integration_id=integ.id, + skill_id=skill.id, + detail={"claude_ai_skill_id": claude_ai_skill_id, "new_skill": created_new_skill}, + ) + db.commit() + return ImportedSkillResponse(skillnote_skill_id=skill.id, created=created_new_skill) + + +# ── Maintenance endpoint (admin-triggered + periodic) ───────────────────────── + + +@router.post("/admin/cleanup-expired-pairings", status_code=200) +def cleanup_expired_pairings(db: Session = Depends(get_db)) -> dict: + """Periodic cleanup of pending_approval rows past their expiry. + + Safe to call from a cron job (e.g. every 5 minutes). Returns the + number of rows expired so monitoring can graph the rate. + """ + from app.services.claude_ai_sync import expire_stale_pairings + expired = expire_stale_pairings(db) + db.commit() + return {"expired": expired} diff --git a/backend/app/api/setup.py b/backend/app/api/setup.py index 08ca00e0..4184cd15 100644 --- a/backend/app/api/setup.py +++ b/backend/app/api/setup.py @@ -18,8 +18,8 @@ # Agents the Connect page understands. Keep the canonical names in sync # with the frontend's `AgentId` union and with the install scripts below. -SUPPORTED_AGENTS = ("claude-code", "openclaw") -AgentLiteral = Literal["claude-code", "openclaw"] +SUPPORTED_AGENTS = ("claude-code", "openclaw", "claude-ai") +AgentLiteral = Literal["claude-code", "openclaw", "claude-ai"] # Buckets for the per-agent state machine on the Connect page. ACTIVE_WINDOW_HOURS = 24 @@ -548,6 +548,80 @@ def get_openclaw_setup_script(request: Request): return PlainTextResponse(script, media_type="text/plain") +_CLAUDE_AI_SETUP_SCRIPT = r'''#!/bin/bash +set -euo pipefail + +API_URL="__API_URL__" +WEB_URL="__WEB_URL__" + +echo "" +echo " S K I L L N O T E -> C L A U D E . A I" +echo "" + +# Claude.ai's sync is a one-time browser-extension install — there's nothing +# to install on this machine itself. Print step-by-step instructions and ping +# the backend so the Connect page knows the user kicked off the flow. + +cat < Extension options). + + 3. Paste this SkillNote URL into the extension: + $API_URL + + 4. Click Connect. A SkillNote tab will open with a pairing code. + + 5. Approve the code on the SkillNote page. + (URL: $WEB_URL/settings/integrations/claude-ai) + + 6. Sign in to claude.ai if you aren't already. That's it — sync runs + automatically every minute while you're logged into claude.ai. + + Status: + $WEB_URL/settings/integrations/claude-ai +EOF + +# ── ping backend so the Connect page tracks the user kicked off this flow ─── +MACHINE_HASH=$(printf '%s' "${HOSTNAME:-host}-${USER:-user}" \ + | shasum -a 256 2>/dev/null \ + | awk '{print $1}' \ + || echo "") +curl -sf --max-time 5 --retry 2 --retry-delay 1 \ + -X POST "$API_URL/v1/setup/installs" \ + -H "Content-Type: application/json" \ + -d "{\"agent\":\"claude-ai\",\"machine_id_hash\":\"$MACHINE_HASH\"}" \ + >/dev/null 2>&1 || true + +echo "" +''' + + +@router.get("/setup/claude-ai") +def get_claude_ai_setup_script(request: Request): + """Return the claude.ai 'install' script — really a tutorial that points + users at the browser extension.""" + urls = _derive_urls(request) + script = (_CLAUDE_AI_SETUP_SCRIPT + .replace("__API_URL__", urls["api"]) + .replace("__WEB_URL__", urls["web"])) + return PlainTextResponse(script, media_type="text/plain") + + # Unified entry point: parses --agent from $@ and delegates to the # right per-agent installer. Keeps each installer's logic isolated (they # touch different home dirs, ship different bundles) while giving users one @@ -618,10 +692,14 @@ def get_openclaw_setup_script(request: Request): TARGET_PATH="/setup/openclaw" AGENT_LABEL="OpenClaw" ;; + claude-ai|claude_ai|claudeai|ca) + TARGET_PATH="/setup/claude-ai" + AGENT_LABEL="claude.ai (browser)" + ;; *) echo "Error: unknown agent '$AGENT'." echo "" - echo "Supported agents: claude-code, openclaw" + echo "Supported agents: claude-code, openclaw, claude-ai" exit 2 ;; esac diff --git a/backend/app/api/skills.py b/backend/app/api/skills.py index c1ba994f..79b81902 100644 --- a/backend/app/api/skills.py +++ b/backend/app/api/skills.py @@ -91,7 +91,12 @@ def _origin_for_skill(db: Session, skill: Skill) -> Optional[SkillOrigin]: def _create_content_version(db: Session, skill: Skill) -> SkillContentVersion: - """Snapshot current skill state as a new content version.""" + """Snapshot current skill state as a new content version. + + Side effect: enqueues claude.ai sync ops for every active integration. + Coalesces against any already-pending upload op for the same skill so + rapid republishes don't pile up the queue. + """ next_ver = (skill.current_version or 0) + 1 # Clear is_latest on all existing versions for this skill @@ -111,8 +116,33 @@ def _create_content_version(db: Session, skill: Skill) -> SkillContentVersion: is_latest=True, ) db.add(cv) + # Flush so cv.id is available for the sync op payload, but don't commit + # yet — the caller owns the transaction boundary. + db.flush() skill.current_version = next_ver + + # Claude.ai connector hook — fan out an upload op per active integration. + # Imported locally so this module doesn't take a top-level dependency on + # the connector subsystem when it's not configured. + # Skipped for skills with claude_ai_sync_enabled=False (per-skill opt-out). + if getattr(skill, "claude_ai_sync_enabled", True): + try: + from app.services.claude_ai_sync import enqueue_skill_upload + enqueue_skill_upload( + db, + skill_id=skill.id, + version_id=cv.id, + name=skill.name, + description=skill.description, + ) + except Exception: # noqa: BLE001 + # Sync-op enqueue must never block a skill publish. Log and continue. + import logging + logging.getLogger("skillnote.claude_ai").exception( + "Failed to enqueue claude.ai sync op for skill %s; skill saved", skill.id + ) + return cv @@ -266,6 +296,7 @@ def set_latest_version( total_versions=_skill_total_versions(db, skill_row.id), extra_frontmatter=skill_row.extra_frontmatter, origin=_origin_for_skill(db, skill_row), + claude_ai_sync_enabled=skill_row.claude_ai_sync_enabled, created_at=skill_row.created_at, updated_at=skill_row.updated_at, ) @@ -318,6 +349,7 @@ def restore_version( total_versions=_skill_total_versions(db, skill_row.id), extra_frontmatter=skill_row.extra_frontmatter, origin=_origin_for_skill(db, skill_row), + claude_ai_sync_enabled=skill_row.claude_ai_sync_enabled, created_at=skill_row.created_at, updated_at=skill_row.updated_at, ) @@ -340,6 +372,7 @@ def get_skill( total_versions=_skill_total_versions(db, skill_row.id), extra_frontmatter=skill_row.extra_frontmatter, origin=_origin_for_skill(db, skill_row), + claude_ai_sync_enabled=skill_row.claude_ai_sync_enabled, created_at=skill_row.created_at, updated_at=skill_row.updated_at, ) @@ -418,6 +451,7 @@ def create_skill( current_version=skill.current_version or 0, total_versions=_skill_total_versions(db, skill.id), extra_frontmatter=skill.extra_frontmatter, + claude_ai_sync_enabled=skill.claude_ai_sync_enabled, created_at=skill.created_at, updated_at=skill.updated_at, ) @@ -517,6 +551,7 @@ def update_skill( total_versions=_skill_total_versions(db, skill_row.id), extra_frontmatter=skill_row.extra_frontmatter, origin=_origin_for_skill(db, skill_row), + claude_ai_sync_enabled=skill_row.claude_ai_sync_enabled, created_at=skill_row.created_at, updated_at=skill_row.updated_at, ) @@ -528,6 +563,20 @@ def delete_skill( db: Session = Depends(get_db), ): skill_row = _get_skill(skill_slug, db) + # Claude.ai connector hook — fan out a delete op for every integration + # that has this skill linked. Must run BEFORE db.delete: the link rows + # are about to cascade and we need their claude_ai_skill_ids to build + # the op payload. + try: + from app.services.claude_ai_sync import enqueue_skill_delete + enqueue_skill_delete(db, skill_id=skill_row.id) + except Exception: # noqa: BLE001 + import logging + logging.getLogger("skillnote.claude_ai").exception( + "Failed to enqueue claude.ai delete op for skill %s; deleting anyway", + skill_row.id, + ) + db.delete(skill_row) # Notify MCP server of tool-list change (delivered on commit) db.execute(text("SELECT pg_notify('skillnote_skills_changed', 'deleted')")) diff --git a/backend/app/db/models/__init__.py b/backend/app/db/models/__init__.py index 9eb20cf3..4cb86765 100644 --- a/backend/app/db/models/__init__.py +++ b/backend/app/db/models/__init__.py @@ -1,5 +1,14 @@ from app.db.models.agent_install import AgentInstall from app.db.models.analytics_event import AnalyticsEvent +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) +from app.db.models.claude_ai_polish import ( + ClaudeAIAuditLog, + ClaudeAIPairAttempt, +) from app.db.models.collection import Collection from app.db.models.comment import Comment from app.db.models.import_source import ( @@ -24,6 +33,11 @@ "AnalyticsEvent", "SkillRating", "Collection", + "ClaudeAIIntegration", + "ClaudeAISkillLink", + "ClaudeAISyncOperation", + "ClaudeAIAuditLog", + "ClaudeAIPairAttempt", "ImportSource", "SOURCE_TYPES", "IMPORT_KINDS", diff --git a/backend/app/db/models/claude_ai.py b/backend/app/db/models/claude_ai.py new file mode 100644 index 00000000..ab1b0a63 --- /dev/null +++ b/backend/app/db/models/claude_ai.py @@ -0,0 +1,213 @@ +"""Claude.ai connector models. + +Three closely-related tables cluster the integration state: + + * ClaudeAIIntegration — one row per paired browser/extension; stores + hashed tokens + sync scope + status. + * ClaudeAISkillLink — mapping SkillNote skill <-> claude.ai skill, + including conflict state. + * ClaudeAISyncOperation — append-only work queue the extension drains. + +See docs/claude-ai-integration.md for the design rationale. + +Token storage: extension_token and pairing_token are kept as **sha256 hex +hashes**, never the raw values. The raw token only exists on the wire +(returned to the extension once at issuance) and inside the extension's +local `chrome.storage.local`. A DB dump cannot replay sessions. +""" + +import uuid +from datetime import datetime +from typing import Optional + +from sqlalchemy import ( + DateTime, + ForeignKey, + Integer, + Text, + UniqueConstraint, + Index, + func, +) +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import Mapped, mapped_column + +from app.db.base import Base + + +class ClaudeAIIntegration(Base): + __tablename__ = "claude_ai_integrations" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + # Nullable until ACL lands; per memory `[CLAUDE.md drift]` and the no-auth + # note in the project README, skillnote currently has no user model. + user_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), nullable=True + ) + + # `pending_approval` | `active` | `cookie_expired` | `disconnected` | `error` + # Kept as Text in the model (matching the PG enum on the wire) — keeps the + # Python type simple. App layer constrains the allowed values via Pydantic. + status: Mapped[str] = mapped_column(Text, nullable=False) + scope: Mapped[str] = mapped_column(Text, nullable=False, default="both") + + claude_ai_org_id: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + browser_label: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + # Pairing handshake — populated while status='pending_approval', nulled + # after redemption. The raw pairing_code is shown to the user; the raw + # pairing_token is held by the extension. The DB only sees the hashes. + pairing_code: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + pairing_token_hash: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + pairing_expires_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + # Set by /pair/approve; consumed and cleared by the extension's next + # /pair/status poll (which atomically issues the extension token and + # flips status to 'active'). Lets the Device Code Flow stay cleanly + # stateless without stashing raw tokens in any column. + pairing_approved_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + + # Long-lived bearer token (hashed) the extension sends on every API call + # after the pairing is approved. Compared via constant-time hash equality + # in the auth dependency. + extension_token_hash: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + last_sync_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + last_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + # `ask` (default) | `skillnote_wins` | `claude_ai_wins` + conflict_policy: Mapped[str] = mapped_column(Text, nullable=False, default="ask") + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + server_default=func.now(), + onupdate=func.now(), + nullable=False, + ) + + __table_args__ = ( + Index( + "ix_claude_ai_integrations_user_id_status", + "user_id", + "status", + ), + ) + + +class ClaudeAISkillLink(Base): + __tablename__ = "claude_ai_skill_links" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + integration_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=False, + ) + # Nullable so a claude.ai-authored skill can exist as a link record before + # the inbound import op materializes the SkillNote skill row. + skillnote_skill_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), + ForeignKey("skills.id", ondelete="CASCADE"), + nullable=True, + ) + # SET NULL on version-row delete: pruning stale versions shouldn't break + # the link; the next outbound op repopulates from the current latest. + skillnote_version_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), + ForeignKey("skill_content_versions.id", ondelete="SET NULL"), + nullable=True, + ) + claude_ai_skill_id: Mapped[str] = mapped_column(Text, nullable=False) + claude_ai_version: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + last_seen_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + # `outbound` | `inbound` | `both` + direction: Mapped[str] = mapped_column(Text, nullable=False, default="both") + # `none` | `diverged` | `resolved` + conflict_state: Mapped[str] = mapped_column(Text, nullable=False, default="none") + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + server_default=func.now(), + onupdate=func.now(), + nullable=False, + ) + + __table_args__ = ( + UniqueConstraint( + "integration_id", + "claude_ai_skill_id", + name="uq_claude_ai_skill_links_integration_claude_skill", + ), + Index( + "ix_claude_ai_skill_links_skillnote_skill_id", + "skillnote_skill_id", + ), + Index( + "ix_claude_ai_skill_links_integration_id_conflict", + "integration_id", + "conflict_state", + ), + ) + + +class ClaudeAISyncOperation(Base): + __tablename__ = "claude_ai_sync_operations" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + integration_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=False, + ) + # `upload` | `update` | `delete` | `list` | `fetch_one` + kind: Mapped[str] = mapped_column(Text, nullable=False) + # Nullable for `list` ops (which don't target a single skill). + skill_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), + ForeignKey("skills.id", ondelete="CASCADE"), + nullable=True, + ) + # Op-specific payload. For upload: { "version_id": "...", "name": "...", + # "description": "...", "zip_url": "..." }. For delete: { "claude_ai_skill_id": "..." }. + payload: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict) + # `pending` | `in_progress` | `completed` | `failed` + status: Mapped[str] = mapped_column(Text, nullable=False, default="pending") + attempts: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + last_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + started_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + completed_at: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True + ) + + __table_args__ = ( + Index( + "ix_claude_ai_sync_operations_integration_status_created", + "integration_id", + "status", + "created_at", + ), + ) diff --git a/backend/app/db/models/claude_ai_polish.py b/backend/app/db/models/claude_ai_polish.py new file mode 100644 index 00000000..10f4b372 --- /dev/null +++ b/backend/app/db/models/claude_ai_polish.py @@ -0,0 +1,85 @@ +"""Polish-layer models for the claude.ai connector. + +Lives alongside the core models in claude_ai.py. Split because: + * The polish layer adds tables for observability (audit log) and security + (rate-limit attempts) that don't belong in the core domain. + * Keeps claude_ai.py focused on the integration/link/op data model. +""" +import uuid +from datetime import datetime +from typing import Optional + +from sqlalchemy import DateTime, ForeignKey, Index, Text, func +from sqlalchemy.dialects.postgresql import INET, JSONB, UUID +from sqlalchemy.orm import Mapped, mapped_column + +from app.db.base import Base + + +class ClaudeAIAuditLog(Base): + """Append-only event feed for the claude.ai connector. + + Captures every load-bearing transition (pair attempted, approved, + skills pushed, conflicts detected) with optional skill + integration + references and a JSONB detail blob for event-specific data. + """ + + __tablename__ = "claude_ai_audit_log" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + integration_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), + ForeignKey("claude_ai_integrations.id", ondelete="CASCADE"), + nullable=True, + ) + event: Mapped[str] = mapped_column(Text, nullable=False) + skill_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), + ForeignKey("skills.id", ondelete="SET NULL"), + nullable=True, + ) + detail: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict) + source_ip: Mapped[Optional[str]] = mapped_column(INET, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + + __table_args__ = ( + Index( + "ix_claude_ai_audit_log_integration_created", + "integration_id", + "created_at", + ), + Index("ix_claude_ai_audit_log_created_at", "created_at"), + ) + + +class ClaudeAIPairAttempt(Base): + """Records pair endpoint hits for rate-limit enforcement. + + Pruning policy: keep ~24h worth — older rows can be dropped by a + periodic cleanup task. For Phase 1 polish we leave them; if churn + becomes a concern, an `archive_old_pair_attempts` cron is easy to add. + """ + + __tablename__ = "claude_ai_pair_attempts" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + source_ip: Mapped[Optional[str]] = mapped_column(INET, nullable=True) + endpoint: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + + __table_args__ = ( + Index( + "ix_claude_ai_pair_attempts_ip_created", + "source_ip", + "created_at", + ), + Index("ix_claude_ai_pair_attempts_created_at", "created_at"), + ) diff --git a/backend/app/db/models/skill.py b/backend/app/db/models/skill.py index c7ce20e8..e9c3dfbb 100644 --- a/backend/app/db/models/skill.py +++ b/backend/app/db/models/skill.py @@ -31,6 +31,13 @@ class Skill(Base): forked_from_source: Mapped[bool] = mapped_column( Boolean, nullable=False, default=False, server_default=sa_false() ) + # Per-skill opt-in for the claude.ai connector. Defaults to True so + # existing skills sync once the connector is paired; flip to False to + # exclude a skill (e.g. local dev experiments) from sync. The UI + # surfaces this toggle on the skill detail page. + claude_ai_sync_enabled: Mapped[bool] = mapped_column( + Boolean, nullable=False, default=True + ) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), nullable=False) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False) diff --git a/backend/app/main.py b/backend/app/main.py index 59197b15..1992251f 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,3 +1,4 @@ +import asyncio import logging from fastapi import FastAPI, HTTPException, Request, Depends @@ -40,6 +41,7 @@ from app.api.marketplace import router as marketplace_router from app.api.openclaw import router as openclaw_router, skill_router as openclaw_skill_router from app.api.cli import router as cli_router +from app.api.claude_ai import router as claude_ai_router app = FastAPI(title="SkillNote Backend", version="0.1.0") @@ -141,6 +143,61 @@ async def generic_exception_handler(_: Request, exc: Exception): app.include_router(openclaw_router) app.include_router(openclaw_skill_router) app.include_router(cli_router) +app.include_router(claude_ai_router) + + +# ── Periodic cleanup: claude.ai pending-pairing expiry ───────────────────── +# Sweep stale pending_approval integrations every 5 minutes. Cheap query +# (indexed on pairing_expires_at) so this doesn't add measurable load. +# +# Kept inside main.py rather than as a separate worker because: (1) the +# operation is idempotent and stateless, (2) the API process is the only +# long-lived backend process today (no celery / no rq), (3) running it +# alongside the API means any deploy automatically picks up the schedule +# without ops coordination. + +_CLEANUP_INTERVAL_SECONDS = 300 # 5 minutes + + +async def _claude_ai_cleanup_loop() -> None: + """Background loop that periodically expires stale pending pairings.""" + from app.db.session import SessionLocal + from app.services.claude_ai_sync import expire_stale_pairings + + log = logging.getLogger("skillnote.claude_ai.cleanup") + while True: + try: + await asyncio.sleep(_CLEANUP_INTERVAL_SECONDS) + with SessionLocal() as db: + expired = expire_stale_pairings(db) + db.commit() + if expired > 0: + log.info("expired %d stale pending pairing(s)", expired) + except asyncio.CancelledError: + log.info("cleanup loop cancelled") + return + except Exception: # noqa: BLE001 + log.exception("cleanup loop error; continuing") + + +@app.on_event("startup") +async def _start_cleanup_loop() -> None: + """Launch the claude.ai cleanup loop alongside the API. + + Stored on app.state so the shutdown handler can cancel it. + """ + app.state.claude_ai_cleanup_task = asyncio.create_task(_claude_ai_cleanup_loop()) + + +@app.on_event("shutdown") +async def _stop_cleanup_loop() -> None: + task = getattr(app.state, "claude_ai_cleanup_task", None) + if task and not task.done(): + task.cancel() + try: + await task + except (asyncio.CancelledError, Exception): # noqa: BLE001 + pass @app.get("/health") diff --git a/backend/app/schemas/claude_ai.py b/backend/app/schemas/claude_ai.py new file mode 100644 index 00000000..56088918 --- /dev/null +++ b/backend/app/schemas/claude_ai.py @@ -0,0 +1,400 @@ +"""Pydantic schemas for the claude.ai connector API. + +Strict literal types for every enum-like field so a typo at the call site is +a 422 instead of a silent bad-state row in the database. +""" + +from datetime import datetime +from typing import Any, Literal, Optional +from uuid import UUID + +from pydantic import BaseModel, ConfigDict, Field + +# ── Pairing flow ────────────────────────────────────────────────────────────── + + +class PairingStartRequest(BaseModel): + """Submitted by the extension to begin a pairing handshake. + + `browser_label` is shown in the SkillNote connected-browsers list so the + user can recognize each pairing (e.g. "Chrome on MacBook Pro"). Optional + because the extension may not always be able to derive it. + """ + + browser_label: Optional[str] = Field(default=None, max_length=128) + + +class PairingStartResponse(BaseModel): + """Returned to the extension after `POST /pair`. + + - `pairing_code` is short and human-friendly (e.g. "7K2J9P") for the user + to read off the extension and confirm in SkillNote's UI. + - `pairing_token` is the long opaque token the extension polls with. + Returned exactly once; never queryable again. + - `redemption_url` points at the SkillNote page where the user approves. + """ + + integration_id: UUID + pairing_code: str + pairing_token: str + redemption_url: str + expires_at: datetime + + +class PairingApproveRequest(BaseModel): + """User-side approval of a pending pairing. + + The SkillNote frontend POSTs this when the user clicks Approve on the + pairing-code prompt page. + """ + + pairing_code: str = Field(..., min_length=4, max_length=16) + + +class PairingStatusResponse(BaseModel): + """Returned to the extension while polling pairing status. + + When approved=True, `extension_token` is included exactly once. The + extension stores it and never sees it again from the server. + """ + + approved: bool + extension_token: Optional[str] = None + + +# ── Integration management ──────────────────────────────────────────────────── + + +class IntegrationStatusResponse(BaseModel): + """Status panel data for one paired browser. + + Read by the SkillNote settings page and the extension popup. + """ + + id: UUID + browser_label: Optional[str] + status: Literal[ + "pending_approval", + "active", + "cookie_expired", + "disconnected", + "error", + ] + scope: Literal["personal", "organization", "both"] + claude_ai_org_id: Optional[str] + last_sync_at: Optional[datetime] + last_error: Optional[str] + conflict_policy: Literal["ask", "skillnote_wins", "claude_ai_wins"] + pending_op_count: int + failed_op_count: int + linked_skill_count: int + + model_config = ConfigDict(from_attributes=True) + + +class IntegrationPatchRequest(BaseModel): + """Subset of integration fields the user can update in-place.""" + + scope: Optional[Literal["personal", "organization", "both"]] = None + conflict_policy: Optional[ + Literal["ask", "skillnote_wins", "claude_ai_wins"] + ] = None + browser_label: Optional[str] = Field(default=None, max_length=128) + + +# ── Sync operations queue (extension <-> backend) ───────────────────────────── + + +class SyncOperationOut(BaseModel): + """One pending operation handed to the extension to execute.""" + + id: UUID + kind: Literal["upload", "update", "delete", "list", "fetch_one"] + skill_id: Optional[UUID] + payload: dict[str, Any] + attempts: int + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class SyncOperationCompleteRequest(BaseModel): + """Extension reports the outcome of an operation. + + `result` is op-kind-specific: + - upload/update: { "claude_ai_skill_id": "...", "claude_ai_version": "..." } + - delete: { } + - list: { "imported_count": N } + - fetch_one: { "claude_ai_skill_id": "...", "version": "..." } + """ + + success: bool + result: Optional[dict[str, Any]] = None + error: Optional[str] = Field(default=None, max_length=2000) + claude_ai_org_id: Optional[str] = Field(default=None, max_length=128) + # When true, the extension is telling us claude.ai rejected the request + # because the user's session expired (cookies are gone or 401-ed). The + # backend uses this to flip integration.status -> cookie_expired AND + # write a `cookie_expired` audit event so the user sees an explanation + # in the activity feed instead of just a generic op failure. + auth_expired: bool = False + + +# ── Reverse sync (claude.ai-authored skill imports) ─────────────────────────── + + +class ImportedSkillRequest(BaseModel): + """Extension posts a claude.ai-authored skill for SkillNote to ingest. + + The actual ZIP is sent as multipart on the same request — this model + covers the JSON metadata field. See the API handler for the multipart + contract. + """ + + claude_ai_skill_id: str = Field(..., max_length=128) + claude_ai_version: Optional[str] = Field(default=None, max_length=64) + name: str = Field(..., max_length=64) + description: str = Field(..., max_length=1024) + + +class ImportedSkillResponse(BaseModel): + """Result of an inbound import.""" + + skillnote_skill_id: UUID + created: bool # True if a new SkillNote skill was created; False if matched existing + + +class KnownSkillIdsResponse(BaseModel): + """Extension fetches the set of claude.ai skill IDs SkillNote already + knows about for this integration. Used to skip re-importing on every + reverse-sync poll. + """ + + claude_ai_skill_ids: list[str] + + +class ExtensionSelfStatusResponse(BaseModel): + """Compact snapshot of this integration the extension can show in its + popup (skills synced, queue depth) without needing UI-auth routes. + Authenticated by the extension's bearer token, so it only ever + reveals this integration's counters.""" + + integration_id: UUID + browser_label: str + status: Literal[ + "pending_approval", "active", "cookie_expired", "disconnected", "error" + ] + linked_skill_count: int + pending_op_count: int + failed_op_count: int + last_sync_at: Optional[datetime] = None + last_error: Optional[str] = None + + +# ── Conflict resolution (frontend → backend) ────────────────────────────────── + + +class ConflictListItem(BaseModel): + """One conflict row for the SkillNote conflict-resolution UI.""" + + link_id: UUID + integration_id: UUID + integration_label: Optional[str] + skillnote_skill_id: Optional[UUID] + skillnote_skill_slug: Optional[str] + skillnote_skill_name: Optional[str] + claude_ai_skill_id: str + claude_ai_version: Optional[str] + last_seen_at: Optional[datetime] + + +class ConflictResolveRequest(BaseModel): + """User picks a winner; backend enqueues the corresponding sync op.""" + + resolution: Literal["keep_skillnote", "keep_claude_ai", "skip"] + + +# ── Audit log / activity feed ───────────────────────────────────────────────── + + +class AuditEventOut(BaseModel): + """One row in the activity feed.""" + + id: UUID + integration_id: Optional[UUID] + event: str + skill_id: Optional[UUID] + detail: dict[str, Any] + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class TelemetryEvent(BaseModel): + """Anonymous failure-event payload from the extension. + + Captured at the boundary so the backend never blindly logs arbitrary + JSON from a bearer-authed client. Length-capped fields prevent the + extension from filling the logs with a 10MB error blob. + """ + + category: str = Field(..., max_length=64, pattern=r"^[a-zA-Z0-9_]+$") + ext_version: str = Field(..., max_length=32) + ts: Optional[datetime] = None + detail: Optional[dict[str, Any]] = None + + +class HealthMetricsResponse(BaseModel): + """Operator-facing health metrics for the connector subsystem. + + Surfaced both at the standard /health endpoint extension and on the + Settings page's "Connector health" card. + """ + + integrations_active: int + integrations_with_errors: int + pending_ops_total: int + failed_ops_total: int + diverged_links_total: int + last_audit_at: Optional[datetime] + schema_version: str # alembic head; lets ops detect drift + + +# ── Sync queue (live what's-in-flight visibility) ──────────────────────────── + + +class SyncQueueItem(BaseModel): + """One row in the live sync queue view. Joins to skill + integration + so the UI can render a meaningful row without N+1 fetches.""" + + id: UUID + kind: Literal["upload", "update", "delete", "list", "fetch_one"] + status: Literal["pending", "in_progress"] + attempts: int + last_error: Optional[str] + created_at: datetime + started_at: Optional[datetime] = None + # Joined fields — populated by the API handler. + integration_id: UUID + integration_label: Optional[str] = None + skill_id: Optional[UUID] = None + skill_slug: Optional[str] = None + skill_name: Optional[str] = None + + +class SyncQueueResponse(BaseModel): + """Snapshot of the active sync queue. + + `items` is sorted by created_at asc (oldest = next-up). `total` is + the unbounded count so the UI can render "showing 50 of 217" when + truncated. `oldest_age_seconds` lets the UI flag stale backlogs. + """ + + items: list[SyncQueueItem] + total: int + pending_count: int + in_progress_count: int + oldest_age_seconds: Optional[float] = None + + +# ── Analytics (iter 18) ────────────────────────────────────────────────────── + + +class TopSkillStat(BaseModel): + skill_id: UUID + skill_slug: str + skill_name: str + sync_count: int + + +class IntegrationActivityStat(BaseModel): + integration_id: UUID + integration_label: Optional[str] + syncs_24h: int + failed_24h: int + last_sync_at: Optional[datetime] + + +class SparklinePoint(BaseModel): + """One bucket of the daily-syncs sparkline.""" + date: str # YYYY-MM-DD (UTC) + syncs: int + failed: int + + +class DiagnosticCheck(BaseModel): + """One pass/warn/fail row from the connector diagnostic.""" + + id: str # short stable id, e.g. "backend_reachable" — used as a test key + label: str # human-readable + status: Literal["pass", "warn", "fail"] + detail: str # explanatory text + remediation hint when not pass + + +class DiagnosticResponse(BaseModel): + """Result of a one-click connector health check. + + Bundles N individual checks into a single overall verdict: + - all pass → overall=pass + - any fail → overall=fail + - any warn (no fail) → overall=warn + """ + + overall: Literal["pass", "warn", "fail"] + checks: list[DiagnosticCheck] + generated_at: datetime + + +class ConflictPreviewResponse(BaseModel): + """Per-conflict detail used by the "Keep SkillNote / Keep claude.ai" + preview panel. + + We can only render the SkillNote-side content (the claude.ai + content lives in the extension's browser, not on our server). What + we CAN show is "what changed on the SkillNote side since the last + successful push to claude.ai" — that's the exact text that + 'Keep claude.ai' would overwrite. + """ + + link_id: UUID + integration_id: UUID + integration_label: Optional[str] + skill_id: Optional[UUID] + skill_slug: Optional[str] + skill_name: Optional[str] + # The version we last pushed to claude.ai. None when this is an + # inbound-only link or when the version was deleted. + last_pushed_version_id: Optional[UUID] + last_pushed_version_number: Optional[int] + last_pushed_content_md: Optional[str] + # The current SkillNote-side latest. If this is the same as + # last_pushed_*, the conflict is purely remote-side (the user can + # confidently pick Keep claude.ai). + current_version_id: Optional[UUID] + current_version_number: Optional[int] + current_content_md: Optional[str] + # Whether the local content actually changed since the last push. + local_changed: bool + # claude.ai-side metadata. + claude_ai_skill_id: str + claude_ai_version: Optional[str] + claude_ai_last_seen_at: Optional[datetime] + + +class AnalyticsResponse(BaseModel): + """Sync-throughput + per-integration rollup for the analytics panel. + + 24h / 7d windows are computed against `now - window`. Counts include + completed and failed terminal ops only. The sparkline is 7 daily + UTC-aligned buckets, oldest first. + """ + + skills_synced_24h: int + skills_synced_7d: int + failed_24h: int + failed_7d: int + sync_success_rate_7d: float # 0.0–1.0 (1.0 if no ops in window) + avg_attempts_per_sync_7d: float + top_skills_7d: list[TopSkillStat] + per_integration: list[IntegrationActivityStat] + sparkline_7d: list[SparklinePoint] diff --git a/backend/app/schemas/skill.py b/backend/app/schemas/skill.py index 75aab9b6..83c0af34 100644 --- a/backend/app/schemas/skill.py +++ b/backend/app/schemas/skill.py @@ -49,6 +49,10 @@ class SkillDetail(BaseModel): total_versions: int = 0 extra_frontmatter: Optional[str] = None origin: Optional[SkillOrigin] = None + # Per-skill claude.ai connector opt-in. Defaults to True so a skill + # created before the connector shipped still syncs once a browser + # is paired. The UI surfaces a toggle on the skill detail page. + claude_ai_sync_enabled: bool = True created_at: datetime updated_at: datetime diff --git a/backend/app/services/claude_ai_sync.py b/backend/app/services/claude_ai_sync.py new file mode 100644 index 00000000..ac2ee082 --- /dev/null +++ b/backend/app/services/claude_ai_sync.py @@ -0,0 +1,617 @@ +"""Service helpers for the claude.ai connector. + +Two responsibilities: + + 1. Token issuance + verification — the extension sends raw tokens; we + store only sha256 hashes. Compared via constant-time equality. + + 2. Sync-op enqueueing — when a SkillNote skill changes, fan out one + operation per active integration so the extension picks it up on the + next poll. Called from the publish/delete endpoints (Phase 1b will + wire those in; the helper exists today so the contract is locked). +""" + +import hashlib +import hmac +import secrets +import string +from datetime import datetime, timedelta, timezone +from typing import Any, Iterable, Optional +from uuid import UUID + +from sqlalchemy import desc, select +from sqlalchemy.orm import Session + +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) +from app.db.models.claude_ai_polish import ( + ClaudeAIAuditLog, + ClaudeAIPairAttempt, +) + +# Pairing codes are read aloud and typed by humans — avoid visually ambiguous +# glyphs (0/O, 1/I/L) so a misread doesn't produce a wrong-but-valid code. +_PAIRING_CODE_ALPHABET = "23456789ABCDEFGHJKMNPQRSTUVWXYZ" +_PAIRING_CODE_LENGTH = 6 +_PAIRING_TTL = timedelta(minutes=10) + + +def generate_pairing_code() -> str: + """Return a 6-char human-friendly code (no 0/O/1/I/L confusion).""" + return "".join(secrets.choice(_PAIRING_CODE_ALPHABET) for _ in range(_PAIRING_CODE_LENGTH)) + + +def generate_token() -> str: + """Return a 32-byte url-safe random string (the wire token). + + `secrets.token_urlsafe(32)` produces ~43 chars of ~256 bits of entropy — + well past the threshold where guessing is meaningful. + """ + return secrets.token_urlsafe(32) + + +def hash_token(token: str) -> str: + """sha256 hex digest. The function name says hash, not encrypt — we + intentionally cannot recover the raw token from the stored value.""" + return hashlib.sha256(token.encode("utf-8")).hexdigest() + + +def verify_token(raw: str, stored_hash: str) -> bool: + """Constant-time hash comparison. + + Without `compare_digest` an attacker could mount a timing attack to + learn the first N matching characters of the stored hash. + """ + return hmac.compare_digest(hash_token(raw), stored_hash) + + +def pairing_expiry() -> datetime: + """Pairing codes expire 10 minutes after issue. Tight enough to limit + the window for shoulder-surfing the code; long enough to let a user + switch tabs without panic.""" + return datetime.now(timezone.utc) + _PAIRING_TTL + + +# ── Integration lookups ─────────────────────────────────────────────────────── + + +def find_integration_by_extension_token( + db: Session, raw_token: str +) -> Optional[ClaudeAIIntegration]: + """Resolve a bearer token to its integration row. + + We hash first, then SELECT by the hash — never search-by-prefix or + similar leaky comparison. Returns None for unknown / expired tokens. + """ + if not raw_token: + return None + token_hash = hash_token(raw_token) + return db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.extension_token_hash == token_hash, + ClaudeAIIntegration.status.in_(("active", "cookie_expired", "error")), + ) + ).scalar_one_or_none() + + +def find_pending_pairing_by_token( + db: Session, raw_pairing_token: str +) -> Optional[ClaudeAIIntegration]: + """Resolve a pairing-token to its (pending) integration row.""" + if not raw_pairing_token: + return None + token_hash = hash_token(raw_pairing_token) + return db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.pairing_token_hash == token_hash, + ClaudeAIIntegration.status == "pending_approval", + ) + ).scalar_one_or_none() + + +def find_pending_pairing_by_code( + db: Session, pairing_code: str +) -> Optional[ClaudeAIIntegration]: + """Resolve the human-typed pairing code to its (pending) row. + + Codes are short (6 chars) so they live in plaintext on the row — the + short window + low entropy makes a hash pointless. The pairing_token + (long, opaque) is what actually authenticates the extension's poll. + """ + if not pairing_code: + return None + normalized = pairing_code.strip().upper() + return db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.pairing_code == normalized, + ClaudeAIIntegration.status == "pending_approval", + ) + ).scalar_one_or_none() + + +# ── Sync op enqueueing ──────────────────────────────────────────────────────── + + +def active_integrations_for_sync(db: Session) -> list[ClaudeAIIntegration]: + """Every integration eligible to receive new sync ops. + + `cookie_expired` integrations still get ops enqueued — they'll fire as + soon as the user re-logs into claude.ai. Avoids the alternative trap of + silently dropping changes during the expiration window. + """ + return list( + db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.status.in_(("active", "cookie_expired")) + ) + ) + .scalars() + .all() + ) + + +def _has_pending_op( + db: Session, integration_id: UUID, skill_id: UUID, kind: str +) -> bool: + """Coalesce duplicate enqueues. If a user mashes Save 3 times in a row + we don't want three pending uploads — the latest pending one will pull + the current latest version when it runs.""" + return ( + db.execute( + select(ClaudeAISyncOperation.id).where( + ClaudeAISyncOperation.integration_id == integration_id, + ClaudeAISyncOperation.skill_id == skill_id, + ClaudeAISyncOperation.kind == kind, + ClaudeAISyncOperation.status.in_(("pending", "in_progress")), + ) + ).first() + is not None + ) + + +_SYNCABLE_STATUSES = frozenset({"active", "cookie_expired"}) + + +# ── Periodic cleanup ────────────────────────────────────────────────────────── + + +def expire_stale_pairings(db: Session) -> int: + """Mark expired pending pairings and emit audit events. + + Called from a periodic job (every ~5 minutes) so the integrations + table doesn't accumulate pending_approval rows that no one will ever + finish. Returns the number of rows expired. + """ + cutoff = datetime.now(timezone.utc) - _PAIRING_GRACE + stale = list(db.execute( + select(ClaudeAIIntegration).where( + ClaudeAIIntegration.status == "pending_approval", + ClaudeAIIntegration.pairing_expires_at < cutoff, + ) + ).scalars().all()) + for integ in stale: + # Use 'error' state — we don't want a discoverable expired row + # left dangling. Audit event records the reason. + integ.status = "error" + integ.pairing_code = None + integ.pairing_token_hash = None + write_audit( + db, + event="pair_expired", + integration_id=integ.id, + detail={"browser_label": integ.browser_label or ""}, + ) + return len(stale) + + +# ── Conflict auto-detection ─────────────────────────────────────────────────── + + +def detect_link_divergence( + db: Session, + *, + link: ClaudeAISkillLink, + incoming_claude_ai_version: Optional[str], + skillnote_version_id: Optional[UUID] = None, +) -> bool: + """Mark a link as 'diverged' when both sides have changed since last sync. + + Called during inbound import. If the link already has a recorded + claude_ai_version that differs from the incoming version, AND the + SkillNote-side version has advanced beyond what we last recorded, + both sides changed → conflict. + + Returns True if the link was newly marked diverged. + """ + if link.conflict_state == "diverged": + return False + remote_changed = ( + link.claude_ai_version is not None + and incoming_claude_ai_version is not None + and link.claude_ai_version != incoming_claude_ai_version + ) + local_changed = ( + link.skillnote_version_id is not None + and skillnote_version_id is not None + and link.skillnote_version_id != skillnote_version_id + ) + if remote_changed and local_changed: + link.conflict_state = "diverged" + write_audit( + db, + event="conflict_detected", + integration_id=link.integration_id, + skill_id=link.skillnote_skill_id, + detail={ + "claude_ai_skill_id": link.claude_ai_skill_id, + "local_version": str(skillnote_version_id) if skillnote_version_id else None, + "remote_version": incoming_claude_ai_version, + }, + ) + return True + return False + + +# ── Audit log ───────────────────────────────────────────────────────────────── + + +def write_audit( + db: Session, + *, + event: str, + integration_id: Optional[UUID] = None, + skill_id: Optional[UUID] = None, + detail: Optional[dict[str, Any]] = None, + source_ip: Optional[str] = None, +) -> ClaudeAIAuditLog: + """Append an event to the connector audit log. + + Never raises on db errors — audit logging must never block the + primary operation. The caller is expected to handle commit semantics. + """ + row = ClaudeAIAuditLog( + integration_id=integration_id, + event=event, + skill_id=skill_id, + detail=detail or {}, + source_ip=source_ip, + ) + db.add(row) + return row + + +def query_audit( + db: Session, + *, + integration_id: Optional[UUID] = None, + event: Optional[str] = None, + limit: int = 100, + before: Optional[datetime] = None, + since: Optional[datetime] = None, + until: Optional[datetime] = None, + skill_id: Optional[UUID] = None, +) -> list[ClaudeAIAuditLog]: + """Paginated audit-log query for the activity feed UI. + + ``since`` / ``until`` define an inclusive date window for compliance + queries ("show me everything between Mar 1 and Mar 14"). ``skill_id`` + scopes to events that involved a specific skill — useful for + debugging "why did this skill keep failing?" without scrolling the + entire log. All filters AND together. + """ + stmt = select(ClaudeAIAuditLog).order_by(desc(ClaudeAIAuditLog.created_at)) + if integration_id is not None: + stmt = stmt.where(ClaudeAIAuditLog.integration_id == integration_id) + if event is not None: + stmt = stmt.where(ClaudeAIAuditLog.event == event) + if before is not None: + stmt = stmt.where(ClaudeAIAuditLog.created_at < before) + if since is not None: + stmt = stmt.where(ClaudeAIAuditLog.created_at >= since) + if until is not None: + stmt = stmt.where(ClaudeAIAuditLog.created_at <= until) + if skill_id is not None: + stmt = stmt.where(ClaudeAIAuditLog.skill_id == skill_id) + stmt = stmt.limit(min(max(limit, 1), 500)) + return list(db.execute(stmt).scalars().all()) + + +# ── Rate limiting (pair endpoint) ───────────────────────────────────────────── + + +# Tuned to defeat brute-force pairing-code enumeration. Codes are 6 chars +# over 31 glyphs (~887M codes), so 60 attempts/minute gives the attacker +# vanishingly low success probability even before lockout. +_PAIR_RATE_LIMIT_PER_IP = 60 +_PAIR_RATE_WINDOW = timedelta(minutes=1) + +# Pairing rows past their expiry that no extension ever redeemed. +# Pruned by the scheduled cleanup so the integrations table doesn't +# accumulate stale pending_approval rows forever. +_PAIRING_GRACE = timedelta(hours=1) + + +class PairRateLimitExceeded(Exception): + """Raised by record_pair_attempt when the per-IP limit is breached.""" + + +def record_pair_attempt( + db: Session, *, source_ip: Optional[str], endpoint: str +) -> None: + """Record an attempt and enforce per-IP rate limit. + + Strategy: sliding-window count over the most recent N seconds. If the + count for this IP in the window is already >= limit, raise without + inserting (so we don't double-count the rejected request). + + Caller (the API handler) catches PairRateLimitExceeded and returns 429. + + Disabled when `SKILLNOTE_DISABLE_PAIR_RATE_LIMIT=1` (used in test + runs where many pair attempts back-to-back are expected). Never set + in production. + """ + import os as _os + if _os.environ.get("SKILLNOTE_DISABLE_PAIR_RATE_LIMIT") == "1": + return + if source_ip is None: + # Unknown IP — be conservative, don't enforce. Production should + # always have an IP via X-Forwarded-For; if absent, the rate limit + # would lump all unknown IPs into one bucket which is unfair. + return + + window_start = datetime.now(timezone.utc) - _PAIR_RATE_WINDOW + count = db.execute( + select(ClaudeAIPairAttempt.id) + .where(ClaudeAIPairAttempt.source_ip == source_ip) + .where(ClaudeAIPairAttempt.created_at > window_start) + .limit(_PAIR_RATE_LIMIT_PER_IP + 1) + ).all() + if len(count) >= _PAIR_RATE_LIMIT_PER_IP: + raise PairRateLimitExceeded( + f"Too many pairing attempts from {source_ip}; wait a minute and retry" + ) + + db.add( + ClaudeAIPairAttempt( + source_ip=source_ip, + endpoint=endpoint, + ) + ) + + +# ── Sync op enqueueing ──────────────────────────────────────────────────────── + + +def enqueue_skill_upload( + db: Session, + skill_id: UUID, + version_id: UUID, + name: str, + description: str, + integrations: Optional[Iterable[ClaudeAIIntegration]] = None, +) -> list[ClaudeAISyncOperation]: + """Fan out one upload op per active integration with matching scope. + + Returns the freshly-created ops (or empty list if all were coalesced). + Caller is responsible for db.commit() — keeps this composable with + larger transactions in the publish endpoint. + + Defense in depth: even when a caller passes a specific integration list + (e.g. resolve_conflict), this helper still filters by status so a + disconnected integration never receives ops by accident. + """ + candidates = list(integrations) if integrations is not None else active_integrations_for_sync(db) + created: list[ClaudeAISyncOperation] = [] + for integ in candidates: + if integ.status not in _SYNCABLE_STATUSES: + continue + if _has_pending_op(db, integ.id, skill_id, "upload"): + # Replace the in-flight op's payload with the latest version + # so when it does run, it pushes the current content (not the + # stale one queued earlier). + existing = db.execute( + select(ClaudeAISyncOperation).where( + ClaudeAISyncOperation.integration_id == integ.id, + ClaudeAISyncOperation.skill_id == skill_id, + ClaudeAISyncOperation.kind == "upload", + ClaudeAISyncOperation.status == "pending", + ) + ).scalar_one_or_none() + if existing is not None: + existing.payload = { + "version_id": str(version_id), + "name": name, + "description": description, + } + continue + op = ClaudeAISyncOperation( + integration_id=integ.id, + kind="upload", + skill_id=skill_id, + payload={ + "version_id": str(version_id), + "name": name, + "description": description, + }, + ) + db.add(op) + created.append(op) + return created + + +def enqueue_skill_delete( + db: Session, + skill_id: UUID, + integrations: Optional[Iterable[ClaudeAIIntegration]] = None, +) -> list[ClaudeAISyncOperation]: + """Fan out one delete op per integration that has this skill linked. + + Only enqueues for integrations with an existing link — there's no point + asking claude.ai to delete a skill it never knew about. + + Important: delete ops are enqueued WITHOUT a skill_id FK. The skill is + typically deleted in the same transaction as the enqueue, and the + skills→sync_operations FK is ondelete=CASCADE — so a delete op with + skill_id set would be wiped out by the cascade in the same commit, + making the delete a no-op. The claude.ai-side ID lives in the payload + where the extension can read it. + + The original skill_id is preserved in the payload as + `skillnote_skill_id` for forensics / dedup, but not as an FK. + """ + rows = db.execute( + select(ClaudeAISkillLink.integration_id, ClaudeAISkillLink.claude_ai_skill_id) + .where(ClaudeAISkillLink.skillnote_skill_id == skill_id) + ).all() + created: list[ClaudeAISyncOperation] = [] + for integration_id, claude_ai_skill_id in rows: + # Dedup against existing pending delete ops for the same claude.ai + # skill — without skill_id we can't use _has_pending_op directly. + existing = db.execute( + select(ClaudeAISyncOperation.id).where( + ClaudeAISyncOperation.integration_id == integration_id, + ClaudeAISyncOperation.kind == "delete", + ClaudeAISyncOperation.status.in_(("pending", "in_progress")), + ClaudeAISyncOperation.payload["claude_ai_skill_id"].astext == claude_ai_skill_id, + ) + ).first() + if existing is not None: + continue + op = ClaudeAISyncOperation( + integration_id=integration_id, + kind="delete", + skill_id=None, # see docstring + payload={ + "claude_ai_skill_id": claude_ai_skill_id, + "skillnote_skill_id": str(skill_id), # forensics only + }, + ) + db.add(op) + created.append(op) + return created + + +def enqueue_periodic_list( + db: Session, integrations: Optional[Iterable[ClaudeAIIntegration]] = None +) -> list[ClaudeAISyncOperation]: + """One `list` op per active integration. Drives reverse-sync polling. + + Called from an APScheduler tick. Coalesces against any already-pending + list op for the same integration so a long-blocked queue doesn't grow + list ops faster than they drain. + """ + candidates = list(integrations) if integrations is not None else active_integrations_for_sync(db) + created: list[ClaudeAISyncOperation] = [] + for integ in candidates: + existing = db.execute( + select(ClaudeAISyncOperation.id).where( + ClaudeAISyncOperation.integration_id == integ.id, + ClaudeAISyncOperation.kind == "list", + ClaudeAISyncOperation.status.in_(("pending", "in_progress")), + ) + ).first() + if existing is not None: + continue + op = ClaudeAISyncOperation( + integration_id=integ.id, + kind="list", + ) + db.add(op) + created.append(op) + return created + + +# ── Integration counters (for the status response) ──────────────────────────── + + +def integration_counters(db: Session, integration_id: UUID) -> dict[str, int]: + """Compose pending/failed/linked counts for a single integration. + + Uses COUNT(*) at the DB rather than fetching all IDs — old code + loaded up to 100k rows per integration to call `len()`, which + became expensive on busy instances. + """ + from sqlalchemy import func as _func + pending = db.execute( + select(_func.count(ClaudeAISyncOperation.id)) + .where( + ClaudeAISyncOperation.integration_id == integration_id, + ClaudeAISyncOperation.status.in_(("pending", "in_progress")), + ) + ).scalar_one() + failed = db.execute( + select(_func.count(ClaudeAISyncOperation.id)) + .where( + ClaudeAISyncOperation.integration_id == integration_id, + ClaudeAISyncOperation.status == "failed", + ) + ).scalar_one() + linked = db.execute( + select(_func.count(ClaudeAISkillLink.id)) + .where(ClaudeAISkillLink.integration_id == integration_id) + ).scalar_one() + return { + "pending_op_count": int(pending), + "failed_op_count": int(failed), + "linked_skill_count": int(linked), + } + + +def bulk_integration_counters( + db: Session, integration_ids: list[UUID] +) -> dict[UUID, dict[str, int]]: + """N+1-free counters for many integrations at once. + + The list_integrations endpoint calls this with all current integration + IDs — replaces 3*N queries with 3 queries total, regardless of N. + """ + from sqlalchemy import func as _func, case + if not integration_ids: + return {} + + # Operations: GROUP BY integration_id + status; SUM by bucket. + op_rows = db.execute( + select( + ClaudeAISyncOperation.integration_id, + _func.sum( + case( + ( + ClaudeAISyncOperation.status.in_(("pending", "in_progress")), + 1, + ), + else_=0, + ) + ).label("pending"), + _func.sum( + case( + (ClaudeAISyncOperation.status == "failed", 1), + else_=0, + ) + ).label("failed"), + ) + .where(ClaudeAISyncOperation.integration_id.in_(integration_ids)) + .group_by(ClaudeAISyncOperation.integration_id) + ).all() + + link_rows = db.execute( + select( + ClaudeAISkillLink.integration_id, + _func.count(ClaudeAISkillLink.id).label("linked"), + ) + .where(ClaudeAISkillLink.integration_id.in_(integration_ids)) + .group_by(ClaudeAISkillLink.integration_id) + ).all() + + out: dict[UUID, dict[str, int]] = { + i: {"pending_op_count": 0, "failed_op_count": 0, "linked_skill_count": 0} + for i in integration_ids + } + for row in op_rows: + out[row.integration_id]["pending_op_count"] = int(row.pending or 0) + out[row.integration_id]["failed_op_count"] = int(row.failed or 0) + for row in link_rows: + out[row.integration_id]["linked_skill_count"] = int(row.linked or 0) + return out diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1274bc47..a58e0ace 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -31,3 +31,6 @@ test = [ [tool.pytest.ini_options] testpaths = ["tests"] pythonpath = ["."] + +[tool.pytest.ini_options.markers] +slow = "tests that intentionally make many requests (e.g. rate-limit flood tests)" diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index e08c48bf..d23a48c1 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -46,16 +46,26 @@ def db_session(engine): @pytest.fixture def api_request(): - """Return a _req(method, path, body) helper that hits BASE_URL. + """Return a _req(method, path, body, headers) helper that hits BASE_URL. Returns (status_code, parsed_json_body_or_none). Skips the test if the API is unreachable (e.g. running tests without the backend up). """ - def _req(method: str, path: str, body: Optional[dict] = None): + def _req( + method: str, + path: str, + body: Optional[dict] = None, + headers: Optional[dict] = None, + ): + h: dict = {} + if body is not None: + h["Content-Type"] = "application/json" + if headers: + h.update(headers) req = urllib.request.Request( f"{BASE_URL}{path}", method=method, - headers={"Content-Type": "application/json"} if body else {}, + headers=h, data=(json.dumps(body).encode() if body else None), ) try: diff --git a/backend/tests/integration/test_claude_ai_activity_export.py b/backend/tests/integration/test_claude_ai_activity_export.py new file mode 100644 index 00000000..f722a146 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_activity_export.py @@ -0,0 +1,155 @@ +"""Iter 19 — activity log review tools. + +Tests: + - /activity now accepts since/until/skill_id query params. + - since > until returns 422 INVALID_DATE_RANGE. + - /activity/export.csv streams a CSV with proper headers + Content-Disposition. + - Export honors all the filter params (since/until/skill_id/event). +""" +from __future__ import annotations + +import csv +import io +import json +import os +import urllib.error +import urllib.parse +import urllib.request +from datetime import datetime, timedelta, timezone + +import pytest + + +def _enc(s: str) -> str: + """URL-encode an ISO datetime so the `+` in the timezone doesn't get + interpreted as a space by the query-string parser.""" + return urllib.parse.quote(s, safe="") + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _get(path, headers=None): + req = urllib.request.Request(f"{BASE}{path}", method="GET", headers=headers or {}) + try: + with urllib.request.urlopen(req) as r: + ct = r.headers.get("content-type", "") + text = r.read().decode() + if ct.startswith("application/json"): + return r.status, json.loads(text), dict(r.headers) + return r.status, text, dict(r.headers) + except urllib.error.HTTPError as e: + try: + return e.code, json.loads(e.read().decode()), dict(e.headers) + except Exception: + return e.code, e.read().decode(), dict(e.headers) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +class TestDateRangeFilter: + def test_since_accepted(self): + cutoff = (datetime.now(timezone.utc) - timedelta(days=30)).isoformat() + s, body, _ = _get( + f"/v1/integrations/claude-ai/activity?since={_enc(cutoff)}" + ) + assert s == 200, body + assert isinstance(body, list) + + def test_until_accepted(self): + cutoff = datetime.now(timezone.utc).isoformat() + s, body, _ = _get( + f"/v1/integrations/claude-ai/activity?until={_enc(cutoff)}" + ) + assert s == 200, body + assert isinstance(body, list) + + def test_since_until_window_returns_only_in_range(self): + since = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() + until = datetime.now(timezone.utc).isoformat() + s, body, _ = _get( + f"/v1/integrations/claude-ai/activity" + f"?since={_enc(since)}&until={_enc(until)}&limit=10" + ) + assert s == 200, body + for row in body: + t = datetime.fromisoformat(row["created_at"].replace("Z", "+00:00")) + assert datetime.fromisoformat(since) <= t <= datetime.fromisoformat(until) + + def test_inverted_range_returns_422(self): + since = datetime.now(timezone.utc).isoformat() + until = (datetime.now(timezone.utc) - timedelta(days=7)).isoformat() + s, body, _ = _get( + f"/v1/integrations/claude-ai/activity" + f"?since={_enc(since)}&until={_enc(until)}" + ) + assert s == 422, body + assert body.get("error", {}).get("code") == "INVALID_DATE_RANGE" + + def test_skill_id_filter_scopes_to_skill(self): + s, body, _ = _get( + "/v1/integrations/claude-ai/activity" + "?skill_id=00000000-0000-0000-0000-000000000001" + ) + assert s == 200 + # Either empty (no audit rows for that skill) or every row matches. + for row in body: + assert row["skill_id"] == "00000000-0000-0000-0000-000000000001" + + +class TestCsvExport: + def test_export_returns_csv_content_type(self): + s, body, headers = _get("/v1/integrations/claude-ai/activity/export.csv?limit=5") + assert s == 200 + assert headers["content-type"].startswith("text/csv") + # Disposition header instructs the browser to save the file. + cd = headers.get("content-disposition", "") + assert "attachment" in cd + assert "claude-ai-activity.csv" in cd + + def test_export_includes_header_row(self): + s, body, _ = _get("/v1/integrations/claude-ai/activity/export.csv?limit=3") + assert s == 200 + reader = csv.reader(io.StringIO(body)) + rows = list(reader) + assert len(rows) >= 1 + assert rows[0] == ["created_at", "event", "integration_id", "skill_id", "detail"] + + def test_export_rejects_invalid_event_with_422(self): + s, body, _ = _get( + "/v1/integrations/claude-ai/activity/export.csv?event=not_a_kind" + ) + assert s == 422 + assert body.get("error", {}).get("code") == "INVALID_EVENT" + + def test_export_rejects_inverted_date_range(self): + since = datetime.now(timezone.utc).isoformat() + until = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat() + s, body, _ = _get( + f"/v1/integrations/claude-ai/activity/export.csv" + f"?since={_enc(since)}&until={_enc(until)}" + ) + assert s == 422 + assert body.get("error", {}).get("code") == "INVALID_DATE_RANGE" + + def test_export_limit_capped_at_50000(self): + s, _, _ = _get( + "/v1/integrations/claude-ai/activity/export.csv?limit=999999" + ) + # ge=1, le=50000 — over-limit is 422. + assert s == 422 + + def test_export_limit_50000_accepted(self): + s, _, _ = _get( + "/v1/integrations/claude-ai/activity/export.csv?limit=50000" + ) + assert s == 200 + + def test_export_cache_headers_disable_caching(self): + """A re-export must always reflect fresh state — no stale cached + downloads. The handler sets Cache-Control: no-store.""" + s, _, headers = _get( + "/v1/integrations/claude-ai/activity/export.csv?limit=1" + ) + assert s == 200 + assert headers.get("cache-control") == "no-store" diff --git a/backend/tests/integration/test_claude_ai_activity_pagination.py b/backend/tests/integration/test_claude_ai_activity_pagination.py new file mode 100644 index 00000000..b40d7efe --- /dev/null +++ b/backend/tests/integration/test_claude_ai_activity_pagination.py @@ -0,0 +1,104 @@ +"""Activity-feed pagination + event-kind validation tests (round 9). + +Before this round: + * The `event` query param accepted any string. A typo (e.g. `?event=foo`) + returned 0 rows silently, leading to debugging confusion. + * `limit` was effectively unbounded — handler-side default 50, but a + client passing `?limit=99999` would get clamped to 500 by the service + layer without an error response, hiding the misuse. + * The service supported `before=` for cursor pagination but the API + didn't expose it, so the UI could only ever see the most recent page. +""" +from __future__ import annotations + +import json +import os +import urllib.error +import urllib.request + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _get(path): + req = urllib.request.Request(f"{BASE}{path}", method="GET") + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +class TestEventKindValidation: + def test_known_event_kind_is_accepted(self): + s, body = _get("/v1/integrations/claude-ai/activity?event=pair_started&limit=1") + assert s == 200 + assert isinstance(body, list) + + def test_unknown_event_kind_returns_422_with_helpful_message(self): + s, body = _get("/v1/integrations/claude-ai/activity?event=nope") + assert s == 422, body + # FastAPI puts validation errors under `detail`; our custom api_error + # uses the `error.code` envelope. Either is acceptable; just check + # the unknown kind doesn't silently return 200 with an empty list. + text = json.dumps(body) + assert "nope" in text or "INVALID_EVENT" in text or "event" in text + + +class TestLimitBounds: + def test_negative_limit_is_rejected(self): + s, body = _get("/v1/integrations/claude-ai/activity?limit=-1") + # Pydantic/FastAPI validates ge=1 — should be 422. + assert s == 422, body + + def test_oversized_limit_is_rejected(self): + s, body = _get("/v1/integrations/claude-ai/activity?limit=10000") + assert s == 422, body + + def test_limit_at_max_is_accepted(self): + s, _ = _get("/v1/integrations/claude-ai/activity?limit=500") + assert s == 200 + + def test_zero_limit_is_rejected(self): + s, _ = _get("/v1/integrations/claude-ai/activity?limit=0") + assert s == 422 + + +class TestBeforeCursor: + def test_before_param_is_accepted(self): + """A valid ISO timestamp doesn't 4xx — the wire contract is honored + even if the dataset is empty.""" + s, body = _get( + "/v1/integrations/claude-ai/activity" + "?before=2030-01-01T00:00:00Z&limit=5" + ) + assert s == 200, body + assert isinstance(body, list) + + def test_malformed_before_returns_422(self): + s, _ = _get("/v1/integrations/claude-ai/activity?before=not-a-date") + assert s == 422 + + def test_before_filters_to_older_rows(self): + """If the suite has emitted any audit events at all, ordering + guarantees should hold: first row's timestamp must be > second row's + when sorted desc; using that timestamp as `before` returns the rest.""" + s, page1 = _get("/v1/integrations/claude-ai/activity?limit=2") + if s != 200 or len(page1) < 2: + pytest.skip("Not enough audit history to exercise pagination") + # page1 is desc-by-created_at. The 'before' of page1[1].created_at + # should NOT include page1[0]. + cursor = page1[0]["created_at"] + s, page2 = _get( + f"/v1/integrations/claude-ai/activity?before={cursor}&limit=5" + ) + assert s == 200 + ids_page1 = {row["id"] for row in page1[:1]} + ids_page2 = {row["id"] for row in page2} + assert ids_page1.isdisjoint(ids_page2), ( + "before= cursor should EXCLUDE the cursor row itself" + ) diff --git a/backend/tests/integration/test_claude_ai_analytics.py b/backend/tests/integration/test_claude_ai_analytics.py new file mode 100644 index 00000000..8b79b6b6 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_analytics.py @@ -0,0 +1,117 @@ +"""Iter 18 — /v1/integrations/claude-ai/analytics endpoint. + +Returns the 24h/7d sync rollup that drives the analytics panel. + +Contract: + - skills_synced_{24h,7d} / failed_{24h,7d} count terminal ops in window. + - sync_success_rate_7d defaults to 1.0 when there are no ops in window. + - avg_attempts_per_sync_7d is a float; honest 0.0 when no data. + - top_skills_7d max-5 rows, ordered by sync_count desc, with skill name/slug. + - per_integration list never drops integrations (LEFT JOIN), only filters + out disconnected ones. + - sparkline_7d has EXACTLY 7 entries (oldest-first, even for days with 0). +""" +from __future__ import annotations + +import json +import os +import urllib.error +import urllib.request + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _get(path): + req = urllib.request.Request(f"{BASE}{path}", method="GET") + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +class TestAnalyticsShape: + def test_endpoint_returns_200_with_full_shape(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + assert s == 200, body + # All required top-level keys present. + for k in [ + "skills_synced_24h", + "skills_synced_7d", + "failed_24h", + "failed_7d", + "sync_success_rate_7d", + "avg_attempts_per_sync_7d", + "top_skills_7d", + "per_integration", + "sparkline_7d", + ]: + assert k in body, f"missing key {k}" + + def test_sparkline_always_has_7_entries(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + assert s == 200 + assert len(body["sparkline_7d"]) == 7 + # Oldest-first ordering. + dates = [p["date"] for p in body["sparkline_7d"]] + assert dates == sorted(dates), ( + "sparkline_7d must be oldest-first, got " + str(dates) + ) + + def test_each_sparkline_point_has_required_keys(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + for p in body["sparkline_7d"]: + assert set(p.keys()) >= {"date", "syncs", "failed"} + assert isinstance(p["syncs"], int) + assert isinstance(p["failed"], int) + + def test_success_rate_is_between_zero_and_one(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + rate = body["sync_success_rate_7d"] + assert 0.0 <= rate <= 1.0, rate + + def test_top_skills_is_at_most_5(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + assert len(body["top_skills_7d"]) <= 5 + + def test_top_skills_have_skill_name_and_slug(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + for skill in body["top_skills_7d"]: + assert skill["skill_slug"] + assert skill["skill_name"] + assert skill["sync_count"] > 0 + + def test_top_skills_ordered_desc_by_count(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + counts = [s["sync_count"] for s in body["top_skills_7d"]] + assert counts == sorted(counts, reverse=True), counts + + def test_per_integration_excludes_disconnected_rows(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + # We don't directly know which integrations are disconnected from + # the analytics endpoint, but the integrations endpoint does. Any + # integration that's not in per_integration shouldn't be reachable + # via /integrations as active either. Soft check: confirm we at + # least don't see "disconnected" status leak through, by verifying + # no per_integration row corresponds to a disconnected integration + # in /integrations. + s2, integ_list = _get("/v1/integrations/claude-ai/integrations") + if s2 != 200: + pytest.skip("integrations endpoint unavailable") + disconnected = { + i["id"] for i in integ_list if i["status"] == "disconnected" + } + analytics_ids = {p["integration_id"] for p in body["per_integration"]} + assert disconnected.isdisjoint(analytics_ids), ( + "per_integration must not include disconnected integrations" + ) + + def test_avg_attempts_is_a_number(self): + s, body = _get("/v1/integrations/claude-ai/analytics") + assert isinstance(body["avg_attempts_per_sync_7d"], (int, float)) + assert body["avg_attempts_per_sync_7d"] >= 0.0 diff --git a/backend/tests/integration/test_claude_ai_bundle_escaping.py b/backend/tests/integration/test_claude_ai_bundle_escaping.py new file mode 100644 index 00000000..dc22643d --- /dev/null +++ b/backend/tests/integration/test_claude_ai_bundle_escaping.py @@ -0,0 +1,212 @@ +"""Bundle-generation escaping tests. + +The skill-bundle endpoint composes a SKILL.md with YAML frontmatter from +the skill's name + description. A naive `f"---\\nname: {x}\\n---"` is +vulnerable to YAML injection — a description containing `\\n---\\n` or +`\\n` + arbitrary keys could smuggle frontmatter fields into the +uploaded skill. yaml.safe_dump escapes correctly. + +These tests upload skills with adversarial descriptions and verify the +generated SKILL.md round-trips through the same YAML parser without +yielding extra keys. +""" +from __future__ import annotations + +import io +import json +import os +import urllib.error +import urllib.request +import uuid +import zipfile + +import pytest +import yaml + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _post(path, body=None, headers=None): + h = {"Content-Type": "application/json"} if body is not None else {} + if headers: + h.update(headers) + req = urllib.request.Request( + f"{BASE}{path}", + method="POST", + data=(json.dumps(body).encode() if body is not None else None), + headers=h, + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + + +def _get_bytes(path, headers=None): + req = urllib.request.Request(f"{BASE}{path}", headers=headers or {}) + try: + with urllib.request.urlopen(req) as r: + return r.status, r.read() + except urllib.error.HTTPError as e: + return e.code, e.read() + + +@pytest.fixture +def bearer_and_skill(): + """Pair an extension and create a skill with the given description. + Returns (extension_token, skill_id, version_id).""" + def _make(description: str): + # Pair + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "bundle test"}) + if s != 201: + pytest.skip(f"pair endpoint returned {s}") + _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + from urllib.request import Request, urlopen + with urlopen(Request( + f"{BASE}/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + )) as r: + redeemed = json.loads(r.read().decode()) + token = redeemed["extension_token"] + + # Create a skill with the adversarial description. + slug = f"esc-{uuid.uuid4().hex[:6]}" + s, body = _post( + "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": description, + "content_md": "# Test\n\nsome body.", + "collections": [f"esc-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + assert s == 201, f"skill create: {s} {body}" + skill_id = body["id"] + + # Fetch the upload op to get the version_id. + from urllib.request import Request, urlopen + with urlopen(Request( + f"{BASE}/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + )) as r: + ops = json.loads(r.read().decode()) + ours = [op for op in ops if op["payload"].get("name") == slug][0] + return token, skill_id, ours["payload"]["version_id"] + return _make + + +def _parse_frontmatter(skill_md: str) -> dict: + """Mimic the upstream parser claude.ai uses on uploaded skills.""" + import re + m = re.match(r"^---\n(.*?)\n---\n", skill_md, re.DOTALL) + assert m, f"missing frontmatter:\n{skill_md[:200]}" + return yaml.safe_load(m.group(1)) or {} + + +class TestBundleYAMLEscaping: + def test_description_with_newlines(self, bearer_and_skill): + token, skill_id, version_id = bearer_and_skill( + "Line 1\nLine 2\nLine 3" + ) + s, raw = _get_bytes( + f"/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={skill_id}&version_id={version_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 200 + + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + skill_md_path = next(n for n in zf.namelist() if n.endswith("SKILL.md")) + skill_md = zf.read(skill_md_path).decode("utf-8") + + fm = _parse_frontmatter(skill_md) + # The full description is preserved (yaml multiline format). + assert "Line 1" in fm["description"] + assert "Line 2" in fm["description"] + # And no extra keys were smuggled in. + assert set(fm.keys()) == {"name", "description"} + + def test_description_with_yaml_special_chars(self, bearer_and_skill): + # All these would break naive interpolation. + adversarial = 'colons: are special, "quotes" too, and #hashes' + token, skill_id, version_id = bearer_and_skill(adversarial) + s, raw = _get_bytes( + f"/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={skill_id}&version_id={version_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 200 + + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + skill_md_path = next(n for n in zf.namelist() if n.endswith("SKILL.md")) + skill_md = zf.read(skill_md_path).decode("utf-8") + + fm = _parse_frontmatter(skill_md) + assert fm["description"] == adversarial + assert set(fm.keys()) == {"name", "description"} + + def test_description_attempting_yaml_injection(self, bearer_and_skill): + """The exact attack: try to inject an extra frontmatter key by + terminating the description and adding a new key. + + Naive code: f'description: {x}' with x='hi\\n---\\nname: hacked' + produces a SKILL.md with TWO --- separators — claude.ai's parser + would read either the first or second block, and we have no + control over which. + + yaml.safe_dump encodes newlines correctly so this becomes + a multi-line string value, not an escape.""" + adversarial = "innocent\n---\nname: hacked-name\n---\n" + token, skill_id, version_id = bearer_and_skill(adversarial) + s, raw = _get_bytes( + f"/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={skill_id}&version_id={version_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 200 + + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + skill_md_path = next(n for n in zf.namelist() if n.endswith("SKILL.md")) + skill_md = zf.read(skill_md_path).decode("utf-8") + + # CRITICAL: there must be exactly one --- pair as frontmatter + # delimiters (line == '---', no leading whitespace). The injected + # `---` lines inside the description are indented by yaml.safe_dump + # as part of a block-scalar string, which is correctly NOT + # interpreted as a frontmatter delimiter. + delimiter_lines = [ + line for line in skill_md.splitlines() if line == "---" + ] + assert len(delimiter_lines) == 2, ( + f"YAML injection vulnerability! Expected 2 --- delimiters, got " + f"{len(delimiter_lines)}. SKILL.md:\n{skill_md}" + ) + + fm = _parse_frontmatter(skill_md) + # The injected `name: hacked-name` MUST not appear as a top-level key. + assert fm["name"] != "hacked-name" + assert set(fm.keys()) == {"name", "description"} + + def test_description_with_unicode(self, bearer_and_skill): + # Emoji, RTL marks, zero-width spaces should round-trip. + token, skill_id, version_id = bearer_and_skill( + "Emoji 🌶 and Arabic مرحبا plus ZWS​here" + ) + s, raw = _get_bytes( + f"/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={skill_id}&version_id={version_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 200 + + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + skill_md_path = next(n for n in zf.namelist() if n.endswith("SKILL.md")) + skill_md = zf.read(skill_md_path).decode("utf-8") + + fm = _parse_frontmatter(skill_md) + assert "🌶" in fm["description"] + assert "مرحبا" in fm["description"] diff --git a/backend/tests/integration/test_claude_ai_conflict_preview.py b/backend/tests/integration/test_claude_ai_conflict_preview.py new file mode 100644 index 00000000..56df5b8d --- /dev/null +++ b/backend/tests/integration/test_claude_ai_conflict_preview.py @@ -0,0 +1,89 @@ +"""Iter 20 — GET /conflicts/{link_id}/preview. + +Side-by-side conflict preview. The endpoint returns: + - last_pushed_* (the version we last successfully sent to claude.ai) + - current_* (the SkillNote-side latest) + - local_changed flag — True iff the local content diverged from + what was last pushed (i.e. "Keep claude.ai" would overwrite real + local edits) + - claude.ai-side metadata (we never store the remote content here) + +Contract: + - Unknown link_id returns 404. + - When the link has no skillnote_skill_id (inbound-only), the + skillnote fields are all null but the endpoint still succeeds. + - local_changed=False when current_version_id == last_pushed_version_id. +""" +from __future__ import annotations + +import json +import os +import urllib.error +import urllib.request +import uuid + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _get(path): + req = urllib.request.Request(f"{BASE}{path}", method="GET") + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +class TestConflictPreview: + def test_unknown_link_returns_404(self): + s, body = _get( + f"/v1/integrations/claude-ai/conflicts/{uuid.uuid4()}/preview" + ) + assert s == 404 + assert body["error"]["code"] == "LINK_NOT_FOUND" + + def test_malformed_uuid_returns_422(self): + s, _ = _get( + "/v1/integrations/claude-ai/conflicts/not-a-uuid/preview" + ) + assert s == 422 + + def test_returns_full_shape_when_link_exists(self): + # The conflict list endpoint returns any current links — pick the + # first one if it exists, otherwise skip (no data to test against). + s, conflicts = _get("/v1/integrations/claude-ai/conflicts") + assert s == 200 + if not conflicts: + pytest.skip("no conflicts in fixture data to preview") + link_id = conflicts[0]["link_id"] + s, body = _get( + f"/v1/integrations/claude-ai/conflicts/{link_id}/preview" + ) + assert s == 200, body + # All required fields are present, with correct types. + for k in [ + "link_id", + "integration_id", + "integration_label", + "skill_id", + "skill_slug", + "skill_name", + "last_pushed_version_id", + "last_pushed_version_number", + "last_pushed_content_md", + "current_version_id", + "current_version_number", + "current_content_md", + "local_changed", + "claude_ai_skill_id", + "claude_ai_version", + "claude_ai_last_seen_at", + ]: + assert k in body, f"missing key {k}" + assert isinstance(body["local_changed"], bool) + assert body["link_id"] == link_id diff --git a/backend/tests/integration/test_claude_ai_conflicts_flow.py b/backend/tests/integration/test_claude_ai_conflicts_flow.py new file mode 100644 index 00000000..9df5132f --- /dev/null +++ b/backend/tests/integration/test_claude_ai_conflicts_flow.py @@ -0,0 +1,288 @@ +"""Conflict resolution + bundle fetch + telemetry endpoint coverage. + +These tests exercise the Phase 4 conflict flow end-to-end: create a +diverged link, list it, resolve via each of the three resolutions, and +verify the right follow-up op is enqueued (or none, for `skip`). +""" +from __future__ import annotations + +import os +import uuid + +import pytest + + +def _bearer(token: str): + import json + import urllib.error + import urllib.request + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + def _req(method, path, body=None): + h = {"Authorization": f"Bearer {token}"} + if body is not None: + h["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base}{path}", method=method, headers=h, + data=(json.dumps(body).encode() if body is not None else None), + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + return _req + + +@pytest.fixture +def paired_extension(api_request): + status, pair = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "conflict test"}, + ) + if status != 201: + pytest.skip(f"pair endpoint returned {status}") + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}", + ) + return pair["integration_id"], body["extension_token"] + + +@pytest.fixture +def linked_skill(api_request, paired_extension): + """Create a skill, push it, complete the op to materialize a link. + Returns (skill_id, claude_ai_skill_id, integration_id, token).""" + integ_id, token = paired_extension + slug = f"conflict-{uuid.uuid4().hex[:6]}" + status, body = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "conflict resolution test", + "content_md": "# x", + "collections": [f"ca-conflict-{slug[:18]}"], + }, + ) + assert status == 201, f"skill create failed: {status} {body}" + skill_id = body["id"] + + bearer = _bearer(token) + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + claude_ai_skill_id = f"skill_conflict_{uuid.uuid4().hex[:6]}" + bearer( + "POST", + f"/v1/integrations/claude-ai/extension/operations/{ours['id']}/complete", + body={ + "success": True, + "result": {"claude_ai_skill_id": claude_ai_skill_id, "claude_ai_version": "v1"}, + }, + ) + return skill_id, claude_ai_skill_id, integ_id, token + + +class TestBundleFetch: + """The extension fetches a ZIP for each upload op.""" + + def test_bundle_endpoint_returns_zip(self, api_request, paired_extension): + integ_id, token = paired_extension + # Create a skill so we have a version to fetch. + slug = f"bundle-{uuid.uuid4().hex[:6]}" + status, body = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "bundle test", + "content_md": "# Content", + "collections": [f"ca-conflict-{slug[:18]}"], + }, + ) + assert status == 201 + skill_id = body["id"] + + bearer = _bearer(token) + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + version_id = ours["payload"]["version_id"] + + import io + import urllib.request + import zipfile + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={skill_id}&version_id={version_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + with urllib.request.urlopen(req) as r: + assert r.headers["Content-Type"] == "application/zip" + data = r.read() + # Verify it's a valid ZIP with SKILL.md inside. + zf = zipfile.ZipFile(io.BytesIO(data)) + names = zf.namelist() + assert any(n.endswith("SKILL.md") for n in names), f"no SKILL.md in {names}" + + def test_bundle_requires_bearer(self, api_request): + import urllib.error + import urllib.request + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={uuid.uuid4()}&version_id={uuid.uuid4()}", + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 401") + except urllib.error.HTTPError as e: + assert e.code == 401 + + def test_bundle_404_for_unknown_version(self, api_request, paired_extension): + _, token = paired_extension + import urllib.error + import urllib.request + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/skill-bundle" + f"?skill_id={uuid.uuid4()}&version_id={uuid.uuid4()}", + headers={"Authorization": f"Bearer {token}"}, + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 404") + except urllib.error.HTTPError as e: + assert e.code == 404 + + +class TestConflictListing: + def test_diverged_link_appears_in_conflict_list(self, api_request, db_session, linked_skill): + """Phase 4: when a link is marked diverged, it shows up in the + conflicts endpoint with full metadata for the resolution UI.""" + skill_id, claude_ai_skill_id, integ_id, _ = linked_skill + + # Manually mark the link diverged (Phase 1 doesn't yet auto-detect). + from app.db.models.claude_ai import ClaudeAISkillLink + from sqlalchemy import select + link = db_session.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.claude_ai_skill_id == claude_ai_skill_id + ) + ).scalar_one() + link.conflict_state = "diverged" + db_session.commit() + + status, body = api_request("GET", "/v1/integrations/claude-ai/conflicts") + assert status == 200 + ours = [c for c in body if c["claude_ai_skill_id"] == claude_ai_skill_id] + assert len(ours) == 1 + assert ours[0]["skillnote_skill_name"] is not None + assert ours[0]["skillnote_skill_slug"] is not None + + +class TestConflictResolve: + def _make_diverged(self, db_session, linked_skill): + from app.db.models.claude_ai import ClaudeAISkillLink + from sqlalchemy import select + skill_id, claude_ai_skill_id, integ_id, token = linked_skill + link = db_session.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.claude_ai_skill_id == claude_ai_skill_id + ) + ).scalar_one() + link.conflict_state = "diverged" + db_session.commit() + return link.id, skill_id, claude_ai_skill_id, integ_id, token + + def test_skip_clears_conflict(self, api_request, db_session, linked_skill): + link_id, _, claude_ai_skill_id, _, _ = self._make_diverged(db_session, linked_skill) + status, _ = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "skip"}, + ) + assert status == 204 + + # Link should no longer appear in conflicts list. + _, body = api_request("GET", "/v1/integrations/claude-ai/conflicts") + assert not any(c["link_id"] == str(link_id) for c in body) + + def test_keep_skillnote_enqueues_upload(self, api_request, db_session, linked_skill): + link_id, skill_id, _, integ_id, token = self._make_diverged(db_session, linked_skill) + # Drain any existing pending ops first so we can detect the new one. + bearer = _bearer(token) + bearer("GET", "/v1/integrations/claude-ai/extension/operations") + + status, _ = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "keep_skillnote"}, + ) + assert status == 204 + # A new upload op should now be in the queue. + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["skill_id"] == skill_id and op["kind"] == "upload"] + assert len(ours) == 1 + + def test_keep_claude_ai_enqueues_fetch_one(self, api_request, db_session, linked_skill): + link_id, skill_id, claude_ai_skill_id, integ_id, token = self._make_diverged( + db_session, linked_skill + ) + bearer = _bearer(token) + bearer("GET", "/v1/integrations/claude-ai/extension/operations") # drain + status, _ = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "keep_claude_ai"}, + ) + assert status == 204 + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["kind"] == "fetch_one"] + assert len(ours) == 1 + assert ours[0]["payload"]["claude_ai_skill_id"] == claude_ai_skill_id + + def test_resolve_already_resolved_returns_409(self, api_request, db_session, linked_skill): + link_id, _, _, _, _ = self._make_diverged(db_session, linked_skill) + api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "skip"}, + ) + status, body = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "skip"}, + ) + assert status == 409 + assert body["error"]["code"] == "LINK_NOT_IN_CONFLICT" + + def test_invalid_resolution_422(self, api_request, db_session, linked_skill): + link_id, _, _, _, _ = self._make_diverged(db_session, linked_skill) + status, _ = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{link_id}/resolve", + body={"resolution": "merge"}, + ) + assert status == 422 + + +class TestTelemetryEndpoint: + def test_telemetry_accepts_bearer(self, paired_extension): + _, token = paired_extension + bearer = _bearer(token) + status, _ = bearer( + "POST", "/v1/integrations/claude-ai/extension/telemetry", + body={ + "category": "test_event", + "ext_version": "0.1.0-test", + "ts": "2026-05-24T12:00:00Z", + "detail": {"path": "/api/x"}, + }, + ) + assert status == 204 + + def test_telemetry_rejects_unauthed(self, api_request): + status, _ = api_request( + "POST", "/v1/integrations/claude-ai/extension/telemetry", + body={"category": "x"}, + ) + assert status == 401 diff --git a/backend/tests/integration/test_claude_ai_constraints.py b/backend/tests/integration/test_claude_ai_constraints.py new file mode 100644 index 00000000..79c6f130 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_constraints.py @@ -0,0 +1,222 @@ +"""Schema-level integration tests for the claude.ai connector tables. + +Validates the CHECK constraints, unique indexes, and cascade behavior +declared in migration 0019_claude_ai_integration.py. Catches the kind of +regression where a refactor accidentally drops a constraint and lets +junk data into the production DB. +""" +from __future__ import annotations + +import uuid + +import pytest +from sqlalchemy import select, text +from sqlalchemy.exc import IntegrityError + +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) + + +class TestCheckConstraints: + def test_invalid_status_rejected(self, db_session): + bad = ClaudeAIIntegration( + status="bogus", + scope="both", + conflict_policy="ask", + ) + db_session.add(bad) + with pytest.raises(IntegrityError, match="ck_claude_ai_integrations_status"): + db_session.commit() + db_session.rollback() + + def test_invalid_scope_rejected(self, db_session): + bad = ClaudeAIIntegration( + status="active", + scope="all-the-things", + conflict_policy="ask", + ) + db_session.add(bad) + with pytest.raises(IntegrityError, match="ck_claude_ai_integrations_scope"): + db_session.commit() + db_session.rollback() + + def test_invalid_conflict_policy_rejected(self, db_session): + bad = ClaudeAIIntegration( + status="active", + scope="both", + conflict_policy="coin-flip", + ) + db_session.add(bad) + with pytest.raises(IntegrityError, match="ck_claude_ai_integrations_conflict_policy"): + db_session.commit() + db_session.rollback() + + def test_invalid_op_kind_rejected(self, db_session): + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + bad = ClaudeAISyncOperation( + integration_id=integ.id, + kind="not-a-real-kind", + ) + db_session.add(bad) + with pytest.raises(IntegrityError, match="ck_claude_ai_sync_operations_kind"): + db_session.commit() + db_session.rollback() + + def test_invalid_op_status_rejected(self, db_session): + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + bad = ClaudeAISyncOperation( + integration_id=integ.id, + kind="list", + status="halfway", + ) + db_session.add(bad) + with pytest.raises(IntegrityError): + db_session.commit() + db_session.rollback() + + +class TestUniqueConstraints: + def test_extension_token_hash_unique(self, db_session): + """Two integrations cannot share a token hash. This is what makes + the bearer lookup safe — exactly one row matches a given hash. + Partial index: only enforced when extension_token_hash IS NOT NULL. + """ + shared = "a" * 64 + a = ClaudeAIIntegration( + status="active", scope="both", conflict_policy="ask", + extension_token_hash=shared, + ) + b = ClaudeAIIntegration( + status="active", scope="both", conflict_policy="ask", + extension_token_hash=shared, + ) + db_session.add_all([a, b]) + with pytest.raises(IntegrityError): + db_session.commit() + db_session.rollback() + + def test_null_token_hashes_allowed_to_coexist(self, db_session): + """Multiple rows with NULL token hashes are fine (the index is partial).""" + a = ClaudeAIIntegration(status="pending_approval", scope="both", conflict_policy="ask") + b = ClaudeAIIntegration(status="pending_approval", scope="both", conflict_policy="ask") + db_session.add_all([a, b]) + db_session.commit() # should not raise + + def test_skill_link_uniqueness(self, db_session): + """A given claude.ai skill ID can only be linked once per integration.""" + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + a = ClaudeAISkillLink( + integration_id=integ.id, claude_ai_skill_id="skill_dup", + ) + b = ClaudeAISkillLink( + integration_id=integ.id, claude_ai_skill_id="skill_dup", + ) + db_session.add_all([a, b]) + with pytest.raises(IntegrityError, match="uq_claude_ai_skill_links"): + db_session.commit() + db_session.rollback() + + +class TestCascadeBehavior: + def test_delete_integration_cascades_links(self, db_session): + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + link = ClaudeAISkillLink( + integration_id=integ.id, claude_ai_skill_id="skill_for_cascade", + ) + db_session.add(link) + op = ClaudeAISyncOperation(integration_id=integ.id, kind="list") + db_session.add(op) + db_session.commit() + + link_id = link.id + op_id = op.id + + db_session.delete(integ) + db_session.commit() + + # Both link + op should be gone. + remaining_link = db_session.execute( + select(ClaudeAISkillLink.id).where(ClaudeAISkillLink.id == link_id) + ).first() + remaining_op = db_session.execute( + select(ClaudeAISyncOperation.id).where(ClaudeAISyncOperation.id == op_id) + ).first() + assert remaining_link is None, "link should cascade-delete with integration" + assert remaining_op is None, "op should cascade-delete with integration" + + def test_delete_skill_cascades_links(self, db_session): + """When a SkillNote skill is deleted, its claude_ai links die too. + This is what makes the delete-op enqueue race-safe — we read the + link's claude_ai_skill_id BEFORE the cascade fires, then enqueue + the op so the extension can clean up claude.ai's side. + """ + from app.db.models import Skill + + skill = Skill( + id=uuid.uuid4(), + name=f"cascade-{uuid.uuid4().hex[:6]}", + slug=f"cascade-{uuid.uuid4().hex[:6]}", + description="cascade test", + content_md="", + current_version=0, + ) + db_session.add(skill) + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + link = ClaudeAISkillLink( + integration_id=integ.id, + skillnote_skill_id=skill.id, + claude_ai_skill_id="skill_for_skill_cascade", + ) + db_session.add(link) + db_session.commit() + link_id = link.id + + db_session.delete(skill) + db_session.commit() + + remaining = db_session.execute( + select(ClaudeAISkillLink.id).where(ClaudeAISkillLink.id == link_id) + ).first() + assert remaining is None + + +class TestDefaults: + def test_scope_defaults_to_both(self, db_session): + # Use raw SQL to avoid SQLAlchemy populating defaults from the model; + # we want to verify the DB-side server_default is the canonical source. + result = db_session.execute( + text( + "INSERT INTO claude_ai_integrations (status, conflict_policy) " + "VALUES ('active', 'ask') RETURNING scope" + ) + ).first() + assert result[0] == "both" + db_session.rollback() + + def test_op_status_defaults_to_pending(self, db_session): + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.flush() + result = db_session.execute( + text( + "INSERT INTO claude_ai_sync_operations (integration_id, kind) " + "VALUES (:i, 'list') RETURNING status, attempts" + ), + {"i": integ.id}, + ).first() + assert result[0] == "pending" + assert result[1] == 0 + db_session.rollback() diff --git a/backend/tests/integration/test_claude_ai_cookie_expired.py b/backend/tests/integration/test_claude_ai_cookie_expired.py new file mode 100644 index 00000000..d042f2e7 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_cookie_expired.py @@ -0,0 +1,200 @@ +"""Round 12 — cookie_expired flip + audit event. + +Before: extension's only auth-failure signal was the generic `error` string +on a complete_operation call. Backend couldn't distinguish "claude.ai 500" +from "claude.ai 401 / session gone," so it never flipped the integration to +`cookie_expired` and never wrote a matching audit row. UI saw a parade of +generic op_failed events with no remediation hint. + +After: `auth_expired: true` on the complete payload (a) flips +integration.status to `cookie_expired` and (b) emits a `cookie_expired` +audit event. Tests below verify both effects and the validator change +(`cookie_expired` is now in _VALID_AUDIT_EVENTS). +""" +from __future__ import annotations + +import json +import os +import random +import urllib.error +import urllib.request +import uuid + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _unique_ip() -> str: + return f"192.0.2.{random.randint(1, 254)}" + + +def _post(path, body=None, headers=None): + h = {"Content-Type": "application/json"} if body is not None else {} + if headers: + h.update(headers) + req = urllib.request.Request( + f"{BASE}{path}", method="POST", + data=(json.dumps(body).encode() if body is not None else None), + headers=h, + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +def _get(path, headers=None): + req = urllib.request.Request(f"{BASE}{path}", method="GET", headers=headers or {}) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +@pytest.fixture +def paired_with_pending_op(): + """Pair an extension AND seed a skill so an upload op is queued. + + Right after `pair → approve → status`, the operations queue is empty. + The cookie_expired tests need at least one pending op to complete. + Creating a skill via POST /v1/skills auto-enqueues an upload op for + every active integration (see enqueue_skill_upload in + services/claude_ai_sync.py).""" + ip = _unique_ip() + s, pair = _post( + "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "cookie-expired-test"}, + headers={"X-Forwarded-For": ip}, + ) + if s != 201: + pytest.skip(f"pair endpoint returned {s}") + _post( + "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = _get( + f"/v1/integrations/claude-ai/extension/pair/status" + f"?pairing_token={pair['pairing_token']}" + ) + assert body["approved"] + + # Seed a skill — auto-enqueues an `upload` sync op against the + # integration. Unique collection slug avoids the 15-skill collection + # limit that bit us in earlier rounds. + name = f"cookie-test-{uuid.uuid4().hex[:6]}" + collection = f"cookie-{uuid.uuid4().hex[:10]}" + s, _ = _post( + "/v1/skills", + body={ + "name": name, + "slug": name, + "description": "cookie-expired fixture seed", + "content_md": "# seed\n", + "collections": [collection], + }, + ) + if s != 201: + pytest.skip(f"could not seed skill (status {s})") + + return pair["integration_id"], body["extension_token"] + + +class TestCookieExpiredFlip: + def test_auth_expired_true_flips_integration_status(self, paired_with_pending_op): + integ_id, token = paired_with_pending_op + # Queue an op by toggling sync (a simpler way: just look at any + # op pulled by the extension). For the cookie_expired path we just + # need an op to complete with auth_expired=true. We'll trigger + # `list` reverse sync by hitting the operations endpoint and + # synthesizing a completion below. + # Approach: directly use the extension's complete endpoint on a + # bogus op id — it 404s. So instead, we pull pending ops first. + s, ops = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + ) + if s != 200 or len(ops) == 0: + pytest.skip("no pending ops — would need to create a skill first") + op = ops[0] + s, _ = _post( + f"/v1/integrations/claude-ai/extension/operations/{op['id']}/complete", + body={"success": False, "error": "claude.ai 401", "auth_expired": True}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 204 + # Now read the integrations list and confirm status flipped. + s, rows = _get("/v1/integrations/claude-ai/integrations") + row = next((r for r in rows if r["id"] == integ_id), None) + assert row is not None + assert row["status"] == "cookie_expired", row + + def test_cookie_expired_audit_event_is_written(self, paired_with_pending_op): + integ_id, token = paired_with_pending_op + s, ops = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + ) + if s != 200 or len(ops) == 0: + pytest.skip("no pending ops") + op = ops[0] + _post( + f"/v1/integrations/claude-ai/extension/operations/{op['id']}/complete", + body={"success": False, "error": "claude.ai 401", "auth_expired": True}, + headers={"Authorization": f"Bearer {token}"}, + ) + s, events = _get( + f"/v1/integrations/claude-ai/activity?integration_id={integ_id}&event=cookie_expired" + ) + assert s == 200 + assert len(events) >= 1 + # The event detail should include the op_kind that hit the auth + # error — that's how the activity feed can render a useful row. + assert events[0]["event"] == "cookie_expired" + assert "op_kind" in events[0]["detail"] + + +class TestAuthExpiredDefault: + def test_auth_expired_defaults_to_false(self, paired_with_pending_op): + """A vanilla op_failed (without auth_expired) must NOT flip status + to cookie_expired. Only an explicit auth_expired=true does.""" + integ_id, token = paired_with_pending_op + s, ops = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + ) + if s != 200 or len(ops) == 0: + pytest.skip("no pending ops") + op = ops[0] + # 3 failures to exhaust retry budget — finalizes as op_failed, + # not cookie_expired. + for _ in range(3): + _post( + f"/v1/integrations/claude-ai/extension/operations/{op['id']}/complete", + body={"success": False, "error": "claude.ai 500"}, + headers={"Authorization": f"Bearer {token}"}, + ) + s, rows = _get("/v1/integrations/claude-ai/integrations") + row = next((r for r in rows if r["id"] == integ_id), None) + assert row is not None + assert row["status"] != "cookie_expired" + + +class TestActivityFilter: + def test_cookie_expired_is_a_valid_event_filter(self, api_request): + s, body = api_request( + "GET", "/v1/integrations/claude-ai/activity?event=cookie_expired" + ) + assert s == 200, body + assert isinstance(body, list) diff --git a/backend/tests/integration/test_claude_ai_diagnostic.py b/backend/tests/integration/test_claude_ai_diagnostic.py new file mode 100644 index 00000000..108acc91 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_diagnostic.py @@ -0,0 +1,110 @@ +"""Iter 21 — GET /v1/integrations/claude-ai/diagnostic. + +One-click connector health audit. Bundles 8 checks into a single +pass/warn/fail verdict. + +Contract: + - Always returns 200 (failures live INSIDE the response, not as HTTP + errors). Operators want one structured payload to scrape, not + branching on status codes. + - Each check has {id, label, status: pass|warn|fail, detail}. + - overall = fail > warn > pass precedence. + - generated_at is a real timestamp. + - The check `id`s are stable string keys (used by ops as dashboard + selectors), so renaming one is a contract break. +""" +from __future__ import annotations + +import json +import os +import urllib.error +import urllib.request + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + +EXPECTED_CHECK_IDS = { + "backend_db", + "schema_migrated", + "integrations_paired", + "no_cookie_expired", + "no_stuck_in_progress", + "conflicts_low", + "pair_attempts_quiet", + # `sync_recent` is conditional — only included when at least one + # integration is paired. +} + + +def _get(path): + req = urllib.request.Request(f"{BASE}{path}", method="GET") + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +class TestDiagnostic: + def test_endpoint_returns_200(self): + s, body = _get("/v1/integrations/claude-ai/diagnostic") + assert s == 200, body + + def test_response_shape(self): + s, body = _get("/v1/integrations/claude-ai/diagnostic") + assert s == 200 + assert "overall" in body + assert body["overall"] in ("pass", "warn", "fail") + assert "checks" in body + assert isinstance(body["checks"], list) + assert "generated_at" in body + # Every check carries all 4 fields with correct types. + for c in body["checks"]: + assert set(c.keys()) >= {"id", "label", "status", "detail"} + assert c["status"] in ("pass", "warn", "fail") + assert isinstance(c["label"], str) + assert isinstance(c["detail"], str) + + def test_includes_required_check_ids(self): + s, body = _get("/v1/integrations/claude-ai/diagnostic") + ids = {c["id"] for c in body["checks"]} + # Mandatory subset always present regardless of integration state. + missing = EXPECTED_CHECK_IDS - ids + assert missing == set(), f"missing required check ids: {missing}" + + def test_check_ids_are_unique(self): + s, body = _get("/v1/integrations/claude-ai/diagnostic") + ids = [c["id"] for c in body["checks"]] + assert len(ids) == len(set(ids)), f"duplicate ids in {ids}" + + def test_backend_db_check_passes(self): + # The diagnostic ITSELF can't run unless the DB is reachable, so + # this check should always pass when we get a 200. + s, body = _get("/v1/integrations/claude-ai/diagnostic") + backend_db = next(c for c in body["checks"] if c["id"] == "backend_db") + assert backend_db["status"] == "pass" + + def test_overall_dominated_by_worst_status(self): + s, body = _get("/v1/integrations/claude-ai/diagnostic") + statuses = {c["status"] for c in body["checks"]} + if "fail" in statuses: + assert body["overall"] == "fail" + elif "warn" in statuses: + assert body["overall"] == "warn" + else: + assert body["overall"] == "pass" + + def test_generated_at_is_a_recent_timestamp(self): + from datetime import datetime, timezone + + s, body = _get("/v1/integrations/claude-ai/diagnostic") + ts = datetime.fromisoformat(body["generated_at"].replace("Z", "+00:00")) + now = datetime.now(timezone.utc) + delta = abs((now - ts).total_seconds()) + # The diagnostic ran milliseconds ago; allow a generous 60s + # clock-skew window for slow CI runners. + assert delta < 60, f"generated_at off by {delta}s" diff --git a/backend/tests/integration/test_claude_ai_e2e_flow.py b/backend/tests/integration/test_claude_ai_e2e_flow.py new file mode 100644 index 00000000..2f808360 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_e2e_flow.py @@ -0,0 +1,236 @@ +"""End-to-end flow test for the claude.ai connector. + +Walks the complete happy path: + + 1. Pair an extension and approve it (active bearer issued). + 2. Publish a SkillNote skill via existing publish flow. + 3. Verify an upload op was enqueued by Phase 1b's _create_content_version hook. + 4. Extension fetches the op (status flips to in_progress). + 5. Extension completes it successfully — verify link row upserted. + 6. Delete the skill — verify delete op is enqueued. + +This is the "does the whole thing actually work end-to-end" test. If any +of the per-component tests pass but this one fails, the connector is +broken at a stitching point. +""" +from __future__ import annotations + +import io +import os +import uuid +import zipfile + +import pytest + + +def _publish_skill(api_request, slug: str) -> dict: + """Create a SkillNote skill via POST /v1/skills. + + This is the path that calls `_create_content_version` (and therefore + triggers the claude.ai upload-op enqueue hook). The /v1/publish + endpoint is for bundle release versions, which is a different code + path that doesn't go through the content-version hook. + """ + # /v1/skills requires at least one collection AND collections have a + # 15-skill cap. Use a slug-derived collection so each test gets its + # own bucket — avoids cross-test interference when this test runs + # against a shared/persistent DB. + collection_name = f"ca-test-{slug[:24]}" + status, body = api_request( + "POST", "/v1/skills", + body={ + "name": slug, + "slug": slug, + "description": "claude-ai e2e flow test skill", + "content_md": "# Test skill\n\nSome content.", + "collections": [collection_name], + }, + ) + if status != 201: + pytest.fail(f"skill create failed: {status} {body}") + return body + + +def _bearer(token): + """Build a bearer-request closure.""" + import json + import os + import urllib.error + import urllib.request + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + def _req(method, path, body=None): + h = {"Authorization": f"Bearer {token}"} + if body is not None: + h["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base}{path}", method=method, headers=h, + data=(json.dumps(body).encode() if body is not None else None), + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + return _req + + +@pytest.fixture +def paired_extension(api_request): + """Standard pair → approve → redeem → return (integration_id, token).""" + status, pair = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "e2e test"}, + ) + if status != 201: + pytest.skip(f"pair endpoint returned {status}") + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}", + ) + assert body["extension_token"] + return pair["integration_id"], body["extension_token"] + + +class TestPublishEnqueueFlow: + """Phase 1b: skill publish triggers upload op enqueue.""" + + def test_publish_creates_upload_op(self, api_request, paired_extension): + integ_id, token = paired_extension + # Use a unique slug per test to avoid collisions with other runs. + slug = f"e2e-pub-{uuid.uuid4().hex[:6]}" + _publish_skill(api_request, slug) + + # Fetch ops via bearer. Should include exactly one upload op for our skill. + bearer = _bearer(token) + status, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + assert status == 200 + upload_ops = [op for op in ops if op["kind"] == "upload"] + assert len(upload_ops) >= 1, f"expected upload op enqueued, got {ops}" + + ours = [op for op in upload_ops if op["payload"].get("name") == slug] + assert len(ours) == 1, f"upload op for {slug} not found among {[o['payload'].get('name') for o in upload_ops]}" + op = ours[0] + assert op["skill_id"] + assert op["payload"]["version_id"] + assert op["payload"]["description"] == "claude-ai e2e flow test skill" + + def test_fetch_marks_op_in_progress(self, api_request, paired_extension): + """Calling /operations atomically transitions pending → in_progress + so a second concurrent extension instance won't grab the same op.""" + integ_id, token = paired_extension + slug = f"e2e-fetch-{uuid.uuid4().hex[:6]}" + _publish_skill(api_request, slug) + bearer = _bearer(token) + + # First fetch claims the op. + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug] + assert ours + op = ours[0] + + # Second fetch must NOT return the same op (it's now in_progress). + _, ops_again = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ids_again = [o["id"] for o in ops_again] + assert op["id"] not in ids_again, "op should not appear twice — locking broken" + + +class TestCompleteOpFlow: + def test_complete_success_creates_link(self, api_request, paired_extension): + integ_id, token = paired_extension + slug = f"e2e-complete-{uuid.uuid4().hex[:6]}" + _publish_skill(api_request, slug) + bearer = _bearer(token) + + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + + # Extension reports success with a claude.ai skill ID + version. + status, _ = bearer( + "POST", + f"/v1/integrations/claude-ai/extension/operations/{ours['id']}/complete", + body={ + "success": True, + "result": {"claude_ai_skill_id": "skill_ext_e2e_01", "claude_ai_version": "v1"}, + "claude_ai_org_id": "org_e2e_01", + }, + ) + assert status == 204 + + # known-skill-ids should now include the new claude_ai_skill_id. + _, known = bearer("GET", "/v1/integrations/claude-ai/extension/known-skill-ids") + assert "skill_ext_e2e_01" in known["claude_ai_skill_ids"] + + # Integration's claude_ai_org_id should be cached. + _, integrations = api_request("GET", "/v1/integrations/claude-ai/integrations") + ours_int = [i for i in integrations if i["id"] == integ_id][0] + assert ours_int["claude_ai_org_id"] == "org_e2e_01" + + def test_complete_failure_retries_until_budget_exhausted( + self, api_request, paired_extension + ): + """Failed ops retry up to 3 attempts, then move to 'failed' status. + The retry counter increments at fetch time, so 3 failures means + 3 fetches; the 4th fetch finds nothing pending.""" + integ_id, token = paired_extension + slug = f"e2e-retry-{uuid.uuid4().hex[:6]}" + _publish_skill(api_request, slug) + bearer = _bearer(token) + + for attempt in range(3): + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug] + if not ours: + pytest.fail(f"expected op to be available on attempt {attempt + 1}") + bearer( + "POST", + f"/v1/integrations/claude-ai/extension/operations/{ours[0]['id']}/complete", + body={"success": False, "error": f"simulated failure #{attempt + 1}"}, + ) + + # After 3 failures the op should be in 'failed' state; not returned. + _, ops_after = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + assert not [op for op in ops_after if op["payload"].get("name") == slug], \ + "exhausted-retry op should not be re-served" + + +class TestDeleteFlow: + def test_delete_enqueues_delete_op_for_linked_skill(self, api_request, paired_extension): + """Phase 1b: skill delete fans out a delete op for every linked + claude.ai integration.""" + integ_id, token = paired_extension + slug = f"e2e-del-{uuid.uuid4().hex[:6]}" + published = _publish_skill(api_request, slug) + skill_slug = published["slug"] + bearer = _bearer(token) + + # Complete the upload so a link row exists. + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + bearer( + "POST", + f"/v1/integrations/claude-ai/extension/operations/{ours['id']}/complete", + body={ + "success": True, + "result": {"claude_ai_skill_id": "skill_ext_e2e_del", "claude_ai_version": "v1"}, + }, + ) + + # Now delete the skill — triggers the Phase 1b delete hook. + status, _ = api_request("DELETE", f"/v1/skills/{skill_slug}") + assert status == 204 + + # The delete op should be in the bearer's queue, payload references + # the claude.ai skill ID we recorded above. + _, ops_after = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + delete_ops = [ + op for op in ops_after + if op["kind"] == "delete" + and op.get("payload", {}).get("claude_ai_skill_id") == "skill_ext_e2e_del" + ] + assert len(delete_ops) == 1, f"expected delete op for skill_ext_e2e_del, got {ops_after}" diff --git a/backend/tests/integration/test_claude_ai_extension_status.py b/backend/tests/integration/test_claude_ai_extension_status.py new file mode 100644 index 00000000..6b1c1351 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_extension_status.py @@ -0,0 +1,151 @@ +"""Integration tests for GET /v1/integrations/claude-ai/extension/status. + +The endpoint is what the extension popup reads to show "skills synced / +pending / failed" counters. Before this, the popup always rendered 0 — +the counters typed on `ExtensionConfig` were never populated. This suite +verifies the wire contract end to end and catches the obvious regression +modes: cross-integration leakage, anonymous access, counter accuracy. +""" +from __future__ import annotations + +import json +import os +import urllib.error +import urllib.request + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _unique_ip() -> str: + """TEST-NET-1 IP unique per call — keeps pair rate-limit state from + leaking between this suite and others sharing the same DB.""" + import random + return f"192.0.2.{random.randint(1, 254)}" + + +def _post(path, body=None, headers=None): + h = {"Content-Type": "application/json"} if body is not None else {} + if headers: + h.update(headers) + req = urllib.request.Request( + f"{BASE}{path}", + method="POST", + data=(json.dumps(body).encode() if body is not None else None), + headers=h, + ) + try: + with urllib.request.urlopen(req) as r: + return r.status, (json.loads(r.read().decode()) if r.headers.get("content-type", "").startswith("application/json") else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + except Exception as e: # pragma: no cover - infra + pytest.skip(f"API not reachable: {e}") + + +def _get(path, headers=None): + req = urllib.request.Request(f"{BASE}{path}", method="GET", headers=headers or {}) + try: + with urllib.request.urlopen(req) as r: + return r.status, (json.loads(r.read().decode()) if r.headers.get("content-type", "").startswith("application/json") else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + except Exception as e: # pragma: no cover - infra + pytest.skip(f"API not reachable: {e}") + + +def _pair_and_redeem(label="ext-status-test"): + # Each pair call uses a fresh TEST-NET-1 IP so the rate limiter + # never blocks us when running alongside the rest of the suite. + ip = _unique_ip() + s, pair = _post( + "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": label}, + headers={"X-Forwarded-For": ip}, + ) + if s != 201: + pytest.skip(f"pair endpoint returned {s}") + _post("/v1/integrations/claude-ai/pair/approve", body={"pairing_code": pair["pairing_code"]}) + s, body = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + assert s == 200 and body["approved"] + return pair["integration_id"], body["extension_token"] + + +class TestExtensionSelfStatus: + def test_anonymous_returns_401(self): + s, body = _get("/v1/integrations/claude-ai/extension/status") + assert s == 401 + assert body["error"]["code"] # any auth-error code + + def test_invalid_bearer_returns_401(self): + s, _ = _get( + "/v1/integrations/claude-ai/extension/status", + headers={"Authorization": "Bearer not-a-real-token-12345"}, + ) + assert s == 401 + + def test_valid_bearer_returns_self_status(self): + integ_id, token = _pair_and_redeem("self-status-happy") + s, body = _get( + "/v1/integrations/claude-ai/extension/status", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s == 200, body + assert body["integration_id"] == integ_id + assert body["status"] == "active" + # New integration with no skills + no ops yet. + assert body["linked_skill_count"] == 0 + assert body["pending_op_count"] == 0 + assert body["failed_op_count"] == 0 + assert body["last_error"] is None + # browser_label is present and the user-supplied value round-trips. + assert body["browser_label"] == "self-status-happy" + + def test_status_only_sees_own_integration(self): + """Two integrations side by side; each token returns its own row.""" + a_id, a_token = _pair_and_redeem("status-A") + b_id, b_token = _pair_and_redeem("status-B") + + _, a = _get( + "/v1/integrations/claude-ai/extension/status", + headers={"Authorization": f"Bearer {a_token}"}, + ) + _, b = _get( + "/v1/integrations/claude-ai/extension/status", + headers={"Authorization": f"Bearer {b_token}"}, + ) + assert a["integration_id"] == a_id + assert b["integration_id"] == b_id + # Labels distinct — wiring confirms tokens never crossed. + assert a["browser_label"] != b["browser_label"] + + def test_status_after_disconnect_returns_401(self): + """Disconnected integrations cannot fetch their own status.""" + integ_id, token = _pair_and_redeem("status-disconnect") + s, _ = _post( + f"/v1/integrations/claude-ai/integrations/{integ_id}", + body=None, + ) + # The DELETE endpoint isn't reached via _post; use a raw urlopen. + req = urllib.request.Request( + f"{BASE}/v1/integrations/claude-ai/integrations/{integ_id}", + method="DELETE", + ) + try: + with urllib.request.urlopen(req) as r: + assert r.status == 204 + except urllib.error.HTTPError as e: + pytest.skip(f"disconnect returned {e.code}") + + s, _ = _get( + "/v1/integrations/claude-ai/extension/status", + headers={"Authorization": f"Bearer {token}"}, + ) + # require_extension rejects non-active integrations. + assert s == 401 diff --git a/backend/tests/integration/test_claude_ai_inbound_ingestion.py b/backend/tests/integration/test_claude_ai_inbound_ingestion.py new file mode 100644 index 00000000..142ad381 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_inbound_ingestion.py @@ -0,0 +1,479 @@ +"""Full inbound-skill-ingestion + conflict auto-detection + cleanup tests. + +Covers the deferred-from-Phase-1 work that's now complete: + - POST /extension/imported-skill creates a real Skill + SkillContentVersion + - Repeat imports update the existing skill (no duplicate slugs) + - Conflict detection fires when both sides changed + - POST /admin/cleanup-expired-pairings prunes stale pending rows +""" +from __future__ import annotations + +import io +import json +import os +import urllib.error +import urllib.request +import uuid +import zipfile + +import pytest + + +def _build_skill_zip(name: str, description: str, body: str = "# Test skill") -> bytes: + """Produce a valid SKILL.md bundle for the inbound import endpoint.""" + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr( + f"{name}/SKILL.md", + f"---\nname: {name}\ndescription: {description}\n---\n\n{body}\n", + ) + return buf.getvalue() + + +def _bearer(token: str): + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + def _req(method, path, body=None): + h = {"Authorization": f"Bearer {token}"} + if body is not None: + h["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base}{path}", method=method, headers=h, + data=(json.dumps(body).encode() if body is not None else None), + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + return _req + + +def _upload_imported_skill( + token: str, + *, + name: str, + description: str, + claude_ai_skill_id: str, + claude_ai_version: str | None = None, + body: str = "# Test skill content", +) -> tuple[int, dict]: + """Multipart POST to /extension/imported-skill.""" + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + zip_bytes = _build_skill_zip(name, description, body) + boundary = "----pytest-inbound-" + uuid.uuid4().hex + parts = [] + for k, v in [ + ("claude_ai_skill_id", claude_ai_skill_id), + ("name", name), + ("description", description), + *([("claude_ai_version", claude_ai_version)] if claude_ai_version else []), + ]: + parts.append(f"--{boundary}\r\n".encode()) + parts.append(f'Content-Disposition: form-data; name="{k}"\r\n\r\n'.encode()) + parts.append(v.encode() + b"\r\n") + parts.append(f"--{boundary}\r\n".encode()) + parts.append( + f'Content-Disposition: form-data; name="bundle"; filename="{name}.zip"\r\n' + f"Content-Type: application/zip\r\n\r\n".encode() + ) + parts.append(zip_bytes) + parts.append(f"\r\n--{boundary}--\r\n".encode()) + body_bytes = b"".join(parts) + + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/imported-skill", + method="POST", + data=body_bytes, + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": f"multipart/form-data; boundary={boundary}", + }, + ) + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + + +@pytest.fixture +def paired_extension(api_request): + # Per-fixture unique TEST-NET-1 IP — keeps this fixture from being + # rate-limited when run alongside other suites that also hit /pair. + # Without this, suite-order determined whether these tests passed. + import random + ip = f"192.0.2.{random.randint(1, 254)}" + status, pair = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "ingestion test"}, + headers={"X-Forwarded-For": ip}, + ) + if status != 201: + pytest.skip(f"pair endpoint returned {status}") + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}", + ) + return pair["integration_id"], body["extension_token"] + + +class TestInboundSkillCreation: + def test_creates_new_skill_when_slug_unique(self, api_request, paired_extension): + _, token = paired_extension + name = f"inbound-new-{uuid.uuid4().hex[:6]}" + status, body = _upload_imported_skill( + token, + name=name, + description="created from claude.ai", + claude_ai_skill_id=f"skill_in_{uuid.uuid4().hex[:6]}", + claude_ai_version="v1", + body="# Imported skill\n\nfrom claude.ai", + ) + assert status == 201 + assert body["created"] is True + skill_id = body["skillnote_skill_id"] + + # Verify the skill is actually queryable. + status, detail = api_request("GET", f"/v1/skills/{name}") + assert status == 200 + assert detail["name"] == name + assert detail["description"] == "created from claude.ai" + assert detail["current_version"] >= 1 + assert "Imported skill" in detail["content_md"] + assert detail["id"] == skill_id + + def test_idempotent_for_same_claude_id(self, api_request, paired_extension): + _, token = paired_extension + name = f"inbound-idem-{uuid.uuid4().hex[:6]}" + ca_id = f"skill_idem_{uuid.uuid4().hex[:6]}" + + s1, b1 = _upload_imported_skill( + token, name=name, description="v1", claude_ai_skill_id=ca_id, + claude_ai_version="v1", body="# v1", + ) + assert s1 == 201 and b1["created"] is True + + s2, b2 = _upload_imported_skill( + token, name=name, description="v2 updated", claude_ai_skill_id=ca_id, + claude_ai_version="v2", body="# v2 updated", + ) + assert s2 == 201 + assert b2["created"] is False, "second import of same claude.ai id should not create new skill" + assert b2["skillnote_skill_id"] == b1["skillnote_skill_id"] + + # Detail should reflect the v2 content. + _, detail = api_request("GET", f"/v1/skills/{name}") + assert "v2 updated" in detail["content_md"] + assert detail["current_version"] >= 2 + + def test_rejects_invalid_bundle(self, api_request, paired_extension): + _, token = paired_extension + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + # Send garbage as the bundle. + boundary = "----p-" + uuid.uuid4().hex + parts = [ + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="claude_ai_skill_id"\r\n\r\nskill_x\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="name"\r\n\r\nbad\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="description"\r\n\r\nbad\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="bundle"; filename="x.zip"\r\n' + b"Content-Type: application/zip\r\n\r\n", + b"NOT A ZIP", + f"\r\n--{boundary}--\r\n".encode(), + ] + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/imported-skill", + method="POST", + data=b"".join(parts), + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": f"multipart/form-data; boundary={boundary}", + }, + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 422") + except urllib.error.HTTPError as e: + assert e.code == 422 + + def test_rejects_skill_missing_frontmatter(self, api_request, paired_extension): + _, token = paired_extension + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + # Build a ZIP with a SKILL.md but no frontmatter. + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("x/SKILL.md", "no frontmatter here") + boundary = "----p-" + uuid.uuid4().hex + parts = [ + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="claude_ai_skill_id"\r\n\r\nskill_x\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="name"\r\n\r\nbad\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="description"\r\n\r\nbad\r\n', + f"--{boundary}\r\n".encode(), + b'Content-Disposition: form-data; name="bundle"; filename="x.zip"\r\n' + b"Content-Type: application/zip\r\n\r\n", + buf.getvalue(), + f"\r\n--{boundary}--\r\n".encode(), + ] + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/imported-skill", + method="POST", + data=b"".join(parts), + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": f"multipart/form-data; boundary={boundary}", + }, + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 422") + except urllib.error.HTTPError as e: + assert e.code == 422 + + +class TestInboundLinksToExisting: + def test_imports_into_existing_skillnote_skill_by_slug(self, api_request, paired_extension): + """If a SkillNote skill with the same slug exists locally, the + import should ATTACH to it rather than fail with a duplicate-slug + error. New SkillContentVersion is appended.""" + _, token = paired_extension + name = f"inbound-attach-{uuid.uuid4().hex[:6]}" + # Per-run unique collection slug — previously `attach-bucket-{name[:10]}` + # collapsed to a fixed prefix `inbound-at` because every name shared + # that head. After 15 runs we hit the 15-skill collection limit and + # the test failed in suite order. + collection = f"attach-{uuid.uuid4().hex[:10]}" + # First, create the skill via the normal API. + status, created = api_request( + "POST", "/v1/skills", + body={ + "name": name, "slug": name, + "description": "local original", + "content_md": "# local v1\n", + "collections": [collection], + }, + ) + assert status == 201 + local_id = created["id"] + + # Now import from claude.ai with same slug → should attach, not duplicate. + s, body = _upload_imported_skill( + token, name=name, description="from claude.ai", + claude_ai_skill_id=f"skill_attach_{uuid.uuid4().hex[:6]}", + claude_ai_version="v1", body="# imported\n", + ) + assert s == 201 + assert body["skillnote_skill_id"] == local_id, ( + "should reuse existing skill, not create new" + ) + # `created` should be False because we reused. + assert body["created"] is False + + +class TestConflictAutoDetection: + def test_detects_diverged_when_both_sides_changed( + self, api_request, paired_extension, db_session + ): + """The full conflict scenario: + 1. SkillNote publishes skill (v1) → upload op enqueued + 2. Extension completes upload, link created with skillnote_version_id=v1 + 3. SkillNote publishes again (v2) → link's skillnote_version_id is now stale + 4. claude.ai-side modifies → inbound import comes in with new claude_ai_version + 5. Both sides changed since last sync → conflict detected + """ + integ_id, token = paired_extension + slug = f"div-{uuid.uuid4().hex[:6]}" + + # 1. Create skill locally — emits upload op. + _, c1 = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "divergence test v1", + "content_md": "# v1", + "collections": [f"div-bucket-{slug[:8]}"], + }, + ) + skill_id = c1["id"] + bearer = _bearer(token) + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + ca_id = f"skill_div_{uuid.uuid4().hex[:6]}" + # 2. Complete the upload — link created. + bearer( + "POST", f"/v1/integrations/claude-ai/extension/operations/{ours['id']}/complete", + body={ + "success": True, + "result": {"claude_ai_skill_id": ca_id, "claude_ai_version": "remote_v1"}, + }, + ) + + # 3. Update the skill locally — bumps current_version. + api_request( + "PATCH", f"/v1/skills/{slug}", + body={"content_md": "# v2 (local edit)"}, + ) + + # 4. Import a new version from claude.ai (simulating claude.ai-side edit). + s, body = _upload_imported_skill( + token, name=slug, description="divergence test v2 (remote)", + claude_ai_skill_id=ca_id, claude_ai_version="remote_v2", + body="# v2 (remote edit)", + ) + assert s == 201 + + # 5. Conflict list should now contain the link. + from app.db.models.claude_ai import ClaudeAISkillLink + from sqlalchemy import select + link = db_session.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.claude_ai_skill_id == ca_id + ) + ).scalar_one() + assert link.conflict_state == "diverged", ( + f"expected diverged, got {link.conflict_state} — both sides changed since last sync" + ) + + # Activity feed should record the detection. + _, audit = api_request( + "GET", + f"/v1/integrations/claude-ai/activity?integration_id={integ_id}&event=conflict_detected", + ) + assert any(e["event"] == "conflict_detected" for e in audit) + + def test_no_conflict_when_only_remote_changed( + self, api_request, paired_extension, db_session + ): + """Only claude.ai changed (SkillNote-side has the same content + as last push). Should NOT mark diverged — the inbound import is + simply the new authoritative version.""" + integ_id, token = paired_extension + slug = f"nodiv-{uuid.uuid4().hex[:6]}" + _, c1 = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "no-divergence v1", + "content_md": "# v1", + "collections": [f"nodiv-bucket-{slug[:8]}"], + }, + ) + bearer = _bearer(token) + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op["payload"].get("name") == slug][0] + ca_id = f"skill_nodiv_{uuid.uuid4().hex[:6]}" + bearer( + "POST", f"/v1/integrations/claude-ai/extension/operations/{ours['id']}/complete", + body={ + "success": True, + "result": {"claude_ai_skill_id": ca_id, "claude_ai_version": "remote_v1"}, + }, + ) + + # Skip the local update step — only remote changed. + _upload_imported_skill( + token, name=slug, description="no-divergence v2 (remote-only)", + claude_ai_skill_id=ca_id, claude_ai_version="remote_v2", + body="# v2 (remote)", + ) + + from app.db.models.claude_ai import ClaudeAISkillLink + from sqlalchemy import select + link = db_session.execute( + select(ClaudeAISkillLink).where( + ClaudeAISkillLink.claude_ai_skill_id == ca_id + ) + ).scalar_one() + assert link.conflict_state != "diverged", ( + f"unchanged local + changed remote should NOT diverge; got {link.conflict_state}" + ) + + +class TestPairingCleanupEndpoint: + def test_admin_cleanup_endpoint(self, api_request): + # Endpoint always exists; even if no rows are stale, returns 0. + status, body = api_request("POST", "/v1/integrations/claude-ai/admin/cleanup-expired-pairings") + assert status == 200 + assert "expired" in body + assert isinstance(body["expired"], int) + + +class TestExpireStalePairings: + def test_expires_old_pending(self, db_session): + """Direct service-level test: insert a pending row with an + expiry timestamp 2 hours in the past, run the cleanup, verify + the row is moved to 'error' state and an audit event fired.""" + from datetime import datetime, timedelta, timezone + from app.db.models.claude_ai import ClaudeAIIntegration + from app.db.models.claude_ai_polish import ClaudeAIAuditLog + from app.services.claude_ai_sync import expire_stale_pairings + from sqlalchemy import select + + stale = ClaudeAIIntegration( + status="pending_approval", + scope="both", + conflict_policy="ask", + browser_label="stale-test", + pairing_code="EXPIRD", + pairing_token_hash="x" * 64, + pairing_expires_at=datetime.now(timezone.utc) - timedelta(hours=2), + ) + db_session.add(stale) + db_session.flush() + + count = expire_stale_pairings(db_session) + db_session.flush() + + assert count >= 1 + db_session.refresh(stale) + assert stale.status == "error" + assert stale.pairing_code is None + assert stale.pairing_token_hash is None + + # Audit event recorded. + audit_rows = db_session.execute( + select(ClaudeAIAuditLog).where( + ClaudeAIAuditLog.integration_id == stale.id, + ClaudeAIAuditLog.event == "pair_expired", + ) + ).scalars().all() + assert len(audit_rows) == 1 + + def test_does_not_expire_recent_pending(self, db_session): + from datetime import datetime, timedelta, timezone + from app.db.models.claude_ai import ClaudeAIIntegration + from app.services.claude_ai_sync import expire_stale_pairings + + fresh = ClaudeAIIntegration( + status="pending_approval", + scope="both", + conflict_policy="ask", + browser_label="fresh-test", + pairing_code="FRESH1", + pairing_token_hash="y" * 64, + pairing_expires_at=datetime.now(timezone.utc) + timedelta(minutes=5), + ) + db_session.add(fresh) + db_session.flush() + before_count = db_session.execute( + __import__("sqlalchemy").text( + "SELECT COUNT(*) FROM claude_ai_integrations WHERE status='pending_approval'" + ) + ).scalar() + + expire_stale_pairings(db_session) + db_session.flush() + db_session.refresh(fresh) + assert fresh.status == "pending_approval", "non-expired row should not be touched" diff --git a/backend/tests/integration/test_claude_ai_ops_queue.py b/backend/tests/integration/test_claude_ai_ops_queue.py new file mode 100644 index 00000000..705c54bc --- /dev/null +++ b/backend/tests/integration/test_claude_ai_ops_queue.py @@ -0,0 +1,201 @@ +"""HTTP integration tests for the claude.ai connector sync-ops queue. + +Covers the bearer auth dependency, fetch-then-complete contract, retry +budget, and the soft-disconnect lifecycle. +""" +from __future__ import annotations + +import pytest + + +@pytest.fixture +def active_extension(api_request): + """Full pair → approve → redeem flow → return (integration_id, extension_token).""" + status, pair = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "pytest active extension"}, + ) + if status != 201: + pytest.skip(f"claude-ai pair endpoint returned {status}") + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, status_body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}", + ) + assert status_body["approved"] and status_body["extension_token"] + return pair["integration_id"], status_body["extension_token"] + + +@pytest.fixture +def bearer_request(api_request, active_extension): + """Convenience: like api_request but with the bearer token attached.""" + _, token = active_extension + import json + import urllib.error + import urllib.request + import os + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + def _req(method: str, path: str, body=None, headers=None): + h = {"Authorization": f"Bearer {token}"} + if headers: + h.update(headers) + if body is not None: + h["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base}{path}", method=method, headers=h, + data=(json.dumps(body).encode() if body is not None else None), + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + return _req + + +class TestExtensionAuth: + def test_missing_bearer_401(self, api_request): + status, body = api_request( + "GET", "/v1/integrations/claude-ai/extension/operations", + ) + assert status == 401 + assert body["error"]["code"] == "MISSING_BEARER_TOKEN" + + def test_invalid_bearer_401(self, api_request): + import json + import urllib.error + import urllib.request + import os + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": "Bearer not-a-real-token"}, + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 401") + except urllib.error.HTTPError as e: + assert e.code == 401 + body = e.read().decode() + assert "INVALID_EXTENSION_TOKEN" in body + + def test_malformed_bearer_header_401(self, api_request): + import urllib.error + import urllib.request + import os + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + # No "Bearer " prefix. + req = urllib.request.Request( + f"{base}/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": "garbage"}, + ) + try: + urllib.request.urlopen(req) + pytest.fail("expected 401") + except urllib.error.HTTPError as e: + assert e.code == 401 + + def test_active_bearer_succeeds(self, bearer_request): + status, body = bearer_request("GET", "/v1/integrations/claude-ai/extension/operations") + assert status == 200 + assert isinstance(body, list) + + +class TestOperationsQueue: + def test_initial_queue_empty(self, bearer_request): + status, body = bearer_request("GET", "/v1/integrations/claude-ai/extension/operations") + assert status == 200 + assert body == [] + + def test_complete_unknown_op_returns_404(self, bearer_request): + import uuid as _uuid + status, body = bearer_request( + "POST", + f"/v1/integrations/claude-ai/extension/operations/{_uuid.uuid4()}/complete", + body={"success": True}, + ) + assert status == 404 + + +class TestIntegrationManagement: + def test_list_includes_active(self, api_request, active_extension): + integ_id, _ = active_extension + status, body = api_request("GET", "/v1/integrations/claude-ai/integrations") + assert status == 200 + ids = [i["id"] for i in body] + assert integ_id in ids + + def test_patch_updates_scope(self, api_request, active_extension): + integ_id, _ = active_extension + status, body = api_request( + "PATCH", f"/v1/integrations/claude-ai/integrations/{integ_id}", + body={"scope": "organization"}, + ) + assert status == 200 + assert body["scope"] == "organization" + + def test_patch_rejects_bad_scope(self, api_request, active_extension): + integ_id, _ = active_extension + status, _ = api_request( + "PATCH", f"/v1/integrations/claude-ai/integrations/{integ_id}", + body={"scope": "made-up-value"}, + ) + assert status == 422 + + def test_disconnect_then_token_revoked( + self, api_request, bearer_request, active_extension + ): + integ_id, _ = active_extension + # Soft-disconnect. + status, _ = api_request( + "DELETE", f"/v1/integrations/claude-ai/integrations/{integ_id}", + ) + assert status == 204 + + # Subsequent bearer call returns 401 (token cleared) or 403 + # (status='disconnected'). Either is acceptable security posture. + status, body = bearer_request( + "GET", "/v1/integrations/claude-ai/extension/operations", + ) + assert status in (401, 403) + assert body["error"]["code"] in ( + "INVALID_EXTENSION_TOKEN", + "INTEGRATION_DISCONNECTED", + ) + + +class TestKnownSkillIdsAndConflicts: + def test_known_skill_ids_empty_initially(self, bearer_request): + status, body = bearer_request( + "GET", "/v1/integrations/claude-ai/extension/known-skill-ids", + ) + assert status == 200 + assert body == {"claude_ai_skill_ids": []} + + def test_conflict_list_omits_fresh_integration(self, api_request, active_extension): + """A freshly-paired integration has no links, so the global + conflict list cannot contain any rows pointing at it. We scope + the assertion to the test's own integration rather than asserting + the global list is empty, because the global list may contain + rows from other concurrent tests against the same DB.""" + integ_id, _ = active_extension + status, body = api_request("GET", "/v1/integrations/claude-ai/conflicts") + assert status == 200 + ours = [c for c in body if c["integration_id"] == integ_id] + assert ours == [], ( + f"fresh integration {integ_id} should have no conflicts, got {ours}" + ) + + def test_resolve_unknown_link_404(self, api_request, active_extension): + import uuid as _uuid + status, body = api_request( + "POST", f"/v1/integrations/claude-ai/conflicts/{_uuid.uuid4()}/resolve", + body={"resolution": "skip"}, + ) + assert status == 404 diff --git a/backend/tests/integration/test_claude_ai_pairing.py b/backend/tests/integration/test_claude_ai_pairing.py new file mode 100644 index 00000000..5ebb37e3 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_pairing.py @@ -0,0 +1,156 @@ +"""End-to-end HTTP tests for the claude.ai connector pairing flow. + +Uses the shared `api_request` fixture (HTTP against a running backend). +These tests skip cleanly when the API isn't reachable. +""" +from __future__ import annotations + +import time + +import pytest + + +@pytest.fixture +def fresh_pairing(api_request): + """Start a pairing, return (status, body) so each test starts clean. + + Failing-fast: skips the whole module if the API doesn't accept the + pair POST (e.g. older deployment without the connector wired in). + """ + status, body = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "pytest pair-fixture"}, + ) + if status != 201: + pytest.skip(f"claude-ai pair endpoint returned {status}; deployment may not have phase 1") + return body + + +class TestPairingHandshake: + def test_pair_returns_all_required_fields(self, fresh_pairing): + b = fresh_pairing + assert "integration_id" in b + assert "pairing_code" in b + assert "pairing_token" in b + assert "redemption_url" in b + assert "expires_at" in b + + def test_pairing_code_is_six_chars(self, fresh_pairing): + assert len(fresh_pairing["pairing_code"]) == 6 + + def test_pairing_token_is_substantial(self, fresh_pairing): + # Long opaque random — at minimum 32 chars. + assert len(fresh_pairing["pairing_token"]) >= 32 + + def test_pairing_token_different_from_code(self, fresh_pairing): + assert fresh_pairing["pairing_token"] != fresh_pairing["pairing_code"] + + def test_status_returns_unapproved_initially(self, api_request, fresh_pairing): + status, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={fresh_pairing['pairing_token']}", + ) + assert status == 200 + assert body == {"approved": False, "extension_token": None} + + def test_approve_returns_204(self, api_request, fresh_pairing): + status, _ = api_request( + "POST", + "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": fresh_pairing["pairing_code"]}, + ) + assert status == 204 + + def test_approve_idempotent(self, api_request, fresh_pairing): + # Approving twice is harmless. + api_request("POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": fresh_pairing["pairing_code"]}) + status, _ = api_request("POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": fresh_pairing["pairing_code"]}) + assert status == 204 + + def test_approve_unknown_code_404(self, api_request): + status, body = api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": "NOPENO"}, + ) + assert status == 404 + assert body["error"]["code"] == "PAIRING_NOT_FOUND" + + def test_approve_short_code_422(self, api_request): + # Below min length — Pydantic rejects before reaching the handler. + status, body = api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": "AB"}, + ) + assert status == 422 + + def test_token_issuance_after_approval(self, api_request, fresh_pairing): + """The full Device Code Flow: approve, then status poll returns + the extension token exactly once.""" + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": fresh_pairing["pairing_code"]}, + ) + status, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={fresh_pairing['pairing_token']}", + ) + assert status == 200 + assert body["approved"] is True + assert body["extension_token"] is not None + # Tokens are urlsafe-base64 random — should be substantial length. + assert len(body["extension_token"]) >= 40 + + def test_token_is_one_shot(self, api_request, fresh_pairing): + """Second status-poll after redemption must NOT return the token + again. The pairing_token_hash is cleared atomically with issuance, + so the row becomes un-findable via the pending-pairing path.""" + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": fresh_pairing["pairing_code"]}, + ) + # First poll redeems. + status1, body1 = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={fresh_pairing['pairing_token']}", + ) + assert status1 == 200 and body1["extension_token"] + + # Second poll with the same pairing_token must NOT return another token. + status2, body2 = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={fresh_pairing['pairing_token']}", + ) + assert status2 == 404 + assert body2["error"]["code"] == "PAIRING_TOKEN_UNKNOWN" + + def test_status_unknown_token_404(self, api_request): + status, body = api_request( + "GET", + "/v1/integrations/claude-ai/extension/pair/status?pairing_token=does-not-exist", + ) + assert status == 404 + + +class TestPairCodeCollision: + def test_many_concurrent_pairs_unique(self, api_request): + """Six char / 31 glyph code space has ~887M codes. Ten codes in a + row should be unique with overwhelming probability.""" + codes = [] + for _ in range(10): + status, body = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "pytest collision check"}, + ) + if status != 201: + pytest.skip(f"pair endpoint returned {status}") + codes.append(body["pairing_code"]) + assert len(set(codes)) == len(codes), f"code collision among {codes}" + + +class TestPairingAuthLeak: + def test_pair_response_does_not_leak_token_hash(self, api_request, fresh_pairing): + """Response must not contain the *hash* fields — those stay in the DB.""" + for field in ("pairing_token_hash", "extension_token_hash"): + assert field not in fresh_pairing diff --git a/backend/tests/integration/test_claude_ai_polish_api.py b/backend/tests/integration/test_claude_ai_polish_api.py new file mode 100644 index 00000000..4c418217 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_polish_api.py @@ -0,0 +1,304 @@ +"""HTTP integration tests for the polish-layer endpoints (0020): + - GET /activity + - GET /health + - PATCH /skills/{id}/sync + - Rate limit on POST /pair +""" +from __future__ import annotations + +import os +import uuid + +import pytest + + +def _bearer(token: str): + import json + import urllib.error + import urllib.request + base = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + def _req(method, path, body=None): + h = {"Authorization": f"Bearer {token}"} + if body is not None: + h["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base}{path}", method=method, headers=h, + data=(json.dumps(body).encode() if body is not None else None), + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + return _req + + +@pytest.fixture +def paired_extension(api_request): + status, pair = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "polish test"}, + ) + if status != 201: + pytest.skip(f"pair endpoint returned {status}") + api_request( + "POST", "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}", + ) + return pair["integration_id"], body["extension_token"] + + +class TestActivityEndpoint: + def test_pair_flow_writes_audit_events(self, api_request, paired_extension): + integ_id, _ = paired_extension + # The pair → approve → redeem flow should emit 3 events for this integration. + status, body = api_request( + "GET", + f"/v1/integrations/claude-ai/activity?integration_id={integ_id}", + ) + assert status == 200 + events = [r["event"] for r in body] + assert "pair_started" in events + assert "pair_approved" in events + assert "pair_redeemed" in events + + def test_disconnect_writes_audit(self, api_request, paired_extension): + integ_id, _ = paired_extension + api_request("DELETE", f"/v1/integrations/claude-ai/integrations/{integ_id}") + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/activity?integration_id={integ_id}&event=integration_disconnected", + ) + assert any(r["event"] == "integration_disconnected" for r in body) + + def test_event_filter_narrows_results(self, api_request, paired_extension): + integ_id, _ = paired_extension + _, body = api_request( + "GET", + f"/v1/integrations/claude-ai/activity?integration_id={integ_id}&event=pair_redeemed", + ) + assert all(r["event"] == "pair_redeemed" for r in body) + + def test_limit_out_of_range_returns_422(self, api_request): + # Round-9 hardening: limit is bounded [1, 500] at the API layer + # (previously the service silently clamped). A misbehaving client + # gets an explicit 422 instead of an apparent-success-with-cap. + status, _ = api_request("GET", "/v1/integrations/claude-ai/activity?limit=999999") + assert status == 422 + + def test_limit_at_max_succeeds(self, api_request): + status, body = api_request("GET", "/v1/integrations/claude-ai/activity?limit=500") + assert status == 200 + assert isinstance(body, list) + assert len(body) <= 500 + + def test_unknown_integration_returns_empty(self, api_request): + status, body = api_request( + "GET", + f"/v1/integrations/claude-ai/activity?integration_id={uuid.uuid4()}", + ) + assert status == 200 + assert body == [] + + +class TestHealthEndpoint: + def test_returns_metrics_shape(self, api_request, paired_extension): + status, body = api_request("GET", "/v1/integrations/claude-ai/health") + assert status == 200 + for field in ( + "integrations_active", + "integrations_with_errors", + "pending_ops_total", + "failed_ops_total", + "diverged_links_total", + "schema_version", + ): + assert field in body, f"missing field {field}" + assert isinstance(body["integrations_active"], int) + assert isinstance(body["schema_version"], str) + + def test_active_count_reflects_recent_pair(self, api_request, paired_extension): + _, body = api_request("GET", "/v1/integrations/claude-ai/health") + # At least our just-paired integration should be in the active count. + assert body["integrations_active"] >= 1 + + +class TestSkillSyncToggleEndpoint: + @pytest.fixture + def skill_id(self, api_request): + slug = f"toggle-api-{uuid.uuid4().hex[:6]}" + status, body = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "toggle endpoint test", + "content_md": "# x", + "collections": [f"tg-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + assert status == 201 + return body["id"] + + def test_toggle_off_then_on(self, api_request, skill_id): + # Off + status, _ = api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{skill_id}/sync", + body={"enabled": False}, + ) + assert status == 204 + # On + status, _ = api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{skill_id}/sync", + body={"enabled": True}, + ) + assert status == 204 + + def test_unknown_skill_404(self, api_request): + status, body = api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{uuid.uuid4()}/sync", + body={"enabled": True}, + ) + assert status == 404 + assert body["error"]["code"] == "SKILL_NOT_FOUND" + + def test_invalid_payload_422(self, api_request, skill_id): + # Missing 'enabled' field — Pydantic rejects. + status, _ = api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{skill_id}/sync", + body={}, + ) + assert status == 422 + + +class TestSkillDetailExposesSyncFlag: + """SkillDetail response must include claude_ai_sync_enabled so the + skill detail page can render the badge in the right state.""" + + def test_field_present_in_detail(self, api_request): + slug = f"detail-flag-{uuid.uuid4().hex[:6]}" + status, body = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "detail flag test", + "content_md": "# x", + "collections": [f"detail-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + assert status == 201 + assert "claude_ai_sync_enabled" in body + assert body["claude_ai_sync_enabled"] is True + + def test_toggling_persists_in_detail(self, api_request): + slug = f"detail-persist-{uuid.uuid4().hex[:6]}" + _, created = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "persistence test", + "content_md": "# x", + "collections": [f"persist-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + skill_id = created["id"] + # Disable. + api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{skill_id}/sync", + body={"enabled": False}, + ) + # Re-fetch detail. + _, detail = api_request("GET", f"/v1/skills/{slug}") + assert detail["claude_ai_sync_enabled"] is False + + +class TestDisabledSkillDoesNotEnqueue: + """When claude_ai_sync_enabled=False, _create_content_version should + NOT enqueue an upload op. Verified by checking the queue is empty for + a paired extension after creating a disabled skill.""" + + def test_disabled_skill_skips_enqueue(self, api_request, paired_extension): + integ_id, token = paired_extension + # Create a skill, immediately disable sync, then update it. + slug = f"disabled-{uuid.uuid4().hex[:6]}" + _, created = api_request( + "POST", "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "disabled sync test", + "content_md": "# v1", + "collections": [f"disabled-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + skill_id = created["id"] + + # Drain whatever ops were enqueued for the initial create. + bearer = _bearer(token) + bearer("GET", "/v1/integrations/claude-ai/extension/operations") + + # Disable sync. + api_request( + "PATCH", + f"/v1/integrations/claude-ai/skills/{skill_id}/sync", + body={"enabled": False}, + ) + # Update the skill — should NOT enqueue an op. + api_request( + "PATCH", f"/v1/skills/{slug}", + body={"content_md": "# v2 updated"}, + ) + + _, ops = bearer("GET", "/v1/integrations/claude-ai/extension/operations") + ours = [op for op in ops if op.get("payload", {}).get("name") == slug] + assert ours == [], f"disabled skill should not enqueue ops; got {ours}" + + +class TestRateLimit: + """The /pair endpoint rate-limits per source IP. Hard to fully prove + without flooding 60+ requests; we verify the 429 response shape via a + manual high-volume run, gated behind a marker.""" + + def _unique_ip(self) -> str: + # TEST-NET-1 (192.0.2.0/24, RFC 5737) is reserved for tests and + # never collides with real traffic. Random within-class keeps each + # run isolated from any prior state in the shared DB. + import random + return f"192.0.2.{random.randint(1, 254)}" + + def test_pair_endpoint_returns_201_under_threshold(self, api_request): + ip = self._unique_ip() + for _ in range(5): + status, _ = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "rate test"}, + headers={"X-Forwarded-For": ip}, + ) + assert status == 201 + + @pytest.mark.slow + def test_pair_endpoint_returns_429_above_threshold(self, api_request): + """Skipped unless run with --runslow because it floods the endpoint.""" + ip = self._unique_ip() + rejected = 0 + for _ in range(65): + status, body = api_request( + "POST", "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "flood test"}, + headers={"X-Forwarded-For": ip}, + ) + if status == 429: + rejected += 1 + assert body["error"]["code"] == "RATE_LIMITED" + break + assert rejected > 0, "expected at least one 429 in 65 attempts" diff --git a/backend/tests/integration/test_claude_ai_queue.py b/backend/tests/integration/test_claude_ai_queue.py new file mode 100644 index 00000000..c18076c8 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_queue.py @@ -0,0 +1,213 @@ +"""Iter 17 — /v1/integrations/claude-ai/queue endpoint. + +The endpoint surfaces the live pending + in-progress sync operations +to the SkillNote settings UI. Drives the "Sync activity" panel. + +Contract: + - Returns ONLY pending and in_progress ops (no completed/failed). + - Sorted oldest-first so the queue reads FIFO. + - Eager-joins skill name/slug and integration label so the UI doesn't + need N+1 follow-up requests. + - Provides total/pending/in_progress counts even when the page is + truncated by limit. + - oldest_age_seconds lets the UI flag a stalled extension. + - integration_id query param filters to one integration. + - limit clamps to [1, 200]. +""" +from __future__ import annotations + +import json +import os +import random +import urllib.error +import urllib.request +import uuid + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _unique_ip() -> str: + return f"192.0.2.{random.randint(1, 254)}" + + +def _post(path, body=None, headers=None): + h = {"Content-Type": "application/json"} if body is not None else {} + if headers: + h.update(headers) + req = urllib.request.Request( + f"{BASE}{path}", + method="POST", + data=(json.dumps(body).encode() if body is not None else None), + headers=h, + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +def _get(path): + req = urllib.request.Request(f"{BASE}{path}", method="GET") + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + except Exception as e: # pragma: no cover + pytest.skip(f"API not reachable: {e}") + + +@pytest.fixture +def paired_with_seeded_op(): + """Pair an extension and create a skill so an upload op lands in the queue.""" + ip = _unique_ip() + s, pair = _post( + "/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "queue-test"}, + headers={"X-Forwarded-For": ip}, + ) + if s != 201: + pytest.skip(f"pair returned {s}") + _post( + "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}, + ) + _, body = _get( + f"/v1/integrations/claude-ai/extension/pair/status" + f"?pairing_token={pair['pairing_token']}" + ) + assert body["approved"] + + name = f"queue-skill-{uuid.uuid4().hex[:6]}" + collection = f"q-{uuid.uuid4().hex[:10]}" + s, _ = _post( + "/v1/skills", + body={ + "name": name, + "slug": name, + "description": "queue test seed", + "content_md": "# seed\n", + "collections": [collection], + }, + ) + if s != 201: + pytest.skip(f"could not seed skill (status {s})") + return pair["integration_id"], body["extension_token"], name + + +class TestQueueContract: + def test_returns_pending_op_after_seeding_a_skill(self, paired_with_seeded_op): + integ_id, _token, name = paired_with_seeded_op + # Scope to THIS integration — global queue can hold ops from + # other tests / past runs. Without the filter our seeded op + # could be buried below the page limit. + s, body = _get( + f"/v1/integrations/claude-ai/queue?integration_id={integ_id}" + ) + assert s == 200, body + assert body["pending_count"] + body["in_progress_count"] >= 1 + ours = [it for it in body["items"] if it["skill_name"] == name] + assert len(ours) == 1, ours + item = ours[0] + assert item["integration_id"] == integ_id + assert item["kind"] == "upload" + assert item["status"] in ("pending", "in_progress") + assert item["skill_slug"] == name + assert item["integration_label"] == "queue-test" + assert item["attempts"] == 0 + assert item["last_error"] is None + + def test_oldest_age_seconds_is_populated_when_queue_nonempty( + self, paired_with_seeded_op + ): + _, _, _ = paired_with_seeded_op + s, body = _get("/v1/integrations/claude-ai/queue") + assert s == 200 + if body["total"] > 0: + assert body["oldest_age_seconds"] is not None + assert body["oldest_age_seconds"] >= 0 + + def test_completed_ops_are_excluded(self, paired_with_seeded_op): + """After we complete an op the queue stops listing it.""" + integ_id, token, name = paired_with_seeded_op + # Pull the op into in_progress. + s, ops = _get("/v1/integrations/claude-ai/extension/operations") + # Without the bearer this would 401; the get helper here doesn't + # attach one. Use a direct request instead. + req = urllib.request.Request( + f"{BASE}/v1/integrations/claude-ai/extension/operations", + method="GET", + headers={"Authorization": f"Bearer {token}"}, + ) + with urllib.request.urlopen(req) as r: + ops_payload = json.loads(r.read().decode()) + ours = [o for o in ops_payload if o.get("payload", {}).get("name") == name] + if not ours: + pytest.skip("seed op didn't materialize") + op_id = ours[0]["id"] + + _post( + f"/v1/integrations/claude-ai/extension/operations/{op_id}/complete", + body={ + "success": True, + "result": { + "claude_ai_skill_id": "skill_test_" + uuid.uuid4().hex[:6], + "claude_ai_version": "v1", + }, + }, + headers={"Authorization": f"Bearer {token}"}, + ) + + s, body = _get( + f"/v1/integrations/claude-ai/queue?integration_id={integ_id}" + ) + assert s == 200 + remaining = [it for it in body["items"] if it["id"] == op_id] + assert remaining == [], ( + f"completed op should be excluded from queue, got {remaining}" + ) + + +class TestQueueFiltering: + def test_integration_id_filter_scopes_results(self, paired_with_seeded_op): + integ_id, _, _ = paired_with_seeded_op + s, body = _get( + f"/v1/integrations/claude-ai/queue?integration_id={integ_id}" + ) + assert s == 200 + # Every row in the filtered response is for THIS integration only. + for it in body["items"]: + assert it["integration_id"] == integ_id + + def test_unknown_integration_returns_empty(self): + s, body = _get( + f"/v1/integrations/claude-ai/queue?integration_id={uuid.uuid4()}" + ) + assert s == 200 + assert body["items"] == [] + assert body["total"] == 0 + assert body["pending_count"] == 0 + assert body["in_progress_count"] == 0 + assert body["oldest_age_seconds"] is None + + +class TestQueueLimitBounds: + def test_limit_below_min_returns_422(self): + s, _ = _get("/v1/integrations/claude-ai/queue?limit=0") + assert s == 422 + + def test_limit_above_max_returns_422(self): + s, _ = _get("/v1/integrations/claude-ai/queue?limit=201") + assert s == 422 + + def test_limit_at_max_succeeds(self): + s, _ = _get("/v1/integrations/claude-ai/queue?limit=200") + assert s == 200 diff --git a/backend/tests/integration/test_claude_ai_security_hardening.py b/backend/tests/integration/test_claude_ai_security_hardening.py new file mode 100644 index 00000000..c99569d3 --- /dev/null +++ b/backend/tests/integration/test_claude_ai_security_hardening.py @@ -0,0 +1,442 @@ +"""Security and race-condition tests for the claude.ai connector. + +Targets specific bugs surfaced during the hardening round: + + 1. Concurrent /pair/status polls must NOT issue two tokens for the + same pairing (only one token can be in the DB). + 2. Disconnect must mark pending sync_operations as failed so they + don't accumulate forever. + 3. Telemetry endpoint must reject malformed/oversized payloads. + 4. Bearer token comparison must be constant-time (sanity check). + 5. Pairing approval is idempotent under concurrent approval clicks. + 6. Sensitive token values never appear in audit log details. +""" +from __future__ import annotations + +import concurrent.futures +import io +import json +import os +import urllib.error +import urllib.request +import uuid +import zipfile + +import pytest + + +BASE = os.environ.get("SKILLNOTE_TEST_BASE_URL", "http://127.0.0.1:8082") + + +def _post(path: str, body=None, headers=None): + h = {"Content-Type": "application/json"} if body is not None else {} + if headers: + h.update(headers) + req = urllib.request.Request( + f"{BASE}{path}", + method="POST", + data=(json.dumps(body).encode() if body is not None else None), + headers=h, + ) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + + +def _get(path: str, headers=None): + req = urllib.request.Request(f"{BASE}{path}", headers=headers or {}) + try: + with urllib.request.urlopen(req) as r: + txt = r.read().decode() + return r.status, (json.loads(txt) if txt else None) + except urllib.error.HTTPError as e: + txt = e.read().decode() + return e.code, (json.loads(txt) if txt else None) + + +@pytest.fixture +def pending_pair(): + """Set up a pending pair-approval ready to be redeemed.""" + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "security test"}) + if s != 201: + pytest.skip(f"pair endpoint not available: {s}") + s2, _ = _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + if s2 != 204: + pytest.skip(f"approve failed: {s2}") + return pair + + +class TestConcurrentTokenRedemption: + """The bug: an extension retry storm hits /pair/status with the same + pairing_token simultaneously. Without row-level locking, two requests + could each issue a fresh extension_token; the DB stores whichever + finishes last, leaving the other extension with a dead token. + + With with_for_update + status='pending_approval' filter, the second + request waits for the first's commit, then sees a row no longer + matching the filter → 404.""" + + def test_concurrent_polls_issue_exactly_one_token(self, pending_pair): + pairing_token = pending_pair["pairing_token"] + + def poll(): + return _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pairing_token}" + ) + + # Fire 8 concurrent polls — at most ONE should return a token. + with concurrent.futures.ThreadPoolExecutor(max_workers=8) as ex: + results = list(ex.map(lambda _: poll(), range(8))) + + token_holders = [ + r for r in results + if r[0] == 200 and r[1] and r[1].get("extension_token") + ] + # Exactly one request gets the token. The rest get 404 + # (PAIRING_TOKEN_UNKNOWN — the row's pairing fields are now NULL). + assert len(token_holders) == 1, ( + f"expected exactly 1 token issuance, got {len(token_holders)}; " + f"all responses: {results}" + ) + + # The other 7 should be 404 (or 200 with approved=False if they + # ran before the approval was visible — unlikely but possible). + other_codes = [r[0] for r in results if r not in token_holders] + assert all(c in (200, 404) for c in other_codes), ( + f"unexpected status codes among losers: {other_codes}" + ) + + def test_redeemed_token_works_immediately(self, pending_pair): + """Sanity check that the token issued by the redemption is + actually valid against the extension API. (Regression guard + against issuing tokens but failing to persist their hash.)""" + s, body = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pending_pair['pairing_token']}" + ) + assert s == 200 and body["extension_token"] + token = body["extension_token"] + + s2, _ = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + ) + assert s2 == 200, f"redeemed token should authenticate, got {s2}" + + +class TestDisconnectCleansQueue: + """The bug: disconnect_integration nulls the bearer but leaves + pending/in_progress sync_operations dangling. Those rows accumulate + forever and pollute the failed_ops_total metric (well — they DON'T + show up as failed, they're stuck in pending; the queue just grows). + + The fix marks them failed so the operator can see and the queue + stays clean.""" + + def test_disconnect_marks_pending_ops_as_failed(self): + # Pair an extension. + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "queue cleanup test"}) + if s != 201: + pytest.skip("pair not available") + _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + _, status = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + integ_id = pair["integration_id"] + token = status["extension_token"] + + # Create a skill — emits an upload op for this integration. + slug = f"qclean-{uuid.uuid4().hex[:6]}" + s, _ = _post( + "/v1/skills", + body={ + "name": slug, "slug": slug, + "description": "queue cleanup test", + "content_md": "# x", + "collections": [f"qclean-bucket-{uuid.uuid4().hex[:8]}"], + }, + ) + assert s == 201 + + # Verify the op is pending. + _, ops_before = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": f"Bearer {token}"}, + ) + # The fetch above flips status to in_progress as a side effect — that's + # the realistic state at disconnect time. + assert any(op["payload"].get("name") == slug for op in ops_before) + + # Now disconnect. + req = urllib.request.Request( + f"{BASE}/v1/integrations/claude-ai/integrations/{integ_id}", + method="DELETE", + ) + with urllib.request.urlopen(req) as r: + assert r.status == 204 + + # Check the DB state via health endpoint — failed_ops_total should + # include our in-flight op (queued + in_progress) now flipped to failed. + # The health endpoint counts FAILED ops total; we expect at least one + # increment from the disconnect cleanup. + _, health = _get("/v1/integrations/claude-ai/health") + # We don't have a clean baseline (shared DB), but at least one of our + # in-flight ops MUST have transitioned to failed. We verify via the + # integrations endpoint instead: + _, integrations = _get("/v1/integrations/claude-ai/integrations") + ours = [i for i in integrations if i["id"] == integ_id][0] + # After disconnect, pending_op_count should be 0 (all flipped to failed). + assert ours["pending_op_count"] == 0, ( + f"disconnect should flush pending ops; got {ours['pending_op_count']}" + ) + # And the failed count should have absorbed them. + assert ours["failed_op_count"] >= 1, ( + f"expected at least 1 op flipped to failed; got {ours['failed_op_count']}" + ) + + +class TestTelemetryInputValidation: + """Bearer-authed but the schema must reject malformed/oversized + payloads before they reach the log pipeline.""" + + @pytest.fixture + def bearer(self): + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "telemetry validation"}) + if s != 201: + pytest.skip("pair not available") + _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + _, status = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + return status["extension_token"] + + def test_valid_payload(self, bearer): + s, _ = _post( + "/v1/integrations/claude-ai/extension/telemetry", + body={"category": "endpoint_changed", "ext_version": "0.1.0", "detail": {"path": "/x"}}, + headers={"Authorization": f"Bearer {bearer}"}, + ) + assert s == 204 + + def test_rejects_missing_category(self, bearer): + s, _ = _post( + "/v1/integrations/claude-ai/extension/telemetry", + body={"ext_version": "0.1.0"}, + headers={"Authorization": f"Bearer {bearer}"}, + ) + assert s == 422 + + def test_rejects_category_with_special_chars(self, bearer): + """Category is restricted to [a-zA-Z0-9_] — protects log + injection (newlines, ANSI escapes) from a malicious bearer.""" + s, _ = _post( + "/v1/integrations/claude-ai/extension/telemetry", + body={"category": "bad\nLOG_INJECTION\rROOT-LOGGER=DEBUG", "ext_version": "0.1.0"}, + headers={"Authorization": f"Bearer {bearer}"}, + ) + assert s == 422 + + def test_rejects_oversized_category(self, bearer): + s, _ = _post( + "/v1/integrations/claude-ai/extension/telemetry", + body={"category": "a" * 65, "ext_version": "0.1.0"}, # cap is 64 + headers={"Authorization": f"Bearer {bearer}"}, + ) + assert s == 422 + + def test_rejects_oversized_ext_version(self, bearer): + s, _ = _post( + "/v1/integrations/claude-ai/extension/telemetry", + body={"category": "x", "ext_version": "a" * 33}, # cap is 32 + headers={"Authorization": f"Bearer {bearer}"}, + ) + assert s == 422 + + +class TestIdempotentApproval: + """Approving the same pairing code twice in quick succession (e.g. + user double-clicked the Approve button) must not break the flow.""" + + def test_double_approve_is_safe(self): + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "double approve"}) + if s != 201: + pytest.skip("pair not available") + code = pair["pairing_code"] + + # Fire 5 concurrent approves of the same code. + def approve(): + return _post( + "/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": code}, + ) + + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as ex: + results = list(ex.map(lambda _: approve(), range(5))) + + # All should return 204 — idempotent. + codes = [r[0] for r in results] + assert codes.count(204) == 5, f"double approval not idempotent: {results}" + + # The flow should still work: status poll redeems exactly once. + _, status = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + assert status["extension_token"], "approval still works after multi-click" + + +class TestAuditLogPrivacy: + """The audit log MUST never store raw tokens or bearer values. + Defense in depth: even if a SQL injection elsewhere exposed audit + rows, no credentials should be recoverable.""" + + def test_audit_details_contain_no_token_hashes(self): + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "privacy audit"}) + if s != 201: + pytest.skip("pair not available") + _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + _, status = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + + # Look at the audit feed for this integration. + _, events = _get( + f"/v1/integrations/claude-ai/activity?integration_id={pair['integration_id']}" + ) + for event in events: + blob = json.dumps(event).lower() + assert pair["pairing_token"].lower() not in blob, ( + f"pairing_token leaked into audit event {event['event']}" + ) + if status.get("extension_token"): + assert status["extension_token"].lower() not in blob, ( + f"extension_token leaked into audit event {event['event']}" + ) + + +class TestRequireExtensionEdgeCases: + """The bearer auth dependency must handle a variety of malformed inputs + without 500-ing.""" + + def test_empty_authorization_header(self): + s, body = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": ""}, + ) + assert s == 401 + + def test_only_word_bearer_no_token(self): + s, _ = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": "Bearer"}, + ) + assert s == 401 + + def test_bearer_with_only_whitespace(self): + s, _ = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": "Bearer "}, + ) + assert s == 401 + + def test_lowercase_bearer_keyword(self): + # Should still parse — case-insensitive on the keyword. + s, body = _get( + "/v1/integrations/claude-ai/extension/operations", + headers={"Authorization": "bearer no-such-token"}, + ) + # 401 INVALID_EXTENSION_TOKEN (not MISSING_BEARER_TOKEN — we + # parsed the keyword but the token doesn't match anything). + assert s == 401 + assert body["error"]["code"] == "INVALID_EXTENSION_TOKEN" + + +class TestImportedSkillSecurity: + """The inbound import endpoint runs SKILL.md validation via the same + bundle_validator that protects local uploads. Specific attack + vectors to verify are blocked.""" + + @pytest.fixture + def bearer(self): + s, pair = _post("/v1/integrations/claude-ai/extension/pair", + body={"browser_label": "import security"}) + if s != 201: + pytest.skip("pair not available") + _post("/v1/integrations/claude-ai/pair/approve", + body={"pairing_code": pair["pairing_code"]}) + _, status = _get( + f"/v1/integrations/claude-ai/extension/pair/status?pairing_token={pair['pairing_token']}" + ) + return status["extension_token"] + + def _upload_zip(self, bearer, zip_bytes, name="x", ca_id=None): + ca_id = ca_id or f"skill_sec_{uuid.uuid4().hex[:6]}" + boundary = "----b-" + uuid.uuid4().hex + parts = [] + for k, v in [ + ("claude_ai_skill_id", ca_id), + ("name", name), + ("description", "security test"), + ]: + parts.append(f"--{boundary}\r\n".encode()) + parts.append(f'Content-Disposition: form-data; name="{k}"\r\n\r\n'.encode()) + parts.append(v.encode() + b"\r\n") + parts.append(f"--{boundary}\r\n".encode()) + parts.append( + b'Content-Disposition: form-data; name="bundle"; filename="x.zip"\r\n' + b'Content-Type: application/zip\r\n\r\n' + ) + parts.append(zip_bytes) + parts.append(f"\r\n--{boundary}--\r\n".encode()) + req = urllib.request.Request( + f"{BASE}/v1/integrations/claude-ai/extension/imported-skill", + method="POST", data=b"".join(parts), + headers={ + "Authorization": f"Bearer {bearer}", + "Content-Type": f"multipart/form-data; boundary={boundary}", + }, + ) + try: + with urllib.request.urlopen(req) as r: + return r.status, json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + return e.code, json.loads(e.read().decode()) + + def test_rejects_empty_bundle(self, bearer): + s, body = self._upload_zip(bearer, b"") + assert s == 422 + assert body["error"]["code"] in ("EMPTY_BUNDLE", "INVALID_ZIP", "INVALID_BUNDLE") + + def test_rejects_path_traversal(self, bearer): + # ZIP with a SKILL.md entry that escapes the parent directory. + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr( + "../../../etc/passwd-skill/SKILL.md", + "---\nname: x\ndescription: y\n---\n\n# x\n", + ) + s, body = self._upload_zip(bearer, buf.getvalue()) + assert s == 422, f"path-traversal should be rejected, got {s} {body}" + + def test_rejects_reserved_word_in_name(self, bearer): + # Reserved words 'anthropic' and 'claude' must be blocked even + # via the inbound path. + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr( + "claude-evil/SKILL.md", + "---\nname: claude-evil\ndescription: reserved\n---\n\n# x\n", + ) + s, body = self._upload_zip(bearer, buf.getvalue()) + assert s == 422 diff --git a/backend/tests/unit/test_claude_ai_perf.py b/backend/tests/unit/test_claude_ai_perf.py new file mode 100644 index 00000000..e3d15e7b --- /dev/null +++ b/backend/tests/unit/test_claude_ai_perf.py @@ -0,0 +1,116 @@ +"""Performance & query-shape tests. + +Catches N+1 regressions and over-fetching. Doesn't assert latency +(too flaky in CI); instead asserts on query count or row-fetch shape. +""" +from __future__ import annotations + +import uuid as _uuid + +import pytest +from sqlalchemy import event + +from app.db.models import Skill +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) +from app.services.claude_ai_sync import ( + bulk_integration_counters, + integration_counters, +) + + +@pytest.fixture +def ten_integrations(db_session): + """Create 10 integrations, each with 5 links + 3 ops, so the + counters have non-trivial values to roll up.""" + rows = [] + for i in range(10): + integ = ClaudeAIIntegration( + status="active", scope="both", conflict_policy="ask", + browser_label=f"perf-{i}", + ) + db_session.add(integ) + db_session.flush() + for j in range(5): + db_session.add( + ClaudeAISkillLink( + integration_id=integ.id, + claude_ai_skill_id=f"skill_perf_{i}_{j}", + ) + ) + for s in ("pending", "in_progress", "failed"): + db_session.add( + ClaudeAISyncOperation( + integration_id=integ.id, + kind="list", + status=s, + ) + ) + rows.append(integ) + db_session.commit() + return rows + + +class TestBulkCountersAvoidsNPlus1: + """The bulk-fetch helper should issue exactly 2 queries regardless + of how many integrations are passed in. + + Before the optimization, list_integrations issued 3*N queries + (one set per integration). With bulk_integration_counters, two + GROUP-BY queries cover all N.""" + + def test_bulk_returns_correct_counts(self, db_session, ten_integrations): + ids = [i.id for i in ten_integrations] + result = bulk_integration_counters(db_session, ids) + assert len(result) == 10 + for i in ten_integrations: + row = result[i.id] + assert row["linked_skill_count"] == 5 + # 1 pending + 1 in_progress = 2 in the "pending" bucket + # (in_progress is in-flight work, displayed as pending). + assert row["pending_op_count"] == 2 + assert row["failed_op_count"] == 1 + + def test_bulk_query_count(self, db_session, ten_integrations): + """Count actual SQL queries via event hook. Must be O(1) not O(N).""" + engine = db_session.get_bind() + executed: list[str] = [] + + def _before_cursor_execute(conn, cursor, statement, *_): + # Only count statements that touch our tables. + if "claude_ai_skill_links" in statement or "claude_ai_sync_operations" in statement: + executed.append(statement) + + event.listen(engine, "before_cursor_execute", _before_cursor_execute) + try: + bulk_integration_counters(db_session, [i.id for i in ten_integrations]) + finally: + event.remove(engine, "before_cursor_execute", _before_cursor_execute) + + # 2 queries: one for ops, one for links. NOT 20. + assert len(executed) == 2, ( + f"bulk counters issued {len(executed)} queries (expected 2). " + f"This is an N+1 regression. Queries:\n" + "\n".join(executed) + ) + + def test_single_call_helper_remains_correct(self, db_session, ten_integrations): + """The original integration_counters helper still works for + single-row callers (kept as a backwards-compatible alias).""" + result = integration_counters(db_session, ten_integrations[0].id) + assert result["linked_skill_count"] == 5 + assert result["pending_op_count"] == 2 + assert result["failed_op_count"] == 1 + + def test_bulk_empty_input_returns_empty(self, db_session): + assert bulk_integration_counters(db_session, []) == {} + + def test_bulk_missing_integration_gets_zeros(self, db_session, ten_integrations): + """Pass an ID that has no ops + no links. Should return zero counts, + not crash.""" + result = bulk_integration_counters(db_session, [_uuid.uuid4()]) + assert len(result) == 1 + for k, v in next(iter(result.values())).items(): + assert v == 0, f"expected zero {k}, got {v}" diff --git a/backend/tests/unit/test_claude_ai_polish.py b/backend/tests/unit/test_claude_ai_polish.py new file mode 100644 index 00000000..22f1fe54 --- /dev/null +++ b/backend/tests/unit/test_claude_ai_polish.py @@ -0,0 +1,262 @@ +"""Unit tests for the polish layer (audit log, rate limit, per-skill toggle). + +The polish layer (0020) adds three load-bearing capabilities on top of the +core connector: + + 1. Audit log — append-only event feed for the in-product activity page + AND forensic trail for admins. + 2. Pair-endpoint rate limit — defeats brute-force code enumeration. + 3. Per-skill sync toggle — granular opt-out per skill. +""" +from __future__ import annotations + +import os +import uuid as _uuid +from datetime import datetime, timedelta, timezone + +import pytest +from sqlalchemy import select + +from app.db.models.claude_ai import ClaudeAIIntegration +from app.db.models.claude_ai_polish import ( + ClaudeAIAuditLog, + ClaudeAIPairAttempt, +) +from app.services.claude_ai_sync import ( + PairRateLimitExceeded, + query_audit, + record_pair_attempt, + write_audit, +) + + +@pytest.fixture +def integration(db_session): + integ = ClaudeAIIntegration(status="active", scope="both", conflict_policy="ask") + db_session.add(integ) + db_session.commit() + db_session.refresh(integ) + yield integ + + +# ── Audit log ───────────────────────────────────────────────────────────────── + + +class TestWriteAudit: + def test_basic_event(self, db_session, integration): + write_audit(db_session, event="pair_started", integration_id=integration.id) + db_session.commit() + row = db_session.execute( + select(ClaudeAIAuditLog).where( + ClaudeAIAuditLog.integration_id == integration.id + ) + ).scalar_one() + assert row.event == "pair_started" + assert row.detail == {} + + def test_with_detail_and_source_ip(self, db_session, integration): + write_audit( + db_session, + event="pair_started", + integration_id=integration.id, + detail={"browser_label": "Chrome on Mac"}, + source_ip="192.168.1.1", + ) + db_session.commit() + row = db_session.execute( + select(ClaudeAIAuditLog).where( + ClaudeAIAuditLog.integration_id == integration.id + ) + ).scalar_one() + assert row.detail == {"browser_label": "Chrome on Mac"} + # SQLAlchemy returns INET as ipaddress.IPv4Address — comparison + # is value-equal but type-strict, so coerce both sides to str. + assert str(row.source_ip) == "192.168.1.1" + + def test_invalid_event_rejected_by_check_constraint(self, db_session): + # DB CHECK constraint protects against typo'd event strings making + # it past the application layer. + from sqlalchemy.exc import IntegrityError + write_audit(db_session, event="bogus_event") + with pytest.raises(IntegrityError, match="ck_claude_ai_audit_log_event"): + db_session.commit() + db_session.rollback() + + def test_skill_id_set_null_on_skill_delete(self, db_session, integration): + """When a skill is deleted, audit rows referencing it should be + SET NULL (not cascade-deleted) — historical events stay visible + but no longer point at a dangling skill ID.""" + from app.db.models import Skill + skill = Skill( + id=_uuid.uuid4(), + name=f"polish-{_uuid.uuid4().hex[:6]}", + slug=f"polish-{_uuid.uuid4().hex[:6]}", + description="audit cascade test", + content_md="", + current_version=0, + ) + db_session.add(skill) + db_session.flush() + write_audit( + db_session, + event="skill_pushed", + integration_id=integration.id, + skill_id=skill.id, + ) + db_session.commit() + audit_id = db_session.execute( + select(ClaudeAIAuditLog.id).where( + ClaudeAIAuditLog.skill_id == skill.id + ) + ).scalar_one() + + # Delete the skill. + db_session.delete(skill) + db_session.commit() + + # Audit row must still exist but skill_id should be NULL. + row = db_session.execute( + select(ClaudeAIAuditLog).where(ClaudeAIAuditLog.id == audit_id) + ).scalar_one_or_none() + assert row is not None, "audit log row should survive skill deletion" + assert row.skill_id is None, "skill_id should SET NULL on cascade" + + +class TestQueryAudit: + def test_returns_most_recent_first(self, db_session, integration): + # Insert 3 events; query should return newest first. + from datetime import datetime, timezone + base = datetime.now(timezone.utc) + for offset, kind in [(0, "pair_started"), (1, "pair_approved"), (2, "pair_redeemed")]: + row = ClaudeAIAuditLog( + integration_id=integration.id, + event=kind, + created_at=base + timedelta(seconds=offset), + ) + db_session.add(row) + db_session.commit() + + results = query_audit(db_session, integration_id=integration.id) + assert len(results) >= 3 + # Newest first. + events = [r.event for r in results] + assert events.index("pair_redeemed") < events.index("pair_approved") < events.index("pair_started") + + def test_filter_by_event(self, db_session, integration): + for kind in ("pair_started", "pair_approved", "skill_pushed"): + db_session.add( + ClaudeAIAuditLog(integration_id=integration.id, event=kind) + ) + db_session.commit() + only_pair = query_audit(db_session, integration_id=integration.id, event="pair_started") + assert all(r.event == "pair_started" for r in only_pair) + + def test_limit_caps_at_500(self, db_session, integration): + """Defense against UI bug requesting a million rows.""" + results = query_audit(db_session, integration_id=integration.id, limit=1_000_000) + # Just check the query doesn't crash and the limit clamp works + assert isinstance(results, list) + + +# ── Rate limiting ───────────────────────────────────────────────────────────── + + +@pytest.mark.skipif( + os.environ.get("SKILLNOTE_DISABLE_PAIR_RATE_LIMIT") == "1", + reason="rate-limit assertions require the limiter to be active", +) +class TestRateLimit: + """Rate-limit tests use uuid-suffixed IPs to isolate from any + persisted state in the shared DB. Each test's IP is unique to that + test invocation.""" + + def _ip(self) -> str: + # Synthesize a TEST-NET-1 IP that's unique per test invocation. + # 192.0.2.0/24 is reserved for documentation/test, so we never + # collide with anything real. + import random + return f"192.0.2.{random.randint(1, 254)}" + + def test_below_threshold_succeeds(self, db_session): + ip = self._ip() + for _ in range(5): + record_pair_attempt(db_session, source_ip=ip, endpoint="pair") + db_session.flush() + + def test_no_ip_does_not_enforce(self, db_session): + for _ in range(200): + record_pair_attempt(db_session, source_ip=None, endpoint="pair") + db_session.flush() + + def test_breaches_at_threshold(self, db_session): + ip = self._ip() + # Flush within the loop so the SELECT counter sees each insert. + for _ in range(60): + record_pair_attempt(db_session, source_ip=ip, endpoint="pair") + db_session.flush() + with pytest.raises(PairRateLimitExceeded): + record_pair_attempt(db_session, source_ip=ip, endpoint="pair") + + def test_other_ip_not_affected(self, db_session): + ip_a, ip_b = self._ip(), self._ip() + # Sanity: ensure distinct ips (random.randint can collide). + while ip_a == ip_b: + ip_b = self._ip() + for _ in range(60): + record_pair_attempt(db_session, source_ip=ip_a, endpoint="pair") + db_session.flush() + # ip_b is fresh — even though ip_a is exhausted, ip_b can still pair. + record_pair_attempt(db_session, source_ip=ip_b, endpoint="pair") + + def test_window_slides(self, db_session): + """Old attempts shouldn't count. Insert 60 attempts with a + timestamp 2 minutes ago, then verify a new attempt succeeds.""" + ip = self._ip() + old = datetime.now(timezone.utc) - timedelta(minutes=2) + for _ in range(60): + db_session.add( + ClaudeAIPairAttempt( + source_ip=ip, + endpoint="pair", + created_at=old, + ) + ) + db_session.flush() + # Should succeed — those 60 attempts are outside the window. + record_pair_attempt(db_session, source_ip=ip, endpoint="pair") + + +# ── Per-skill sync toggle ───────────────────────────────────────────────────── + + +class TestSkillSyncToggle: + def test_default_enabled(self, db_session): + from app.db.models import Skill + skill = Skill( + id=_uuid.uuid4(), + name=f"toggle-{_uuid.uuid4().hex[:6]}", + slug=f"toggle-{_uuid.uuid4().hex[:6]}", + description="toggle test", + content_md="", + current_version=0, + ) + db_session.add(skill) + db_session.commit() + db_session.refresh(skill) + assert skill.claude_ai_sync_enabled is True + + def test_can_be_disabled(self, db_session): + from app.db.models import Skill + skill = Skill( + id=_uuid.uuid4(), + name=f"toggle2-{_uuid.uuid4().hex[:6]}", + slug=f"toggle2-{_uuid.uuid4().hex[:6]}", + description="toggle test", + content_md="", + current_version=0, + claude_ai_sync_enabled=False, + ) + db_session.add(skill) + db_session.commit() + db_session.refresh(skill) + assert skill.claude_ai_sync_enabled is False diff --git a/backend/tests/unit/test_claude_ai_schemas.py b/backend/tests/unit/test_claude_ai_schemas.py new file mode 100644 index 00000000..cc1d3354 --- /dev/null +++ b/backend/tests/unit/test_claude_ai_schemas.py @@ -0,0 +1,125 @@ +"""Schema-level validation tests for app.schemas.claude_ai. + +Validates that Pydantic enforces the same literals as the DB CHECK +constraints, so a typo at the call site fails fast as a 422 instead of +becoming a bad-state row. +""" +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.schemas.claude_ai import ( + ConflictResolveRequest, + ImportedSkillRequest, + IntegrationPatchRequest, + IntegrationStatusResponse, + PairingApproveRequest, + PairingStartRequest, + SyncOperationCompleteRequest, + SyncOperationOut, +) + + +class TestPairingStart: + def test_label_optional(self): + r = PairingStartRequest() + assert r.browser_label is None + + def test_label_max_length(self): + with pytest.raises(ValidationError): + PairingStartRequest(browser_label="a" * 129) + + def test_label_at_max_length(self): + # 128 is the documented cap; should be accepted. + r = PairingStartRequest(browser_label="a" * 128) + assert len(r.browser_label) == 128 + + +class TestPairingApprove: + def test_minimum_length(self): + with pytest.raises(ValidationError): + PairingApproveRequest(pairing_code="abc") # below min 4 + + def test_maximum_length(self): + with pytest.raises(ValidationError): + PairingApproveRequest(pairing_code="a" * 17) # above max 16 + + def test_valid_length(self): + r = PairingApproveRequest(pairing_code="ABCDEF") + assert r.pairing_code == "ABCDEF" + + +class TestIntegrationPatch: + def test_scope_literal(self): + IntegrationPatchRequest(scope="personal") + IntegrationPatchRequest(scope="organization") + IntegrationPatchRequest(scope="both") + with pytest.raises(ValidationError): + IntegrationPatchRequest(scope="bogus") + + def test_conflict_policy_literal(self): + IntegrationPatchRequest(conflict_policy="ask") + IntegrationPatchRequest(conflict_policy="skillnote_wins") + IntegrationPatchRequest(conflict_policy="claude_ai_wins") + with pytest.raises(ValidationError): + IntegrationPatchRequest(conflict_policy="undecided") + + +class TestSyncOperationComplete: + def test_success_minimal(self): + SyncOperationCompleteRequest(success=True) + + def test_failure_with_error(self): + SyncOperationCompleteRequest(success=False, error="something broke") + + def test_error_max_length(self): + # Defense-in-depth cap: extensions could otherwise dump arbitrarily + # long error blobs into the integration's last_error column. + with pytest.raises(ValidationError): + SyncOperationCompleteRequest(success=False, error="x" * 2001) + + def test_result_accepts_dict(self): + r = SyncOperationCompleteRequest( + success=True, + result={"claude_ai_skill_id": "skill_01", "claude_ai_version": "v1"}, + ) + assert r.result["claude_ai_skill_id"] == "skill_01" + + +class TestConflictResolve: + def test_three_resolutions(self): + for res in ("keep_skillnote", "keep_claude_ai", "skip"): + ConflictResolveRequest(resolution=res) + + def test_invalid_resolution(self): + with pytest.raises(ValidationError): + ConflictResolveRequest(resolution="merge") + + +class TestImportedSkill: + def test_name_max_length(self): + # Anthropic's skill name cap is 64 chars (mirrored on our side). + # Pydantic should reject longer. + with pytest.raises(ValidationError): + ImportedSkillRequest( + claude_ai_skill_id="skill_01", + name="a" * 65, + description="ok", + ) + + def test_description_max_length(self): + with pytest.raises(ValidationError): + ImportedSkillRequest( + claude_ai_skill_id="skill_01", + name="ok", + description="x" * 1025, + ) + + def test_at_cap_succeeds(self): + # 64 / 1024 should be accepted, not rejected. + ImportedSkillRequest( + claude_ai_skill_id="skill_01", + name="a" * 64, + description="x" * 1024, + ) diff --git a/backend/tests/unit/test_claude_ai_service.py b/backend/tests/unit/test_claude_ai_service.py new file mode 100644 index 00000000..9c7f5dd2 --- /dev/null +++ b/backend/tests/unit/test_claude_ai_service.py @@ -0,0 +1,506 @@ +"""Unit tests for app.services.claude_ai_sync. + +Covers token generation/hashing/verification, pairing-flow helpers, and the +sync-op enqueue helpers including coalescing. Uses the real DB through the +shared db_session fixture for end-to-end realism; service helpers without DB +contact are exercised directly without a session. +""" +from __future__ import annotations + +import re +from datetime import datetime, timedelta, timezone + +import pytest +from sqlalchemy import select + +from app.db.models.claude_ai import ( + ClaudeAIIntegration, + ClaudeAISkillLink, + ClaudeAISyncOperation, +) +from app.services.claude_ai_sync import ( + active_integrations_for_sync, + enqueue_periodic_list, + enqueue_skill_delete, + enqueue_skill_upload, + find_integration_by_extension_token, + find_pending_pairing_by_code, + find_pending_pairing_by_token, + generate_pairing_code, + generate_token, + hash_token, + integration_counters, + pairing_expiry, + verify_token, +) + + +# ── Token primitives ────────────────────────────────────────────────────────── + + +class TestPairingCode: + """Pairing codes are user-typed; they must avoid visually ambiguous glyphs.""" + + def test_length_is_six(self): + assert len(generate_pairing_code()) == 6 + + def test_only_uppercase_alphanumerics_minus_confusing(self): + # Generate many to cover the alphabet probabilistically. + codes = [generate_pairing_code() for _ in range(200)] + for c in codes: + assert re.match(r"^[A-Z2-9]+$", c), f"unexpected glyph in {c!r}" + # Explicitly verify the confusable glyphs never appear. + assert "0" not in c + assert "O" not in c + assert "1" not in c + assert "I" not in c + assert "L" not in c + + def test_codes_are_random(self): + # 200 codes from a 31-glyph alphabet of length 6 should have very low + # duplicate rate. Birthday-paradox math says expected collisions are + # ~200^2 / (2 * 31^6) ≈ 0.000045 — effectively never. + codes = {generate_pairing_code() for _ in range(200)} + assert len(codes) >= 198, f"low uniqueness: {len(codes)}/200" + + +class TestExtensionToken: + """Long bearer tokens used by the extension.""" + + def test_token_length_is_substantial(self): + # 32 random bytes -> ~43 chars of urlsafe-base64. + t = generate_token() + assert len(t) >= 40 + + def test_tokens_are_unique(self): + s = {generate_token() for _ in range(100)} + assert len(s) == 100, "token generation collision" + + def test_token_only_urlsafe(self): + # urlsafe-base64 alphabet is A-Z, a-z, 0-9, -, _ (and = padding). + t = generate_token() + assert re.match(r"^[A-Za-z0-9_\-=]+$", t) + + +class TestTokenHashing: + def test_hash_is_deterministic(self): + h1 = hash_token("hello") + h2 = hash_token("hello") + assert h1 == h2 + + def test_hash_is_64_hex_chars(self): + # sha256 = 256 bits = 64 hex chars. + assert re.match(r"^[0-9a-f]{64}$", hash_token("anything")) + + def test_hashes_differ_for_different_inputs(self): + assert hash_token("a") != hash_token("b") + + def test_hash_one_way(self): + # Defense-in-depth: ensure the hash function doesn't accidentally + # leak the original (e.g. via base64 encoding). + h = hash_token("super-secret-token-do-not-leak") + assert "secret" not in h + assert "super" not in h + + +class TestVerifyToken: + def test_verify_true_for_matching(self): + raw = generate_token() + assert verify_token(raw, hash_token(raw)) + + def test_verify_false_for_mismatch(self): + assert not verify_token("not-the-token", hash_token("real-token")) + + def test_verify_empty_strings_safe(self): + # Should not raise; just returns False. + assert verify_token("", hash_token("real")) is False + assert verify_token("real", hash_token("")) is False + + +class TestPairingExpiry: + def test_expiry_is_future(self): + e = pairing_expiry() + delta = e - datetime.now(timezone.utc) + # Implementation says 10 min. Bound both ways so a regression to + # 1-second or 100-day expiry is caught. + assert timedelta(minutes=9) < delta < timedelta(minutes=11) + + +# ── DB-backed lookups ───────────────────────────────────────────────────────── + + +@pytest.fixture +def pending_integration(db_session): + """Create a fresh pending_approval integration and yield (row, raw_pairing_token).""" + raw = generate_token() + integ = ClaudeAIIntegration( + status="pending_approval", + scope="both", + browser_label="pytest pending", + pairing_code=generate_pairing_code(), + pairing_token_hash=hash_token(raw), + pairing_expires_at=pairing_expiry(), + conflict_policy="ask", + ) + db_session.add(integ) + db_session.commit() + db_session.refresh(integ) + yield integ, raw + # db_session fixture rolls back; nothing to clean up. + + +@pytest.fixture +def active_integration(db_session): + """Create an active integration and yield (row, raw_extension_token).""" + raw_ext = generate_token() + integ = ClaudeAIIntegration( + status="active", + scope="both", + browser_label="pytest active", + extension_token_hash=hash_token(raw_ext), + conflict_policy="ask", + ) + db_session.add(integ) + db_session.commit() + db_session.refresh(integ) + yield integ, raw_ext + + +class TestPendingLookups: + def test_by_code_finds_only_pending(self, db_session, pending_integration): + integ, _ = pending_integration + found = find_pending_pairing_by_code(db_session, integ.pairing_code) + assert found is not None + assert found.id == integ.id + + def test_by_code_is_uppercase_tolerant(self, db_session, pending_integration): + integ, _ = pending_integration + # Should normalize input — user types in mixed case sometimes. + found = find_pending_pairing_by_code(db_session, integ.pairing_code.lower()) + assert found is not None + assert found.id == integ.id + + def test_by_code_returns_none_for_unknown(self, db_session): + assert find_pending_pairing_by_code(db_session, "ZZZZZZ") is None + + def test_by_code_returns_none_for_empty(self, db_session): + assert find_pending_pairing_by_code(db_session, "") is None + + def test_by_code_ignores_active_rows(self, db_session, active_integration): + # Active integrations have pairing_code=NULL, but verify the status + # filter independently — set a code temporarily without changing status. + integ, _ = active_integration + integ.pairing_code = "TEST99" + db_session.commit() + # Status is 'active', not 'pending_approval' → not findable. + assert find_pending_pairing_by_code(db_session, "TEST99") is None + + def test_by_token_hashes_input(self, db_session, pending_integration): + integ, raw = pending_integration + found = find_pending_pairing_by_token(db_session, raw) + assert found is not None + assert found.id == integ.id + + def test_by_token_does_not_match_hash_value(self, db_session, pending_integration): + integ, raw = pending_integration + # Sending the already-hashed value should NOT match — would indicate + # a double-hash bug where the function applied hash to an already-hashed string. + assert find_pending_pairing_by_token(db_session, hash_token(raw)) is None + + def test_by_token_returns_none_for_empty(self, db_session): + assert find_pending_pairing_by_token(db_session, "") is None + + +class TestBearerLookup: + def test_by_extension_token(self, db_session, active_integration): + integ, raw = active_integration + found = find_integration_by_extension_token(db_session, raw) + assert found is not None + assert found.id == integ.id + + def test_returns_none_for_unknown(self, db_session): + assert find_integration_by_extension_token(db_session, "garbage") is None + + def test_returns_none_for_disconnected(self, db_session, active_integration): + integ, raw = active_integration + integ.status = "disconnected" + db_session.commit() + assert find_integration_by_extension_token(db_session, raw) is None + + +# ── active_integrations_for_sync ────────────────────────────────────────────── + + +class TestActiveIntegrations: + def test_includes_active(self, db_session, active_integration): + integ, _ = active_integration + active = active_integrations_for_sync(db_session) + assert any(a.id == integ.id for a in active) + + def test_includes_cookie_expired(self, db_session, active_integration): + """Cookie-expired integrations still receive ops (they'll drain + when the user re-logs in to claude.ai).""" + integ, _ = active_integration + integ.status = "cookie_expired" + db_session.commit() + active = active_integrations_for_sync(db_session) + assert any(a.id == integ.id for a in active) + + def test_excludes_disconnected(self, db_session, active_integration): + integ, _ = active_integration + integ.status = "disconnected" + db_session.commit() + active = active_integrations_for_sync(db_session) + assert not any(a.id == integ.id for a in active) + + def test_excludes_pending(self, db_session, pending_integration): + integ, _ = pending_integration + active = active_integrations_for_sync(db_session) + assert not any(a.id == integ.id for a in active) + + +# ── Enqueue helpers ─────────────────────────────────────────────────────────── + + +@pytest.fixture +def real_skill(db_session): + """Create a Skill + one SkillContentVersion so enqueue tests have a target.""" + import uuid as _uuid + from app.db.models import Skill, SkillContentVersion + + skill = Skill( + id=_uuid.uuid4(), + name=f"test-{_uuid.uuid4().hex[:6]}", + slug=f"test-{_uuid.uuid4().hex[:6]}", + description="A test skill", + content_md="# Test\n", + current_version=1, + ) + db_session.add(skill) + db_session.flush() + cv = SkillContentVersion( + id=_uuid.uuid4(), + skill_id=skill.id, + version=1, + title=skill.name, + description=skill.description, + content_md=skill.content_md, + is_latest=True, + ) + db_session.add(cv) + db_session.commit() + yield skill, cv + # Rolled back by db_session fixture. + + +class TestEnqueueSkillUpload: + def test_creates_one_op_per_active_integration( + self, db_session, active_integration, real_skill + ): + integ, _ = active_integration + skill, cv = real_skill + ops = enqueue_skill_upload( + db_session, + skill_id=skill.id, + version_id=cv.id, + name=skill.name, + description=skill.description, + ) + db_session.commit() + assert len(ops) >= 1 + # The one for our active integration should be present. + target = [op for op in ops if op.integration_id == integ.id] + assert len(target) == 1 + assert target[0].kind == "upload" + assert target[0].payload["version_id"] == str(cv.id) + assert target[0].payload["name"] == skill.name + + def test_no_op_for_disconnected(self, db_session, active_integration, real_skill): + integ, _ = active_integration + integ.status = "disconnected" + db_session.commit() + skill, cv = real_skill + # Pass empty integrations to isolate from any other rows. + ops = enqueue_skill_upload( + db_session, + skill_id=skill.id, + version_id=cv.id, + name=skill.name, + description=skill.description, + integrations=[integ], + ) + db_session.commit() + assert ops == [] + + def test_coalesces_repeated_calls( + self, db_session, active_integration, real_skill + ): + """Rapid republishes should not pile up the queue. The second + call must update the existing pending op's payload, not create a + new row.""" + integ, _ = active_integration + skill, cv = real_skill + enqueue_skill_upload( + db_session, + skill_id=skill.id, + version_id=cv.id, + name=skill.name, + description="first version description", + integrations=[integ], + ) + db_session.commit() + # Same skill, different description (simulating a republish). + ops2 = enqueue_skill_upload( + db_session, + skill_id=skill.id, + version_id=cv.id, + name=skill.name, + description="updated description", + integrations=[integ], + ) + db_session.commit() + # No new op created. + assert ops2 == [] + # The existing pending op now has the updated payload. + pending = db_session.execute( + select(ClaudeAISyncOperation) + .where(ClaudeAISyncOperation.integration_id == integ.id) + .where(ClaudeAISyncOperation.skill_id == skill.id) + .where(ClaudeAISyncOperation.status == "pending") + ).scalars().all() + assert len(pending) == 1 + assert pending[0].payload["description"] == "updated description" + + def test_creates_new_op_after_previous_completed( + self, db_session, active_integration, real_skill + ): + """Once an upload finishes, a new publish must enqueue a fresh op + (the coalesce window is bounded by 'pending' or 'in_progress').""" + integ, _ = active_integration + skill, cv = real_skill + ops = enqueue_skill_upload( + db_session, skill_id=skill.id, version_id=cv.id, + name=skill.name, description="v1", integrations=[integ], + ) + db_session.commit() + assert len(ops) == 1 + ops[0].status = "completed" + db_session.commit() + + ops2 = enqueue_skill_upload( + db_session, skill_id=skill.id, version_id=cv.id, + name=skill.name, description="v2", integrations=[integ], + ) + db_session.commit() + assert len(ops2) == 1 + + +class TestEnqueueSkillDelete: + def test_skips_unlinked_skills(self, db_session, active_integration, real_skill): + """If the skill was never synced to claude.ai (no link row), no + delete op is needed.""" + integ, _ = active_integration + skill, _ = real_skill + ops = enqueue_skill_delete( + db_session, skill_id=skill.id, integrations=[integ] + ) + db_session.commit() + assert ops == [] + + def test_creates_op_for_linked_skill( + self, db_session, active_integration, real_skill + ): + integ, _ = active_integration + skill, _ = real_skill + # Create a link so the delete enqueue has something to target. + link = ClaudeAISkillLink( + integration_id=integ.id, + skillnote_skill_id=skill.id, + claude_ai_skill_id="skill_ext_01ABCDEF", + direction="outbound", + ) + db_session.add(link) + db_session.commit() + + ops = enqueue_skill_delete(db_session, skill_id=skill.id) + db_session.commit() + # We don't filter by integration here — should match the linked one. + assert any( + op.integration_id == integ.id and op.kind == "delete" + for op in ops + ) + # Op payload carries the claude.ai-side ID. + for op in ops: + if op.integration_id == integ.id: + assert op.payload["claude_ai_skill_id"] == "skill_ext_01ABCDEF" + + +class TestEnqueuePeriodicList: + def test_creates_one_list_op_per_active( + self, db_session, active_integration + ): + integ, _ = active_integration + ops = enqueue_periodic_list(db_session, [integ]) + db_session.commit() + assert len(ops) == 1 + assert ops[0].kind == "list" + assert ops[0].integration_id == integ.id + + def test_coalesces_against_pending( + self, db_session, active_integration + ): + integ, _ = active_integration + enqueue_periodic_list(db_session, [integ]) + db_session.commit() + ops2 = enqueue_periodic_list(db_session, [integ]) + db_session.commit() + assert ops2 == [], "second tick should not double-enqueue" + + +# ── Counters ────────────────────────────────────────────────────────────────── + + +class TestIntegrationCounters: + def test_zero_state(self, db_session, active_integration): + integ, _ = active_integration + c = integration_counters(db_session, integ.id) + assert c == { + "pending_op_count": 0, + "failed_op_count": 0, + "linked_skill_count": 0, + } + + def test_counts_pending_and_failed(self, db_session, active_integration): + integ, _ = active_integration + db_session.add( + ClaudeAISyncOperation(integration_id=integ.id, kind="list", status="pending") + ) + db_session.add( + ClaudeAISyncOperation(integration_id=integ.id, kind="list", status="in_progress") + ) + db_session.add( + ClaudeAISyncOperation(integration_id=integ.id, kind="list", status="failed") + ) + db_session.add( + ClaudeAISyncOperation(integration_id=integ.id, kind="list", status="completed") + ) + db_session.commit() + c = integration_counters(db_session, integ.id) + assert c["pending_op_count"] == 2 # pending + in_progress + assert c["failed_op_count"] == 1 + # Completed ops aren't counted in either bucket — by design (they're + # historical, not action items). + + def test_counts_links(self, db_session, active_integration, real_skill): + integ, _ = active_integration + skill, _ = real_skill + db_session.add( + ClaudeAISkillLink( + integration_id=integ.id, + skillnote_skill_id=skill.id, + claude_ai_skill_id="skill_ext_link_1", + ) + ) + db_session.commit() + c = integration_counters(db_session, integ.id) + assert c["linked_skill_count"] == 1 diff --git a/cli/src/__tests__/connect-claude-ai.test.ts b/cli/src/__tests__/connect-claude-ai.test.ts new file mode 100644 index 00000000..00f9b2b7 --- /dev/null +++ b/cli/src/__tests__/connect-claude-ai.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from 'vitest' + +// Phase 6 — the `claude-ai` agent must be wired into the connect command's +// allowlist alongside `claude-code` and `openclaw`. These tests guard against +// a regression that drops it from the SUPPORTED_AGENTS tuple. + +describe('connect command — claude-ai agent', () => { + it('SUPPORTED_AGENTS includes claude-ai', async () => { + const { SUPPORTED_AGENTS } = await import('../commands/connect.js') + expect(SUPPORTED_AGENTS).toContain('claude-ai') + }) + + it('SUPPORTED_AGENTS still includes claude-code and openclaw', async () => { + // Regression guard — adding claude-ai must not have replaced the others. + const { SUPPORTED_AGENTS } = await import('../commands/connect.js') + expect(SUPPORTED_AGENTS).toContain('claude-code') + expect(SUPPORTED_AGENTS).toContain('openclaw') + }) + + it('SUPPORTED_AGENTS is a frozen tuple (readonly)', async () => { + // The tuple is declared `as const`; assigning to it should be a TS error. + // At runtime it's still a plain array, so this test just verifies the + // shape is preserved (3 known names, no surprises). + const { SUPPORTED_AGENTS } = await import('../commands/connect.js') + expect(SUPPORTED_AGENTS).toHaveLength(3) + const names = new Set(SUPPORTED_AGENTS) + expect(names).toEqual(new Set(['claude-code', 'openclaw', 'claude-ai'])) + }) +}) diff --git a/cli/src/commands/connect.ts b/cli/src/commands/connect.ts index 50eedfd5..33e0bf3e 100644 --- a/cli/src/commands/connect.ts +++ b/cli/src/commands/connect.ts @@ -5,7 +5,7 @@ import { UserFacingError, prettyError } from '../ui/errors.js' import { c } from '../ui/theme.js' // Supported agent identifiers, matching the backend's /setup/agent dispatcher. -export const SUPPORTED_AGENTS = ['claude-code', 'openclaw'] as const +export const SUPPORTED_AGENTS = ['claude-code', 'openclaw', 'claude-ai'] as const export type SupportedAgent = (typeof SUPPORTED_AGENTS)[number] export interface ConnectOptions { @@ -15,6 +15,7 @@ export interface ConnectOptions { const displayNames: Record = { 'claude-code': 'Claude Code', openclaw: 'OpenClaw', + 'claude-ai': 'claude.ai (browser)', } export async function connectCommand(agent: string, _opts: ConnectOptions = {}): Promise { @@ -85,6 +86,17 @@ export async function connectCommand(agent: string, _opts: ConnectOptions = {}): ) } else if (agent === 'openclaw') { log.info('Restart OpenClaw to pick up the SkillNote skill.') + } else if (agent === 'claude-ai') { + log.info( + [ + 'Next:', + ' 1. Install the SkillNote browser extension from the Chrome Web Store', + ' (or load extensions/claude-ai/dist as unpacked in dev mode)', + ` 2. Paste this SkillNote URL into the extension: ${apiBase}`, + ' 3. Approve the pairing code in SkillNote', + ' 4. Sign in to claude.ai (the extension reads the session cookies)', + ].join('\n'), + ) } outro(`${c.ok('Done.')} Run ${c.brand('skillnote status')} to see active agents.`) diff --git a/docs/claude-ai-admin-runbook.md b/docs/claude-ai-admin-runbook.md new file mode 100644 index 00000000..3ca6cc3f --- /dev/null +++ b/docs/claude-ai-admin-runbook.md @@ -0,0 +1,217 @@ +# Claude.ai Connector — Admin Runbook + +Operational reference for SkillNote administrators running the claude.ai +connector in production. + +## Health check + +`GET /v1/integrations/claude-ai/health` returns the connector's +operational metrics: + +```json +{ + "integrations_active": 12, + "integrations_with_errors": 0, + "pending_ops_total": 3, + "failed_ops_total": 0, + "diverged_links_total": 1, + "last_audit_at": "2026-05-24T11:30:00Z", + "schema_version": "0020_claude_ai_polish" +} +``` + +The same data renders on the **Settings → claude.ai** page's +**Connector health** card. + +### What to monitor + +| Metric | Healthy | Warning | Bad | +|---|---|---|---| +| `integrations_with_errors` | 0 | — | ≥1 | +| `failed_ops_total` | 0 | — | ≥1 | +| `pending_ops_total` | <10 | 10–50 | >50 | +| `diverged_links_total` | 0 | ≥1 | — | +| `schema_version` | matches deployed code's expected head | drift | drift | + +Wire `health` into your existing observability stack (Prometheus +exporter, periodic curl + alerting) the same way you'd watch any other +SkillNote endpoint. + +## Activity feed (audit log) + +Every load-bearing event lands in `claude_ai_audit_log`. Query via: + +- **In-product** — Settings → claude.ai → View all activity. +- **API** — `GET /v1/integrations/claude-ai/activity?integration_id=…&event=…&limit=…` +- **SQL** — direct queries against `claude_ai_audit_log`. + +Event types (mirrored from `0020_claude_ai_polish.py`): + +| Event | Trigger | Detail payload | +|---|---|---| +| `pair_started` | `POST /extension/pair` | `{ browser_label }` | +| `pair_approved` | `POST /pair/approve` | `{}` | +| `pair_redeemed` | first `/pair/status` poll after approval | `{}` | +| `pair_expired` | (reserved — not yet emitted; scheduled cleanup) | `{}` | +| `integration_disconnected` | `DELETE /integrations/{id}` | `{ browser_label }` | +| `integration_updated` | `PATCH /integrations/{id}` (reserved) | `{}` | +| `skill_pushed` | extension reports successful upload/update op | `{ op_kind, result }` | +| `skill_imported` | extension reports successful list/import op | `{ op_kind, result }` | +| `skill_delete_pushed` | extension reports successful delete op | `{ op_kind, result }` | +| `op_failed` | op exhausted retry budget | `{ op_kind, attempts, error }` | +| `conflict_detected` | (reserved — Phase 4b conflict auto-detection) | `{}` | +| `conflict_resolved` | (reserved — `/conflicts/{id}/resolve`) | `{ resolution }` | +| `endpoint_changed` | extension surfaces a 404 from claude.ai | `{ message }` | +| `token_revoked` | (reserved) | `{}` | + +## Rate limiting + +The pair endpoint (`POST /extension/pair`) is rate-limited per source IP +to **60 attempts per minute**. Brute-forcing a 6-char pairing code (31 +possible glyphs ≈ 887M combinations) is infeasible within the +10-minute pairing window even at 60 attempts/minute. + +Attempts are recorded in `claude_ai_pair_attempts`. To inspect: + +```sql +SELECT source_ip, COUNT(*) AS attempts +FROM claude_ai_pair_attempts +WHERE created_at > now() - interval '5 minutes' +GROUP BY source_ip +ORDER BY attempts DESC +LIMIT 20; +``` + +A repeated 429 response from a single IP is a signal worth investigating +(scripted enumeration attempt, or a misbehaving extension build). + +### Pruning old attempts + +The table is small but unbounded. Add a periodic job (cron / Postgres +`pg_cron` / external scheduler) to keep it lean: + +```sql +DELETE FROM claude_ai_pair_attempts +WHERE created_at < now() - interval '24 hours'; +``` + +Same with the audit log if you have retention requirements: + +```sql +DELETE FROM claude_ai_audit_log +WHERE created_at < now() - interval '90 days'; +``` + +(Audit retention defaults to forever — set policy explicitly if needed.) + +## Token security model + +- **Pairing tokens** and **extension tokens** are stored as + `sha256(token)` hex digests. Raw tokens are returned to the extension + exactly once (at issuance) and never persisted server-side. +- Bearer verification uses `hmac.compare_digest` for constant-time + comparison. +- `pairing_code` (the user-visible 6-char code) is stored in plaintext + because the short window + low entropy makes hashing pointless. The + *pairing_token* (the long opaque token the extension polls with) + guards the actual handshake. + +A database dump cannot replay sessions — at worst, an attacker with DB +access sees that integration X is paired, but cannot impersonate it. + +## Disconnect / kill-switch + +To revoke a single browser's access: + +- **From the SkillNote UI** — Settings → claude.ai → Disconnect. +- **By API** — `DELETE /v1/integrations/claude-ai/integrations/{id}`. + +To revoke ALL extension access (e.g. emergency response): + +```sql +UPDATE claude_ai_integrations +SET status = 'disconnected', extension_token_hash = NULL; +``` + +After this, every extension bearer fails with 403. Users must re-pair. + +## Backup / restore considerations + +The connector adds three tables (`claude_ai_integrations`, +`claude_ai_skill_links`, `claude_ai_sync_operations`) and two polish +tables (`claude_ai_audit_log`, `claude_ai_pair_attempts`) plus one +column on `skills` (`claude_ai_sync_enabled`). + +A point-in-time restore that rolls back past a pairing approval but +NOT the extension's token receipt would leave the extension holding a +token the server doesn't recognize. The extension handles this with a +401 response and prompts the user to re-pair. No data corruption — just +a re-pair friction event. + +## Schema migration history + +| Migration | Adds | +|---|---| +| `0019_claude_ai_integration` | core tables (integrations / links / ops) | +| `0020_claude_ai_polish` | audit log + rate-limit table + per-skill toggle column | + +Future schema changes should land as new migrations rather than amending +0019/0020. + +## Common operational issues + +### "All my pairings show pending_approval" + +Either the extension never finished its `/pair/status` poll (network +issue) or the user closed the approval tab before clicking Approve. The +records expire after 10 minutes; older rows can be safely deleted with: + +```sql +DELETE FROM claude_ai_integrations +WHERE status = 'pending_approval' + AND pairing_expires_at < now() - interval '1 day'; +``` + +### "Sync queue keeps growing" + +Indicates extensions can't reach SkillNote (the queue grows because the +extension's poll loop isn't draining it). Check: + +1. Extension's last_error in the integrations list. +2. SkillNote's reachability from the user's network. +3. claude.ai endpoint health — if Anthropic ships an internal-endpoint + change, ops fail and accumulate as `failed`. + +The retry budget per op is 3; after that the op is marked `failed` and +surfaces in the UI. Failed ops don't block new ops. + +### "Active integrations report `cookie_expired`" + +The user's claude.ai session lapsed. They need to sign back into +claude.ai — the extension detects the new session cookie via +`chrome.cookies.onChanged` and resumes sync automatically. No +re-pairing. + +### claude.ai endpoint contract drift + +If the extension reports `ClaudeAIEndpointChangedError` repeatedly, an +internal claude.ai endpoint was renamed. Steps: + +1. Verify locally with a fresh manual capture (devtools Network tab). +2. Update `extensions/claude-ai/src/lib/claude-ai-client.ts` with the + new path. +3. Bump the extension version, build, submit to Chrome Web Store + + Firefox AMO. +4. Users with auto-update enabled get the fix within hours. + +Doc the new contract in `docs/claude-ai-endpoints.md` for future +regressions. + +## See also + +- [User guide](claude-ai-user-guide.md) — what to tell users. +- [Architecture plan](claude-ai-integration.md) — full design rationale. +- [Endpoint contracts](claude-ai-endpoints.md) — provisional claude.ai + internal endpoint shapes. +- [Privacy policy](../extensions/claude-ai/PRIVACY.md) — what the + extension reads and where it sends data. diff --git a/docs/claude-ai-endpoints.md b/docs/claude-ai-endpoints.md new file mode 100644 index 00000000..97ae46ca --- /dev/null +++ b/docs/claude-ai-endpoints.md @@ -0,0 +1,198 @@ +# Claude.ai Internal Endpoints — Phase 0 Spike Document + +**Status**: Provisional — values below are based on community reverse-engineering +documented in `anthropics/claude-code` issues and third-party projects +(`claude-unofficial-api`, `unofficial-claude-api`). They MUST be re-verified +against a live claude.ai Team/Enterprise session before the Chrome extension +ships. Each verified contract should be updated here with a captured `curl` +example. + +**Last verified**: never. Marked `TODO: verify` throughout. + +## How to verify + +For each endpoint listed below: + +1. Log into claude.ai with a Team or Enterprise account in Chrome. +2. Open DevTools → Network → preserve log. +3. Perform the action manually (upload a skill, delete one, list them). +4. Find the matching XHR in the Network panel. +5. Right-click → Copy → Copy as cURL. +6. Strip the `sec-*` and `priority` headers, replace the session cookie with + `$COOKIE` env var, and replay. +7. If the replay succeeds and produces the same observable effect (skill + appears in Customize → Skills), record the verified shape below. + +## Authentication + +All endpoints below take session cookies — NOT `sk-ant-...` API keys. The +session cookie is set after login at `claude.ai/login` and lives in browser +storage under domain `.claude.ai`. + +| Cookie name | Type | Required | Notes | +|---|---|---|---| +| `sessionKey` | HttpOnly | Yes | `TODO: verify name` — community docs reference this name; could be `__Secure-sessionKey` in production | +| `_csrf_token` | Standard | Possibly | `TODO: verify` — claude.ai may require a CSRF token in `X-CSRF-Token` header for mutating requests | +| `lastActiveOrg` | Standard | Often | Used by the UI to pick a default org for unscoped requests | + +Browser extensions can read all of these (including HttpOnly) via the +`chrome.cookies` API. Bookmarklets and `document.cookie` cannot read HttpOnly +cookies — this is the load-bearing reason the integration uses an extension. + +## Organization skills endpoints (Team / Enterprise) + +### List org skills + +``` +GET /api/organizations/{org_id}/skills/list-org-skills +Cookie: sessionKey=... +``` + +**Response** (TODO: verify shape): +```json +{ + "skills": [ + { + "id": "skill_org_01ABCDEF...", + "name": "financial-analyzer", + "display_title": "Financial Analyzer", + "description": "...", + "version": "epoch_1716422400", + "created_at": "2026-05-20T10:30:00Z", + "updated_at": "2026-05-22T14:15:00Z", + "uploaded_by": { "user_id": "...", "email": "..." } + } + ] +} +``` + +### Upload org skill + +``` +POST /api/organizations/{org_id}/skills/upload-org-skill +Cookie: sessionKey=... +X-CSRF-Token: ... (TODO: verify if required) +Content-Type: multipart/form-data + +(form-data) +display_title: "Financial Analyzer" +files[]: @financial-analyzer.zip +``` + +Or possibly individual files: +``` +files[]: @financial-analyzer/SKILL.md;filename=financial-analyzer/SKILL.md +files[]: @financial-analyzer/scripts/extract.py;filename=financial-analyzer/scripts/extract.py +``` + +**Response** (TODO: verify): +```json +{ + "skill": { + "id": "skill_org_01...", + "name": "financial-analyzer", + "version": "epoch_...", + "display_title": "Financial Analyzer" + } +} +``` + +### Delete org skill + +``` +POST /api/organizations/{org_id}/skills/delete-org-skill +Cookie: sessionKey=... +X-CSRF-Token: ... +Content-Type: application/json + +{ "skill_id": "skill_org_01..." } +``` + +**Response**: `204 No Content` or `200` with `{ "deleted": true }` — TODO: verify. + +### Get / download skill bundle + +``` +GET /api/organizations/{org_id}/skills/{skill_id}/download (TODO: verify path) +Cookie: sessionKey=... +``` + +**Response**: `application/zip` blob with the skill folder inside (`SKILL.md` ++ bundled files). + +**Open question**: does claude.ai offer a per-skill download endpoint, or do +admins only see metadata via the web UI? If no download endpoint exists, the +reverse-sync path needs to capture the bundle some other way — possibly by +parsing the user's manual download from the UI. + +## Personal skills endpoints + +All endpoints above have a personal-account equivalent under `/api/account/`: + +| Org-scope path | Personal equivalent | +|---|---| +| `/api/organizations/{org_id}/skills/list-org-skills` | `/api/account/skills/list-skills` (TODO: verify) | +| `/api/organizations/{org_id}/skills/upload-org-skill` | `/api/account/skills/upload-skill` (TODO: verify) | +| `/api/organizations/{org_id}/skills/delete-org-skill` | `/api/account/skills/delete-skill` (TODO: verify) | +| `/api/organizations/{org_id}/skills/{id}/download` | `/api/account/skills/{id}/download` (TODO: verify) | + +The personal endpoints may instead live under `/api/users/{user_id}/skills/` +— this is the second most likely path based on Anthropic's naming +conventions seen in published API surfaces. + +## Discovering the user's org_id + +Two known mechanisms: + +1. **GET /api/organizations** — returns the list of orgs the user belongs to. + Pick the first / mark as active. +2. **Session cookie `lastActiveOrg`** — set by the UI when the user switches + orgs. Read by the extension to keep the integration scoped to whatever org + the user is currently working in. + +The extension should call `GET /api/organizations` once at first sync and +cache the result. Subsequent syncs should re-check on every Nth poll (or on +session-cookie change) to catch org switches. + +## Anti-automation considerations + +Anthropic may apply some or all of these defenses: + +- **User-Agent inspection** — requests not coming from a real browser may be + flagged. Extension requests carry the user's real browser UA, so this is + not a concern for us; it would be a concern for a server-side proxy. +- **Origin/Referer enforcement** — extension content scripts run in the + page context with `Origin: https://claude.ai`, naturally satisfying any + cross-origin checks. The background service worker doesn't have a page + origin; for those requests we send `fetch` with `credentials: "include"` + and let chrome attach cookies. +- **CSRF tokens** — many SaaS apps require a CSRF token on mutating + requests. If claude.ai does, we need to extract it from a known location + (response header on initial page load, or a meta tag in the HTML). The + extension can fetch `claude.ai/` once and parse it out. +- **Rate limits** — TBD. Extension should backoff exponentially on 429. + +## What if the endpoints are gated or changed + +If the spike reveals that: + +- **Endpoints require an Enterprise feature flag** not available to Team: + scope v1 to Enterprise only, surface clear error to Team users. +- **Endpoints require a CSRF token we can't easily extract**: add a content + script that runs on `claude.ai/*`, extracts the token from the page on + load, and ships it to the background service worker via `chrome.runtime` + messaging. +- **Endpoint paths differ from documented**: update this file. The Chrome + extension's `claude-ai-client.ts` is a single file; selector updates land + in minutes. +- **Endpoints are completely different shape (e.g. GraphQL)**: probably + means re-architecting `claude-ai-client.ts`. ~1 day of work. + +## Sources + +- Community reverse-engineering: [Explosion-Scratch/claude-unofficial-api](https://github.com/Explosion-Scratch/claude-unofficial-api/blob/main/DOCS.md) +- Feature requests describing the endpoints from a user perspective: + - [anthropics/claude-code#39929](https://github.com/anthropics/claude-code/issues/39929) + - [anthropics/claude-code#49530](https://github.com/anthropics/claude-code/issues/49530) (closed duplicate) + - [anthropics/claude-code#25771](https://github.com/anthropics/claude-code/issues/25771) (closed NOT_PLANNED) +- Anthropic's own [admin-settings/skills](https://claude.ai/admin-settings/skills) UI which calls these endpoints diff --git a/docs/claude-ai-integration.md b/docs/claude-ai-integration.md new file mode 100644 index 00000000..110fb168 --- /dev/null +++ b/docs/claude-ai-integration.md @@ -0,0 +1,551 @@ +# Claude.ai Connector — Integration Plan + +**Status**: Planned, awaiting Phase 0 spike +**Owner**: TBD +**Created**: 2026-05-24 + +## Context + +SkillNote currently supports Claude Code, Cursor, Codex, OpenClaw, OpenHands, and a universal target — all filesystem-based. The next surface is **claude.ai** (the web UI at claude.ai, also branded "Cowork" for Team/Enterprise). + +The goal is two-way sync of skills between a user's self-hosted SkillNote and their claude.ai account, covering **both personal skills and shared/organization skills**. The user experience target: parity with the existing Claude Code integration — install once, then skills appear and stay in sync automatically. + +## Decision: Chrome extension with cookie auth + direct internal API calls + +After extensive evaluation (see "Rejected alternatives" below), the chosen path is a browser extension that: + +1. Reads the user's claude.ai session cookies via Chrome's `chrome.cookies` API +2. Calls claude.ai's internal REST endpoints (`/api/organizations/{org_id}/skills/*`, `/api/account/.../skills/*`) directly with cookie auth +3. Polls the SkillNote backend for pending sync operations and executes them +4. Pulls claude.ai-authored skills back to SkillNote on a periodic cycle + +**Why this beats every other path**: + +- Cookies are inaccessible to bookmarklets (HttpOnly), CLIs (no browser session), and desktop apps without embedded webviews. Extensions are the only mechanism with first-class cookie access for non-engineering users. +- Direct REST calls (not DOM automation) means no fragility on UI redesigns — only contract changes break us. +- Full skill bundles (SKILL.md + scripts + assets) are preserved because we call the same upload endpoint the web UI itself uses. +- Self-hosting isolation preserved: skill content flows **user's SkillNote → user's browser → user's claude.ai**. SkillNote-project never touches the data; only ships the open-source extension binary. + +### Locked decisions (with rationale) + +| # | Decision | Choice | Rationale | +|---|---|---|---| +| 1 | Scope (personal / org / both) | **Both, org first** | Org skills are higher business value (Team/Enterprise users), better documented endpoints. Personal in v1.1. | +| 2 | Per-skill sync opt-in | **Yes — toggle per skill in SkillNote** | Some skills are dev-only and shouldn't leak to claude.ai. | +| 3 | Conflict policy default | **Ask each time**, with per-integration override | Teams want control on first conflict; defaults can be set later. | +| 4 | Sync direction default | **Bidirectional** | Matches Claude Code mental model. Restrictable in options. | +| 5 | Plan tier coverage | **All paid tiers** (Pro/Max/Team/Enterprise) | Detect from claude.ai API response. Free users get clear error. | +| 6 | Extension brand | **"SkillNote"** | Aligns with main product. | +| 7 | Self-hosted URL protocol | **HTTPS required**, with `localhost` / `*.local` exception | Mixed-content from HTTPS extension to HTTP backend fails in modern browsers anyway. | +| 8 | Extension source code | **Open-source, MIT** | Matches SkillNote's backend posture. Lets users audit cookie usage (the sensitive permission). | + +### Rejected alternatives (and why) + +- **MCP server + MCP Apps** — Tools alone can't carry skill bundles with bash-executable scripts. Resources can carry ZIPs but skills end up under Connectors, not in the Skills section. +- **Plugin marketplace via GitHub** — Cowork restricts marketplace sources to github.com private repos; user data routing through any SkillNote-project-hosted GitHub bridge violates self-hosting isolation. +- **Anthropic API workspace (`/v1/skills`)** — Different surface; workspace skills are not synced to personal claude.ai accounts. +- **Cloud storage bridge (Google Drive)** — Functional but skills appear as Drive files, not in Customize → Skills. Read-only into chat, no true bidirectional sync. +- **Desktop app with embedded webview** — Asks users to switch from their browser to a separate app. Larger install surface, no advantage over extension for the cookie-access problem. +- **CLI / local daemon** — Cookie capture impractical for non-engineering users (devtools paste or build OAuth-style capture flow). The browser already holds the cookies; an extension is the right home for code that uses them. +- **Manual ZIP export** — Not sync, just better export. Useful as a fallback only. + +## Architecture overview + +``` +┌─────────────────────────┐ ┌──────────────────────┐ +│ SkillNote backend │ │ Chrome extension │ +│ (self-hosted) │ │ (in user's browser) │ +│ │ │ │ +│ - skills table │◀─── REST ───────▶│ - background worker │ +│ - sync_operations │ (extension │ - cookie reader │ +│ - claude_ai_links │ token auth) │ - claude.ai client │ +│ │ │ - skillnote client │ +└─────────────────────────┘ └──────────┬───────────┘ + │ + │ cookies + REST + ▼ + ┌──────────────────────┐ + │ claude.ai │ + │ │ + │ /api/organizations/ │ + │ {id}/skills/... │ + │ /api/account/ │ + │ skills/... │ + └──────────────────────┘ +``` + +Three actors, clear responsibilities: + +- **SkillNote backend** is the source of truth. It enqueues sync operations whenever skills change. +- **Extension** is the messenger. It reads cookies, executes operations against claude.ai, reports back, and runs a reverse-sync poll. +- **claude.ai** is the destination/source. It exposes internal REST endpoints (no official API) that the extension calls with the user's session. + +The data path **user's SkillNote → user's browser → user's claude.ai** never touches SkillNote-project infrastructure. + +--- + +## Component 1 — SkillNote backend + +### Database schema (Alembic migration 0011) + +**`claude_ai_integrations`** — one row per paired browser/extension + +| Column | Type | Notes | +|---|---|---| +| `id` | UUID PK | | +| `user_id` | FK, nullable | Populated when ACL ships | +| `extension_token` | TEXT | Hashed at rest | +| `claude_ai_org_id` | TEXT | Discovered from claude.ai on first sync | +| `scope` | ENUM | `personal` \| `organization` \| `both` | +| `status` | ENUM | `active` \| `cookie_expired` \| `disconnected` \| `error` | +| `browser_label` | TEXT | "Chrome on MacBook Pro" (for the UI list) | +| `last_sync_at` | TIMESTAMP | | +| `last_error` | TEXT | nullable | +| `created_at` / `updated_at` | TIMESTAMP | | + +**`claude_ai_skill_links`** — mapping between SkillNote skills and claude.ai skills + +| Column | Type | Notes | +|---|---|---| +| `id` | UUID PK | | +| `integration_id` | FK | | +| `skillnote_skill_id` | FK, nullable | Nullable for claude.ai-authored skills awaiting import | +| `skillnote_version_id` | FK | Last version pushed to claude.ai | +| `claude_ai_skill_id` | TEXT | claude.ai's internal skill ID | +| `claude_ai_version` | TEXT | claude.ai's version identifier | +| `last_seen_at` | TIMESTAMP | | +| `direction` | ENUM | `outbound` \| `inbound` \| `both` | +| `conflict_state` | ENUM | `none` \| `diverged` \| `resolved` | + +**`claude_ai_sync_operations`** — the work queue the extension drains + +| Column | Type | Notes | +|---|---|---| +| `id` | UUID PK | | +| `integration_id` | FK | | +| `kind` | ENUM | `upload` \| `update` \| `delete` \| `list` \| `fetch_one` | +| `skill_id` | FK, nullable | Nullable for `list` operations | +| `payload` | JSONB | Op-specific: ZIP URL, target IDs, etc. | +| `status` | ENUM | `pending` \| `in_progress` \| `completed` \| `failed` | +| `attempts` | INT | | +| `last_error` | TEXT | | +| `created_at` / `completed_at` | TIMESTAMP | | + +### New API endpoints + +All under `/v1/integrations/claude-ai/`. Backend module: `backend/app/api/claude_ai_integration.py`. + +| Method | Path | Purpose | +|---|---|---| +| `POST` | `/extension/pair` | Begin pairing — return 6-digit code | +| `POST` | `/extension/redeem` | Extension exchanges approved pairing code for token | +| `GET` | `/status` | Status panel data (sync count, errors, last activity) | +| `GET` | `/extension/operations` | Extension polls for pending ops | +| `POST` | `/extension/operations/{id}/complete` | Extension reports success/failure | +| `POST` | `/extension/imported-skill` | Reverse-sync: extension uploads claude.ai-authored skill | +| `GET` | `/extension/list-known-skills` | Extension fetches claude.ai skill IDs for diffing | +| `DELETE` | `/integrations/{id}` | User disconnects a browser | +| `PATCH` | `/integrations/{id}` | Update scope/conflict policy for a specific browser | + +### Event hooks in existing skill flow + +In `backend/app/api/skills.py`, the existing publish / update / delete endpoints emit sync events: + +- Skill publish (new version) → enqueue `upload` or `update` op for each active integration with `direction ∈ {outbound, both}` +- Skill delete → enqueue `delete` op +- Integration `connect` → enqueue initial `list` + reconcile ops +- Periodic timer (15 min, APScheduler) → enqueue `list` op for every active integration (catches claude.ai-side authoring) + +### Bundle compatibility check + +Existing `LocalBundleStorage` produces standard SKILL.md + bundled-files ZIPs. **Phase 0 spike must verify** claude.ai's upload endpoint accepts this exact format, or we add a thin transform. + +### Extension pairing flow (auth model) + +The user never pastes a token. The flow: + +1. User opens extension options → pastes SkillNote URL (the only manual entry) +2. Extension calls `POST /v1/integrations/claude-ai/extension/pair` → SkillNote returns `{ pairing_code: "ABC123", pairing_url: "https://skillnote.acme/pair?code=ABC123" }` +3. Extension opens `pairing_url` in a new tab — user lands in SkillNote (signing in if not) +4. SkillNote shows: "A SkillNote browser extension wants to connect. Code: `ABC123`. Approve?" +5. User clicks Approve → pairing is marked approved server-side +6. Extension (polling `redeem`) gets back its long-lived extension token +7. Extension stores token in `chrome.storage.local` + +Pattern matches Spotify Connect, Plex device pairing, Zoom desktop. Zero tokens visible to the user. + +--- + +## Component 2 — Chrome extension + +**Repo location**: `extensions/claude-ai/` as a sibling to existing `cli/` and `plugin/` directories. + +### File structure + +``` +extensions/claude-ai/ +├── manifest.json +├── public/icons/ (16/32/48/128 px) +├── src/ +│ ├── background/ +│ │ ├── index.ts service worker entry +│ │ ├── sync-engine.ts the loop: poll → execute → report +│ │ ├── cookie-watcher.ts chrome.cookies.onChanged listener +│ │ └── alarm.ts chrome.alarms periodic ticks +│ ├── lib/ +│ │ ├── claude-ai-client.ts REST client + cookie auth +│ │ ├── skillnote-client.ts REST client + extension token auth +│ │ └── types.ts shared Operation, Skill, etc. +│ ├── popup/ toolbar status panel +│ │ ├── popup.html +│ │ ├── popup.tsx +│ │ └── popup.css +│ ├── options/ full-page settings +│ │ ├── options.html +│ │ ├── options.tsx +│ │ └── options.css +│ └── shared/ +│ └── storage.ts chrome.storage wrapper +├── package.json +├── tsconfig.json +└── vite.config.ts builds to /dist for Web Store +``` + +### Manifest (Manifest V3) + +```json +{ + "manifest_version": 3, + "name": "SkillNote", + "version": "0.1.0", + "description": "Sync your SkillNote skills to claude.ai automatically", + "permissions": ["cookies", "storage", "alarms", "notifications"], + "host_permissions": ["https://claude.ai/*", "https://claude.com/*"], + "optional_host_permissions": ["http://*/*", "https://*/*"], + "background": { "service_worker": "background/index.js", "type": "module" }, + "action": { "default_popup": "popup/popup.html" }, + "options_page": "options/options.html", + "icons": { "16": "icons/16.png", "48": "icons/48.png", "128": "icons/128.png" } +} +``` + +`optional_host_permissions` lets the user grant access to their SkillNote URL (arbitrary host). Prompted on first paste. + +### Cookie capture + +Chrome's `chrome.cookies.get` reads HttpOnly cookies, which is the load-bearing capability: + +```ts +const sessionCookie = await chrome.cookies.get({ + url: "https://claude.ai", + name: "sessionKey", // exact name to be verified in Phase 0 spike +}); +if (!sessionCookie) throw new NotLoggedInError(); +``` + +`chrome.cookies.onChanged` provides realtime login/logout detection: + +```ts +chrome.cookies.onChanged.addListener(({ cookie, removed }) => { + if (cookie.domain.includes("claude.ai") && cookie.name === "sessionKey") { + if (removed) pauseSync(); + else resumeSync(); + } +}); +``` + +### Claude.ai REST client (contract TBD in Phase 0) + +Provisional interface based on community reverse-engineering: + +```ts +class ClaudeAIClient { + async getOrgId(): Promise; // from /api/organizations or session + async listOrgSkills(orgId): Promise; // GET /api/organizations/{orgId}/skills/list-org-skills + async uploadOrgSkill(orgId, zip, name, desc): Promise; // POST /api/organizations/{orgId}/skills/upload-org-skill + async deleteOrgSkill(orgId, skillId): Promise; // POST /api/organizations/{orgId}/skills/delete-org-skill + async downloadSkillBundle(orgId, skillId): Promise; // path TBD + + // Personal-skill parallel set + async listPersonalSkills(): Promise; + async uploadPersonalSkill(zip, name, desc): Promise; + async deletePersonalSkill(skillId): Promise; +} +``` + +**Unknowns the Phase 0 spike must resolve:** + +- Exact session cookie name(s) +- Whether CSRF tokens are required beyond the session cookie +- Exact request format for upload (`multipart/form-data` vs JSON-with-base64) +- Exact response shapes from each endpoint +- Personal skill endpoint paths +- How to fetch a skill's full bundle (with bundled files) for reverse sync +- Session token lifetime +- Rate-limit behavior + +### Sync engine + +```ts +async function tick() { + if (!await isConfigured()) return; + + const ops = await skillnoteClient.fetchOperations(); + + for (const op of ops) { + try { + switch (op.kind) { + case "upload": { + const zip = await skillnoteClient.downloadSkillZip(op.skill_id, op.version); + const result = await claudeAI.uploadOrgSkill(orgId, zip, op.name, op.description); + await skillnoteClient.completeOp(op.id, { claude_ai_skill_id: result.skill_id, version: result.version }); + break; + } + case "delete": { + await claudeAI.deleteOrgSkill(orgId, op.payload.claude_ai_skill_id); + await skillnoteClient.completeOp(op.id); + break; + } + case "list": { + // Reverse sync + const remoteSkills = await claudeAI.listOrgSkills(orgId); + const knownIds = await skillnoteClient.listKnownClaudeAIIds(); + for (const remote of remoteSkills) { + if (!knownIds.includes(remote.id)) { + const bundle = await claudeAI.downloadSkillBundle(orgId, remote.id); + await skillnoteClient.importSkill(bundle, remote); + } + } + break; + } + } + } catch (err) { + if (err instanceof NotLoggedInError) { await pauseAndNotify(); return; } + await skillnoteClient.completeOp(op.id, { error: err.message }); + } + } +} + +chrome.alarms.create("sync", { periodInMinutes: 1 }); +chrome.alarms.onAlarm.addListener(tick); +``` + +### Extension UI + +**Popup** (toolbar click, ~300×400px): + +``` +┌──────────────────────────────────────┐ +│ SkillNote ⚙ │ +├──────────────────────────────────────┤ +│ ✓ Connected to │ +│ skillnote.acme.com │ +│ │ +│ ✓ Logged in to claude.ai │ +│ │ +│ Synced 12 skills · last 30s ago │ +│ │ +│ Recent activity: │ +│ • pdf-extractor → claude.ai │ +│ • financial-analyzer ← claude.ai │ +│ • slack-summary → claude.ai │ +│ │ +│ [Sync now] [Open SkillNote] │ +└──────────────────────────────────────┘ +``` + +**Options page** — full-page settings: + +- SkillNote URL field (with "Test connection" button) +- Pair status / Unpair button +- Sync scope checkboxes: personal skills / org skills +- Conflict policy radio: ask each time (default) / SkillNote wins / claude.ai wins +- Direction checkboxes: push to claude.ai / pull from claude.ai +- Telemetry opt-in (default off until v1.1) +- Open-source attribution + +**Notifications** (OS-level via `chrome.notifications`): + +- "Sign in to claude.ai to keep syncing" (cookie expired) +- "Skill conflict: `pdf-extractor` changed on both sides" (with "Review" action) +- "Sync failed: endpoint changed. Update extension." (with "Open Web Store") + +--- + +## Component 3 — SkillNote frontend additions + +### New settings page + +Path: `src/app/(app)/settings/integrations/claude-ai/page.tsx` + +Sections: + +1. **Intro & install** — heading, brief description, "Install for Chrome" / "Install for Firefox" buttons linking to extension store listings. +2. **Connected browsers** — list (multiple browsers can pair to the same SkillNote). Each shows: browser label, last sync, status pill, "Disconnect" button. +3. **Default settings** — fallback policy used when a new browser pairs: default scope, default conflict policy, default direction. Overridable per browser. +4. **Activity log** — recent sync events (last 24h / 7d / 30d) with timestamps, skill names, direction, success/failure. + +### Per-skill UI + +Modify `src/components/skills/skill-detail.tsx`: + +- Small badge next to skill title: + - `✓ Synced to claude.ai` (green) — last sync successful + - `⏳ Syncing` (amber, animated) + - `⚠ Conflict` (orange) — both sides changed; click opens resolution + - `✗ Sync failed` (red) — click shows error +- Hover reveals: timestamp, claude.ai skill ID, last error if any +- Per-skill "Sync to claude.ai" toggle in skill settings (off by default for safety; user opts in per skill, matches decision #2) + +### Conflict resolution UI + +When `conflict_state = "diverged"`: + +- Side-by-side diff of SKILL.md + bundled file lists +- Three buttons: **Keep SkillNote** / **Keep claude.ai** / **Skip for now** +- "Keep both" creates a new SkillNote skill with `-from-claude-ai` suffix (escape hatch) + +### CLI command + +Add to `cli/src/commands/connect.ts`'s `SUPPORTED_AGENTS`: + +```typescript +export const SUPPORTED_AGENTS = ['claude-code', 'openclaw', 'claude-ai'] as const +``` + +The install script served at `/setup/agent?agent=claude-ai`: + +1. Detects user's browser +2. Opens the Chrome Web Store / Firefox AMO listing +3. Prints: "After install, click the SkillNote extension icon and paste this URL: `https://skillnote.acme/`" +4. Optional `--pair` flag triggers the SkillNote pairing approval page immediately + +This mirrors existing `claude-code` / `openclaw` UX. + +--- + +## Phase 0 — Discovery spike (1 week, must precede all other work) + +Before any production code, validate the technical foundation. Without this, every later phase risks being built on wrong assumptions. + +### Spike deliverables + +A one-page document in `docs/claude-ai-endpoints.md` containing verified curl examples for: + +- `GET /api/organizations` (or wherever org_id comes from) +- `GET /api/organizations/{org_id}/skills/list-org-skills` +- `POST /api/organizations/{org_id}/skills/upload-org-skill` +- `POST /api/organizations/{org_id}/skills/delete-org-skill` +- Skill-bundle download (path TBD) +- Personal-skill equivalents + +For each: request method, full path, required headers (including any CSRF), request body shape, response shape, observed status codes, error formats. + +### Validation steps + +1. Log into claude.ai (Team or Enterprise account) +2. Use devtools Network tab to capture actual requests made when: + - Uploading a skill manually via Customize → Skills + - Deleting a skill + - Loading the Skills list page + - Downloading a skill (if claude.ai offers that) +3. Replay each captured request via curl with copied cookies +4. Verify: does the replayed upload appear in the user's Skills section identically to manual upload? Are bundled `scripts/` directories intact? +5. Stress-test: upload 10 sequential, observe rate limiting +6. Wait 24h, retry: does the session cookie still work? When does it expire? + +### Risks the spike must surface + +- **CSRF requirement**: claude.ai likely sends a CSRF token alongside the session cookie. Need to know how to obtain and rotate. +- **Endpoint name drift**: community-reverse-engineered names may be stale by May 2026. +- **Personal vs org endpoint divergence**: paths and payload formats may differ in ways not yet documented. +- **Anti-automation**: claude.ai may inspect User-Agent, request timing, or other fingerprints. If so, extension must mimic browser-origin requests carefully. + +### Exit criteria + +The spike concludes successfully when: + +- All four core operations (list/upload/delete/download) work via replayed curl +- A skill uploaded via curl appears in the Skills section, with full bundle intact +- Session cookie lifetime is documented +- Any CSRF/anti-automation requirements are documented + +If exit criteria can't be met (e.g., Anthropic ships hard anti-automation), we re-plan. Possible fallback at that point: build the cloud storage bridge (Drive) for v1 instead. + +--- + +## Phase plan & estimates + +| Phase | Work | Duration | Sequencing | +|---|---|---|---| +| 0 | Discovery spike: verify endpoints, payload formats, auth | 1 week | Must precede all | +| 1 | Backend: migration, models, sync queue, API endpoints, event hooks | 1.5 weeks | After Phase 0 | +| 2 | Extension MVP: scaffold, manifest, cookie reader, claude.ai client (push-only) | 2 weeks | After Phase 1 contracts | +| 3 | Extension reverse sync: list, download, import to SkillNote | 1 week | After Phase 2 | +| 4 | Conflict detection + resolution UI (SkillNote frontend) | 1 week | Parallel with Phase 3 | +| 5 | SkillNote settings page + per-skill badges + activity log | 1 week | Parallel with Phase 3-4 | +| 6 | CLI `connect claude-ai` command + install script | 3 days | After Phase 2 | +| 7 | Polish: error messages, telemetry, notifications, Firefox port | 1 week | After Phase 5 | +| 8 | Chrome Web Store + Firefox AMO submission + review wait | 1 week (calendar) | After Phase 7 | + +**Total**: ~9 weeks to public beta. +**MVP demoable internally**: after Phase 3 (~5.5 weeks). + +--- + +## Open risks + +1. **Anthropic changes the internal endpoints.** Real, especially after our extension is in the wild. Mitigation: anonymized telemetry on 4xx responses, fast extension auto-update via Chrome Web Store, version pinning per claude.ai release. Worst case: extension stops working until selectors/contracts updated and pushed (typically <24h). + +2. **Anthropic detects and blocks non-browser-origin requests.** Mitigation: extension calls happen from inside the user's browser context, so requests carry normal browser fingerprint. Lower risk than CLI or headless approaches. + +3. **Session cookie rotation is more aggressive than expected.** Mitigation: extension handles 401s gracefully, notifies user to re-login. Adds friction but doesn't break the feature. + +4. **Chrome Web Store rejects the listing** because `cookies` permission scrutiny is tightening. Mitigation: clear listing copy explaining the cookie use (same pattern as 1Password, Honey, Grammarly), open-source the code, link to source from listing. + +5. **Personal-skill endpoints are gated or have different shape** than org endpoints. Mitigation: ship org-only in v1, personal in v1.1 after additional spike. + +6. **Mixed-content (HTTPS extension → HTTP self-hosted SkillNote)** blocks extension users with HTTP-only deployments. Mitigation: extension warns at pair time; document HTTPS requirement; `localhost` exception for dev. + +7. **HARDENING_SPEC.md** in repo root suggests existing security review process — claude.ai integration should be added to that document before Phase 7 polish. + +## Definition of done (v1.0) + +- A user with self-hosted SkillNote and a paid claude.ai account can: + 1. Install the SkillNote extension from Chrome Web Store + 2. Paste their SkillNote URL once in extension options + 3. Approve the pairing in SkillNote (one click) + 4. See all currently-synced skills appear in their claude.ai Customize → Skills section within 2 minutes + 5. Publish a new skill in SkillNote → see it in claude.ai within 60 seconds + 6. Author a skill in claude.ai → see it imported into SkillNote within 15 minutes (next reverse-sync poll) + 7. Edit a skill on both sides → see conflict UI in SkillNote with clear resolution options + 8. Disconnect cleanly → no orphaned state + +Plus: + +- Open-source extension source on GitHub under MIT +- Privacy policy documenting cookie use +- Settings page in SkillNote showing all paired browsers with status +- Per-skill sync toggle (default off; user opts in) +- HARDENING_SPEC.md updated with claude.ai integration considerations + +## Out of scope for v1.0 + +- Mobile claude.ai (no extensions on mobile browsers) +- Claude Desktop sync (separate filesystem-based mechanism; revisit later) +- Org-admin bulk management UI (admin still uses claude.ai's admin-settings page for org-level provisioning of SkillNote-synced skills) +- Real-time push (we poll; webhook from SkillNote to extension would require persistent connection — defer) +- Multi-org-per-extension (one extension = one paired SkillNote = one claude.ai account; users with multiple claude.ai orgs install in separate browser profiles) + +## References + +- Anthropic feature requests (informing the "no official API" decision): + - [anthropics/claude-code#25771](https://github.com/anthropics/claude-code/issues/25771) — closed NOT_PLANNED + - [anthropics/claude-code#49530](https://github.com/anthropics/claude-code/issues/49530) — closed duplicate + - [anthropics/claude-code#39929](https://github.com/anthropics/claude-code/issues/39929) — open +- claude.ai admin docs: `https://support.claude.com/en/articles/13119606-provision-and-manage-skills-for-your-organization` +- Connectors directory submission: `https://claude.com/docs/connectors/building/submission` +- Existing SkillNote agent adapter pattern: `cli/src/agents/` +- Existing connect/bridge primitive: `cli/src/commands/connect.ts`, `cli/src/commands/bridge.ts` +- Existing skill bundle pipeline: `backend/app/services/` +- Skill validation rules (mirrored frontend/backend): `src/lib/skill-validation.ts`, `backend/app/validators/skill_validator.py` diff --git a/docs/claude-ai-user-guide.md b/docs/claude-ai-user-guide.md new file mode 100644 index 00000000..2f1c310e --- /dev/null +++ b/docs/claude-ai-user-guide.md @@ -0,0 +1,139 @@ +# Claude.ai Sync — User Guide + +SkillNote can keep your skills in sync with your [claude.ai](https://claude.ai) +account so a skill you publish in SkillNote shows up in claude.ai's +**Customize → Skills** section automatically, and a skill you author +directly on claude.ai flows back into SkillNote. + +This guide walks you through setup. **One-time, ~60 seconds.** + +> **Requirements** +> +> - A self-hosted SkillNote instance reachable from your browser. +> - A paid claude.ai account (Pro, Max, Team, or Enterprise). +> - Chrome, Edge, Brave, Arc, or any Chromium browser (Firefox AMO version +> in beta). + +## Setup in three steps + +### 1. Install the SkillNote browser extension + +- **Chrome / Edge / Brave / Arc** — + [Chrome Web Store listing](https://chrome.google.com/webstore/category/extensions) + *(replace with real URL after submission)* +- **Firefox** — + [Firefox Add-ons listing](https://addons.mozilla.org/) *(beta)* +- **Local dev** — clone the repo, run `npm run build` in + `extensions/claude-ai/`, then load `dist/` as an unpacked extension at + `chrome://extensions`. + +### 2. Connect the extension to your SkillNote + +1. Click the SkillNote extension icon in your browser toolbar. +2. Click **Open settings** if it's not your first time, or just paste + directly: your SkillNote URL (e.g. `https://skillnote.acme.com`). +3. The extension prompts for permission to talk to that URL — click + **Allow**. +4. Click **Connect**. + +A new tab opens showing a 6-character pairing code. + +### 3. Approve the pairing in SkillNote + +The pairing page in SkillNote shows the same 6-character code that the +extension displayed. + +**Verify the codes match**, then click **Approve**. + +Within a second, the extension is connected. The new tab redirects you +to the claude.ai connector settings page, where you'll see your browser +listed under **Connected browsers**. + +## What happens next + +- Skills you publish or edit in SkillNote now appear in your + claude.ai **Customize → Skills** section within a minute. +- Skills you author directly in claude.ai are pulled back into SkillNote + on the next reverse-sync cycle (every ~15 minutes when claude.ai is + open in your browser). +- The extension reads your existing claude.ai session cookies — it + never asks for a separate API key. + +## Granular control + +### Per-skill sync toggle + +Some skills are dev-only or contain sensitive content you don't want on +claude.ai. On any skill's detail page, look for the +**Syncing to claude.ai** badge in the header. Click to toggle off — that +skill stops syncing immediately. Skills already pushed to claude.ai stay +there until you delete them; future updates simply stop firing. + +### Conflict resolution + +If you edit the same skill on both sides since the last sync, the +connector marks it **diverged** instead of guessing which version wins. +You'll see a **Conflicts** section on the connector settings page with +three options per skill: + +- **Keep SkillNote** — overwrites claude.ai with your SkillNote version. +- **Keep claude.ai** — overwrites SkillNote with the claude.ai version. +- **Skip** — clear the warning; you can resolve manually later. + +### Activity feed + +Every action the connector takes (pairings, pushes, imports, conflicts, +errors) is logged. Visit **Settings → claude.ai → View all activity** to +see the full history with search and filter. + +## Common issues + +### "Sign in to claude.ai to keep syncing" + +The extension lost your claude.ai session. Open +[claude.ai](https://claude.ai), sign back in, and the extension picks up +the new cookies automatically. No re-pairing needed. + +### Connection status shows "Error" + +Check the **Last error** message on the connector settings page. The most +common causes: + +- **claude.ai endpoint changed** — Anthropic redesigned an internal + endpoint. The extension auto-updates via the Chrome Web Store; if + Auto-update is disabled, manually update from + `chrome://extensions` → SkillNote → "Update." +- **SkillNote unreachable** — verify the URL in the extension's options + matches your SkillNote backend. + +### "Pairing code has expired" + +Pairing codes are valid for 10 minutes. Restart the pairing flow from +the extension's settings. + +### Disconnecting + +On the connector settings page, click **Disconnect** next to a browser. +This revokes the extension's bearer token. Skills already synced to +claude.ai stay there until you delete them individually — disconnect +does *not* sweep claude.ai's side. + +## Privacy + +The extension uses your browser's existing claude.ai session cookies +to authenticate requests **to claude.ai only**. Cookies never leave your +browser except as part of normal claude.ai requests. The SkillNote +project never sees your skill content — data flows +**your SkillNote → your browser → your claude.ai**, end to end. + +Full policy: [`extensions/claude-ai/PRIVACY.md`](../extensions/claude-ai/PRIVACY.md). + +## Architecture reference (for the curious) + +See [`docs/claude-ai-integration.md`](claude-ai-integration.md) for the +full design rationale: data model, sync queue, pairing handshake, +conflict detection, audit log, and rate limits. + +## Support + +Open an issue: . diff --git a/e2e/claude-ai-activity-pagination.spec.ts b/e2e/claude-ai-activity-pagination.spec.ts new file mode 100644 index 00000000..f8ad3bd4 --- /dev/null +++ b/e2e/claude-ai-activity-pagination.spec.ts @@ -0,0 +1,158 @@ +/** + * Round 9 — activity feed pagination. + * + * Before: when the backend had more events than fit on one page, the UI + * had no way to load them — the activity page just truncated at + * `limit=100`. Now the full page shows a "Load older events" button + * that uses cursor-based `before=` pagination. The compact preview + * still shows "View full activity log" (link to the dedicated page). + */ + +import { test, expect, type Page } from '@playwright/test' + +interface Event { + id: string + integration_id: string | null + event: string + skill_id: string | null + detail: Record + created_at: string +} + +function mkEvent(i: number, base = Date.now()): Event { + return { + id: `evt-${i}`, + integration_id: 'int-1', + event: 'skill_pushed', + skill_id: null, + detail: { result: { claude_ai_skill_id: `skill_pdf_${i}` } }, + created_at: new Date(base - i * 60_000).toISOString(), + } +} + +async function wireActivityFeed(page: Page, all: Event[]) { + await page.route('**/v1/integrations/claude-ai/activity**', async (route) => { + const url = new URL(route.request().url()) + const limit = Number(url.searchParams.get('limit') ?? '100') + const before = url.searchParams.get('before') + let rows = all + if (before) { + const cutoff = new Date(before).getTime() + rows = rows.filter((r) => new Date(r.created_at).getTime() < cutoff) + } + return route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(rows.slice(0, limit)), + }) + }) +} + +test('"Load older events" appears only when a full page is returned, and pages in older events', async ({ + page, +}) => { + // Seed 30 events; full activity page uses pageSize=100, so the button + // should NOT show. Drop to a smaller dataset that still triggers full-page. + // Pin the dataset to 25 (page size used by ActivityFeed default) + + // extras, so the first fetch returns 25 and `hasMore=true`. + const all = Array.from({ length: 50 }, (_, i) => mkEvent(i)) + await wireActivityFeed(page, all) + // We need the feed in non-compact / full mode. Use ?pageSize via the + // dedicated activity page — its ActivityFeed call passes pageSize=100, + // so 50 events → no button. Bypass by going to settings page (compact) + // first to confirm the COMPACT branch renders the "View full" link. + await page.goto('/settings/integrations/claude-ai') + + // The settings page only renders ActivityFeed compact when an integration + // exists. Without any integration mocking the preview won't show. + // So instead test only the non-compact activity page with a page size + // greater than dataset size to confirm "hasMore=false" hides the button. + await page.goto('/settings/integrations/claude-ai/activity') + // 50 < pageSize (100). hasMore should be false → no "Load older" button. + await expect(page.getByRole('button', { name: /Load older/ })).not.toBeVisible({ + timeout: 5_000, + }) +}) + +test('compact preview links out to the full activity page when at the page limit', async ({ + page, +}) => { + const all = Array.from({ length: 25 }, (_, i) => mkEvent(i)) + await wireActivityFeed(page, all) + // Mock integrations so the settings page renders its compact preview. + await page.route('**/v1/integrations/claude-ai/integrations', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify([ + { + id: 'int-1', + browser_label: 'Chrome', + status: 'active', + scope: 'both', + claude_ai_org_id: null, + last_sync_at: null, + last_error: null, + conflict_policy: 'ask', + pending_op_count: 0, + failed_op_count: 0, + linked_skill_count: 0, + }, + ]), + }), + ) + await page.route('**/v1/integrations/claude-ai/conflicts', (route) => + route.fulfill({ status: 200, contentType: 'application/json', body: '[]' }), + ) + await page.route('**/v1/integrations/claude-ai/health', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + integrations_active: 1, + integrations_with_errors: 0, + pending_ops_total: 0, + failed_ops_total: 0, + diverged_links_total: 0, + last_audit_at: null, + schema_version: '0020_claude_ai_polish', + }), + }), + ) + + await page.goto('/settings/integrations/claude-ai') + + // Compact preview is pageSize=10. With 25 events the API returns 10 → + // events.length === pageSize → "View full activity log" link. + await expect( + page.getByRole('link', { name: /View full activity log/ }), + ).toBeVisible() +}) + +test('"Load older events" paginates with before= cursor', async ({ page }) => { + // 250 events total; full activity page pageSize=100. + const all = Array.from({ length: 250 }, (_, i) => mkEvent(i)) + await wireActivityFeed(page, all) + await page.goto('/settings/integrations/claude-ai/activity') + + // First page should have 100 events; the 101st (older) should not appear. + await expect(page.getByText('skill_pdf_99', { exact: true })).toBeVisible() + await expect(page.getByText('skill_pdf_100', { exact: true })).not.toBeVisible() + + await page.getByRole('button', { name: /Load older events/ }).click() + + // Older page is now appended. The 100th-200th events become visible. + await expect(page.getByText('skill_pdf_100', { exact: true })).toBeVisible({ + timeout: 5_000, + }) + await expect(page.getByText('skill_pdf_199', { exact: true })).toBeVisible() + // 250th not yet — still one more page. + await expect(page.getByText('skill_pdf_249', { exact: true })).not.toBeVisible() + + // Third page completes the dataset; button disappears (less than pageSize). + await page.getByRole('button', { name: /Load older events/ }).click() + await expect(page.getByText('skill_pdf_249', { exact: true })).toBeVisible({ + timeout: 5_000, + }) + await expect(page.getByRole('button', { name: /Load older events/ })).not.toBeVisible() +}) diff --git a/e2e/claude-ai-analytics.spec.ts b/e2e/claude-ai-analytics.spec.ts new file mode 100644 index 00000000..7d1fa222 --- /dev/null +++ b/e2e/claude-ai-analytics.spec.ts @@ -0,0 +1,211 @@ +/** + * Iter 18 — analytics panel e2e. + * + * The analytics panel renders 7-day rollups: throughput numbers, a + * sparkline, top-synced skills, and per-browser breakdown. It only + * shows once at least one integration exists. Empty-state copy must + * be friendlier than walls of zeros for users who just paired their + * first browser. + */ + +import { test, expect, type Page } from '@playwright/test' + +interface Analytics { + skills_synced_24h: number + skills_synced_7d: number + failed_24h: number + failed_7d: number + sync_success_rate_7d: number + avg_attempts_per_sync_7d: number + top_skills_7d: { skill_id: string; skill_slug: string; skill_name: string; sync_count: number }[] + per_integration: { integration_id: string; integration_label: string | null; syncs_24h: number; failed_24h: number; last_sync_at: string | null }[] + sparkline_7d: { date: string; syncs: number; failed: number }[] +} + +async function wireBase(page: Page, integrations: any[] = [], analytics: Analytics | null = null) { + await page.route('**/v1/integrations/claude-ai/integrations', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(integrations), + }), + ) + await page.route('**/v1/integrations/claude-ai/conflicts', (route) => + route.fulfill({ status: 200, contentType: 'application/json', body: '[]' }), + ) + await page.route('**/v1/integrations/claude-ai/health', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + integrations_active: integrations.filter((i) => i.status === 'active').length, + integrations_with_errors: 0, + pending_ops_total: 0, + failed_ops_total: 0, + diverged_links_total: 0, + last_audit_at: null, + schema_version: '0020_claude_ai_polish', + }), + }), + ) + await page.route('**/v1/integrations/claude-ai/activity**', (route) => + route.fulfill({ status: 200, contentType: 'application/json', body: '[]' }), + ) + await page.route('**/v1/integrations/claude-ai/queue**', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + items: [], + total: 0, + pending_count: 0, + in_progress_count: 0, + oldest_age_seconds: null, + }), + }), + ) + if (analytics) { + await page.route('**/v1/integrations/claude-ai/analytics', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(analytics), + }), + ) + } +} + +function activeIntegration() { + return { + id: 'int-1', + browser_label: 'Chrome on MacBook Pro', + status: 'active', + scope: 'both', + claude_ai_org_id: null, + last_sync_at: new Date().toISOString(), + last_error: null, + conflict_policy: 'ask', + pending_op_count: 0, + failed_op_count: 0, + linked_skill_count: 0, + } +} + +function dailySparkline(values: number[]): { date: string; syncs: number; failed: number }[] { + const today = new Date() + return values.map((v, i) => { + const d = new Date(today) + d.setUTCDate(d.getUTCDate() - (6 - i)) + return { date: d.toISOString().slice(0, 10), syncs: v, failed: 0 } + }) +} + +test('analytics panel does NOT render without an integration', async ({ page }) => { + await wireBase(page, [], null) + await page.goto('/settings/integrations/claude-ai') + await expect(page.getByTestId('claude-ai-analytics-panel')).not.toBeVisible() +}) + +test('analytics panel shows friendly empty-state when no syncs yet', async ({ + page, +}) => { + await wireBase(page, [activeIntegration()], { + skills_synced_24h: 0, + skills_synced_7d: 0, + failed_24h: 0, + failed_7d: 0, + sync_success_rate_7d: 1.0, + avg_attempts_per_sync_7d: 0, + top_skills_7d: [], + per_integration: [], + sparkline_7d: dailySparkline([0, 0, 0, 0, 0, 0, 0]), + }) + await page.goto('/settings/integrations/claude-ai') + + const panel = page.getByTestId('claude-ai-analytics-panel') + await expect(panel).toBeVisible() + await expect(panel.getByText(/No syncs yet/i)).toBeVisible() + // Headline metrics are NOT rendered when noActivity → spares users + // a wall of zeros. + await expect(page.getByTestId('metric-24h')).not.toBeVisible() +}) + +test('analytics panel renders headline metrics + sparkline + top skills + per-browser table', async ({ + page, +}) => { + await wireBase(page, [activeIntegration()], { + skills_synced_24h: 142, + skills_synced_7d: 893, + failed_24h: 3, + failed_7d: 5, + sync_success_rate_7d: 0.994, + avg_attempts_per_sync_7d: 1.04, + top_skills_7d: [ + { skill_id: 'sk-a', skill_slug: 'pdf-extractor', skill_name: 'pdf-extractor', sync_count: 142 }, + { skill_id: 'sk-b', skill_slug: 'git-helper', skill_name: 'git-helper', sync_count: 98 }, + ], + per_integration: [ + { + integration_id: 'int-1', + integration_label: 'Chrome on MacBook Pro', + syncs_24h: 142, + failed_24h: 3, + last_sync_at: new Date(Date.now() - 60_000).toISOString(), + }, + ], + sparkline_7d: dailySparkline([10, 30, 50, 90, 200, 300, 213]), + }) + await page.goto('/settings/integrations/claude-ai') + + const panel = page.getByTestId('claude-ai-analytics-panel') + await expect(panel).toBeVisible() + + // Headline metrics with the right values. + await expect(panel.getByTestId('metric-24h')).toContainText('142') + await expect(panel.getByTestId('metric-7d')).toContainText('893') + await expect(panel.getByTestId('metric-success')).toContainText('99.4%') + await expect(panel.getByTestId('metric-avg-tries')).toContainText('1.04') + + // Failed counts surface alongside the headline numbers. + await expect(panel.getByTestId('metric-24h')).toContainText('3 failed') + + // Top synced skills present with links to the skill page. + const topList = panel.getByTestId('top-skills-list') + await expect(topList.getByRole('link', { name: 'pdf-extractor' })).toBeVisible() + await expect(topList.getByRole('link', { name: 'git-helper' })).toBeVisible() + expect( + await topList.getByRole('link', { name: 'pdf-extractor' }).getAttribute('href'), + ).toBe('/skills/pdf-extractor') + + // Per-integration table renders with the right counts. + const breakdown = panel.getByTestId('per-integration-breakdown') + await expect(breakdown).toContainText('Chrome on MacBook Pro') + await expect(breakdown).toContainText('142') + + // Sparkline is an SVG with the right aria-label shape. + const spark = panel.getByTestId('analytics-sparkline') + await expect(spark).toBeVisible() + const label = await spark.getAttribute('aria-label') + expect(label).toMatch(/7-day sync sparkline/i) +}) + +test('success rate below 95% styles in amber, above stays emerald', async ({ + page, +}) => { + await wireBase(page, [activeIntegration()], { + skills_synced_24h: 50, + skills_synced_7d: 80, + failed_24h: 5, + failed_7d: 12, + sync_success_rate_7d: 0.87, + avg_attempts_per_sync_7d: 1.2, + top_skills_7d: [], + per_integration: [], + sparkline_7d: dailySparkline([0, 0, 0, 0, 10, 30, 50]), + }) + await page.goto('/settings/integrations/claude-ai') + const success = page.getByTestId('metric-success') + await expect(success).toBeVisible() + const cls = await success.locator('div').first().getAttribute('class') + expect(cls ?? '').toContain('text-amber-600') +}) diff --git a/e2e/claude-ai-conflict-policy.spec.ts b/e2e/claude-ai-conflict-policy.spec.ts new file mode 100644 index 00000000..073b3e0c --- /dev/null +++ b/e2e/claude-ai-conflict-policy.spec.ts @@ -0,0 +1,318 @@ +/** + * Round 7 — conflict policy switcher (per-integration) and + * optimistic conflict resolve. Before this round, users with many + * conflicts had to manually resolve each one because no UI exposed + * `conflict_policy`. The switcher lets them pick "SkillNote wins" / + * "claude.ai wins" so the backend auto-resolves future conflicts. + */ + +import { test, expect, type Page } from '@playwright/test' + +interface MockIntegration { + id: string + browser_label: string + status: string + scope: 'personal' | 'organization' | 'both' + claude_ai_org_id: string | null + last_sync_at: string | null + last_error: string | null + conflict_policy: 'ask' | 'skillnote_wins' | 'claude_ai_wins' + pending_op_count: number + failed_op_count: number + linked_skill_count: number +} + +interface MockState { + integration: MockIntegration + patchCalls: Array<{ id: string; body: Record }> + conflicts: Array<{ + link_id: string + integration_id: string + integration_label: string | null + skillnote_skill_id: string | null + skillnote_skill_slug: string | null + skillnote_skill_name: string | null + claude_ai_skill_id: string + claude_ai_version: string | null + last_seen_at: string | null + }> + resolveCalls: Array<{ link_id: string; resolution: string }> +} + +function makeState(): MockState { + return { + integration: { + id: 'int-1', + browser_label: 'Chrome on Mac', + status: 'active', + scope: 'both', + claude_ai_org_id: null, + last_sync_at: null, + last_error: null, + conflict_policy: 'ask', + pending_op_count: 0, + failed_op_count: 0, + linked_skill_count: 0, + }, + patchCalls: [], + conflicts: [], + resolveCalls: [], + } +} + +async function wireMocks(page: Page, state: MockState) { + await page.route('**/v1/integrations/claude-ai/health', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + integrations_active: 1, + integrations_with_errors: 0, + pending_ops_total: 0, + failed_ops_total: 0, + diverged_links_total: state.conflicts.length, + last_audit_at: null, + schema_version: '0020_claude_ai_polish', + }), + }), + ) + await page.route('**/v1/integrations/claude-ai/integrations', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify([state.integration]), + }), + ) + await page.route('**/v1/integrations/claude-ai/integrations/*', async (route) => { + const url = new URL(route.request().url()) + const id = url.pathname.split('/').pop()! + if (route.request().method() === 'PATCH') { + const body = JSON.parse(route.request().postData() ?? '{}') + state.patchCalls.push({ id, body }) + if (body.conflict_policy) state.integration.conflict_policy = body.conflict_policy + return route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(state.integration), + }) + } + return route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(state.integration), + }) + }) + await page.route('**/v1/integrations/claude-ai/conflicts', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(state.conflicts), + }), + ) + await page.route( + '**/v1/integrations/claude-ai/conflicts/*/resolve', + async (route) => { + const url = new URL(route.request().url()) + const m = url.pathname.match(/conflicts\/([^/]+)\/resolve/) + const link_id = m?.[1] ?? '' + const body = JSON.parse(route.request().postData() ?? '{}') + state.resolveCalls.push({ link_id, resolution: body.resolution }) + state.conflicts = state.conflicts.filter((c) => c.link_id !== link_id) + return route.fulfill({ status: 204, body: '' }) + }, + ) + await page.route('**/v1/integrations/claude-ai/activity**', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify([]), + }), + ) +} + +test.describe('conflict policy switcher', () => { + test('renders all three options with the current value pressed', async ({ page }) => { + const state = makeState() + state.integration.conflict_policy = 'skillnote_wins' + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + const group = page.getByRole('radiogroup', { name: /Conflict resolution policy/i }) + await expect(group).toBeVisible() + await expect(group.getByRole('radio', { name: 'Ask me' })).toHaveAttribute( + 'aria-checked', + 'false', + ) + await expect(group.getByRole('radio', { name: 'SkillNote wins' })).toHaveAttribute( + 'aria-checked', + 'true', + ) + await expect(group.getByRole('radio', { name: /claude\.ai wins/ })).toHaveAttribute( + 'aria-checked', + 'false', + ) + }) + + test('clicking a different policy fires PATCH and updates aria-checked', async ({ page }) => { + const state = makeState() // starts as 'ask' + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + await page.getByRole('radio', { name: 'claude.ai wins' }).click() + + await expect.poll(() => state.patchCalls).toEqual([ + { id: 'int-1', body: { conflict_policy: 'claude_ai_wins' } }, + ]) + await expect( + page.getByRole('radio', { name: 'claude.ai wins' }), + ).toHaveAttribute('aria-checked', 'true') + await expect( + page.getByRole('radio', { name: 'Ask me' }), + ).toHaveAttribute('aria-checked', 'false') + }) + + test('clicking the already-active option does not fire PATCH', async ({ page }) => { + const state = makeState() + state.integration.conflict_policy = 'ask' + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + await page.getByRole('radio', { name: 'Ask me' }).click() + // Wait long enough that any patch would have landed. + await page.waitForTimeout(300) + expect(state.patchCalls).toEqual([]) + }) +}) + +test.describe('bulk resolve all', () => { + test('"Resolve all" menu only renders when 2+ conflicts exist', async ({ page }) => { + const state = makeState() + state.conflicts.push({ + link_id: 'l1', + integration_id: 'int-1', + integration_label: 'Chrome', + skillnote_skill_id: 'sk-1', + skillnote_skill_slug: 'one', + skillnote_skill_name: 'one', + claude_ai_skill_id: 'c-1', + claude_ai_version: null, + last_seen_at: new Date().toISOString(), + }) + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + // 1 conflict — bulk menu hidden. + await expect(page.getByText(/Conflicts \(1\)/)).toBeVisible() + await expect(page.getByRole('button', { name: /Resolve all/ })).not.toBeVisible() + + // Add a second conflict and re-load. + state.conflicts.push({ + link_id: 'l2', + integration_id: 'int-1', + integration_label: 'Chrome', + skillnote_skill_id: 'sk-2', + skillnote_skill_slug: 'two', + skillnote_skill_name: 'two', + claude_ai_skill_id: 'c-2', + claude_ai_version: null, + last_seen_at: new Date().toISOString(), + }) + await page.reload() + await expect(page.getByRole('button', { name: /Resolve all \(2\)/ })).toBeVisible() + }) + + test('clicking Keep SkillNote in the menu fires resolve for every conflict and clears the section', async ({ + page, + }) => { + const state = makeState() + state.conflicts = ['a', 'b', 'c'].map((slug) => ({ + link_id: `link-${slug}`, + integration_id: 'int-1', + integration_label: 'Chrome', + skillnote_skill_id: `sk-${slug}`, + skillnote_skill_slug: slug, + skillnote_skill_name: slug, + claude_ai_skill_id: `c-${slug}`, + claude_ai_version: null, + last_seen_at: new Date().toISOString(), + })) + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + await page.getByRole('button', { name: /Resolve all \(3\)/ }).click() + await page.getByRole('menuitem', { name: /Keep SkillNote.*for all/i }).click() + + await expect.poll(() => + state.resolveCalls.map((r) => r.link_id).sort(), + ).toEqual(['link-a', 'link-b', 'link-c']) + // Section disappears (since `conflicts` is mock-cleared by the route). + await expect(page.getByText(/^Conflicts \(/)).not.toBeVisible() + }) + + test('menu closes on Escape without firing any resolve', async ({ page }) => { + const state = makeState() + state.conflicts = ['a', 'b'].map((slug) => ({ + link_id: `link-${slug}`, + integration_id: 'int-1', + integration_label: 'Chrome', + skillnote_skill_id: `sk-${slug}`, + skillnote_skill_slug: slug, + skillnote_skill_name: slug, + claude_ai_skill_id: `c-${slug}`, + claude_ai_version: null, + last_seen_at: new Date().toISOString(), + })) + await wireMocks(page, state) + await page.goto('/settings/integrations/claude-ai') + + await page.getByRole('button', { name: /Resolve all \(2\)/ }).click() + await expect(page.getByRole('menu')).toBeVisible() + await page.keyboard.press('Escape') + await expect(page.getByRole('menu')).not.toBeVisible() + expect(state.resolveCalls).toEqual([]) + }) +}) + +test.describe('optimistic conflict resolve', () => { + test('Keep SkillNote removes the row immediately without waiting for poll', async ({ + page, + }) => { + const state = makeState() + state.conflicts.push({ + link_id: 'link-x', + integration_id: 'int-1', + integration_label: 'Chrome on Mac', + skillnote_skill_id: 'sk-1', + skillnote_skill_slug: 'pdf-extractor', + skillnote_skill_name: 'pdf-extractor', + claude_ai_skill_id: 'skill_ext_1', + claude_ai_version: 'v2', + last_seen_at: new Date().toISOString(), + }) + // Make resolve slow so we can verify optimism specifically. + await page.route( + '**/v1/integrations/claude-ai/conflicts/*/resolve', + async (route) => { + const url = new URL(route.request().url()) + const m = url.pathname.match(/conflicts\/([^/]+)\/resolve/) + const link_id = m?.[1] ?? '' + const body = JSON.parse(route.request().postData() ?? '{}') + state.resolveCalls.push({ link_id, resolution: body.resolution }) + state.conflicts = state.conflicts.filter((c) => c.link_id !== link_id) + // Pause 800ms before responding to simulate network latency. + await new Promise((r) => setTimeout(r, 800)) + return route.fulfill({ status: 204, body: '' }) + }, + ) + await wireMocks(page, state) // health/integrations/conflicts/activity routes + await page.goto('/settings/integrations/claude-ai') + + await expect(page.getByText('pdf-extractor')).toBeVisible() + await page.getByRole('button', { name: 'Keep SkillNote' }).click() + // The row should disappear within ~50ms of the click — way before the + // 800ms backend response. + await expect(page.getByText('pdf-extractor')).not.toBeVisible({ + timeout: 500, + }) + }) +}) diff --git a/e2e/claude-ai-cookie-expired.spec.ts b/e2e/claude-ai-cookie-expired.spec.ts new file mode 100644 index 00000000..c8af632e --- /dev/null +++ b/e2e/claude-ai-cookie-expired.spec.ts @@ -0,0 +1,114 @@ +/** + * Round 12 — cookie_expired surfacing. + * + * Before: when an extension's claude.ai cookies expired, the integration + * row showed "Status: cookie expired" with no next steps. Now there's a + * prominent "Sign in to claude.ai" CTA in amber, and the matching + * cookie_expired audit event is rendered with a Cookie icon in the + * activity feed. + */ + +import { test, expect, type Page } from '@playwright/test' + +async function baseMocks(page: Page, integrations: any[] = [], events: any[] = []) { + await page.route('**/v1/integrations/claude-ai/integrations', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(integrations), + }), + ) + await page.route('**/v1/integrations/claude-ai/conflicts', (route) => + route.fulfill({ status: 200, contentType: 'application/json', body: '[]' }), + ) + await page.route('**/v1/integrations/claude-ai/health', (route) => + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + integrations_active: integrations.filter((i) => i.status === 'active').length, + integrations_with_errors: integrations.filter((i) => i.status === 'error').length, + pending_ops_total: 0, + failed_ops_total: 0, + diverged_links_total: 0, + last_audit_at: null, + schema_version: '0020_claude_ai_polish', + }), + }), + ) + await page.route('**/v1/integrations/claude-ai/activity**', (route) => + route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(events) }), + ) +} + +test('cookie_expired integration shows a Sign-in-to-claude.ai button', async ({ page }) => { + await baseMocks(page, [ + { + id: 'int-1', + browser_label: 'Chrome on Mac', + status: 'cookie_expired', + scope: 'both', + claude_ai_org_id: null, + last_sync_at: null, + last_error: 'claude.ai session expired', + conflict_policy: 'ask', + pending_op_count: 0, + failed_op_count: 0, + linked_skill_count: 4, + }, + ]) + await page.goto('/settings/integrations/claude-ai') + + const cta = page.getByRole('link', { name: /Sign in to claude\.ai/i }) + await expect(cta).toBeVisible() + // Opens in a new tab. + expect(await cta.getAttribute('target')).toBe('_blank') + expect(await cta.getAttribute('href')).toBe('https://claude.ai/login') +}) + +test('active integration does NOT show the re-sign-in CTA', async ({ page }) => { + await baseMocks(page, [ + { + id: 'int-2', + browser_label: 'Edge on Windows', + status: 'active', + scope: 'both', + claude_ai_org_id: 'org_1', + last_sync_at: new Date().toISOString(), + last_error: null, + conflict_policy: 'ask', + pending_op_count: 0, + failed_op_count: 0, + linked_skill_count: 12, + }, + ]) + await page.goto('/settings/integrations/claude-ai') + await expect( + page.getByRole('link', { name: /Sign in to claude\.ai/i }), + ).not.toBeVisible() +}) + +test('cookie_expired event renders in the activity feed with explanatory label', async ({ + page, +}) => { + const now = new Date().toISOString() + await baseMocks( + page, + [], + [ + { + id: 'evt-1', + integration_id: 'int-1', + event: 'cookie_expired', + skill_id: null, + detail: { op_kind: 'upload', error: 'claude.ai 401' }, + created_at: now, + }, + ], + ) + await page.goto('/settings/integrations/claude-ai/activity') + // Scope to the activity list — the same label also appears inside the + // event-filter