diff --git a/.gitignore b/.gitignore index 4b8b767c1f..572acd91cc 100644 --- a/.gitignore +++ b/.gitignore @@ -80,8 +80,13 @@ studio-demo/ apps/mesh/test-results/ apps/mesh/playwright-report/ -# Generated docs (root-level only, not apps/docs/) -/docs +# Generated docs (root-level only, not apps/docs/). Use /docs/* not /docs +# so we can un-ignore specific subdirs below — git won't traverse into an +# ignored directory. +/docs/* +# Hand-written specs/plans (superpowers brainstorming + writing-plans output) +# are first-class engineering artifacts and should be tracked. +!/docs/superpowers/ # Local dev data directory .deco diff --git a/apps/mesh/migrations/083-thread-run-locally.ts b/apps/mesh/migrations/083-thread-run-locally.ts new file mode 100644 index 0000000000..1a421645af --- /dev/null +++ b/apps/mesh/migrations/083-thread-run-locally.ts @@ -0,0 +1,28 @@ +/** + * Migration 083: Add run_locally column to threads + * + * Persists the "Run locally" choice per thread (Phase 10 of the remote + * harness dispatch project). When true, every message on this thread is + * dispatched to the thread owner's link daemon instead of the + * in-cluster sandbox. + * + * The column is set on first-message creation from the POST body and is + * not modified afterwards, so DBOS replays and subsequent messages all + * agree on where the run executes. Defaults to false so every existing + * thread continues to behave as a normal cluster-side thread. 
+ */ + +import type { Kysely } from "kysely"; + +export async function up(db: Kysely): Promise { + await db.schema + .alterTable("threads") + .addColumn("run_locally", "boolean", (col) => + col.notNull().defaultTo(false), + ) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.alterTable("threads").dropColumn("run_locally").execute(); +} diff --git a/apps/mesh/migrations/084-drop-host-sandbox-rows.ts b/apps/mesh/migrations/084-drop-host-sandbox-rows.ts new file mode 100644 index 0000000000..46fa47d551 --- /dev/null +++ b/apps/mesh/migrations/084-drop-host-sandbox-rows.ts @@ -0,0 +1,29 @@ +/** + * Migration 084: Drop sandbox_runner_state rows for the retired `host` + * runner. + * + * The `host` SandboxProvider was the local-dev shortcut that spawned the + * sandbox daemon as a child of the mesh process. It has been retired in + * favor of the laptop-side `deco link` daemon (auto-spawned by + * `bun run dev --local-sandbox-provider`), which exercises the same + * remote-cli + remote-user code paths production uses. + * + * Any `runner_kind = 'host'` rows left in dev databases are orphaned + * pointers to daemon PIDs/ports that no longer exist; the new code path + * never reads them. Delete them so the table doesn't accumulate dead + * state. Sandbox state is ephemeral by design — the runner rehydrates + * from a healthy daemon, or provisions a new one on next ensure(). + */ + +import { sql, type Kysely } from "kysely"; + +export async function up(db: Kysely): Promise { + await sql`delete from sandbox_runner_state where runner_kind = 'host'`.execute( + db, + ); +} + +export async function down(_db: Kysely): Promise { + // Irreversible — the host runner is gone, restoring deleted rows + // would point at processes that no longer exist. 
+} diff --git a/apps/mesh/migrations/085-rename-runner-kind.ts b/apps/mesh/migrations/085-rename-runner-kind.ts new file mode 100644 index 0000000000..3d5af1456a --- /dev/null +++ b/apps/mesh/migrations/085-rename-runner-kind.ts @@ -0,0 +1,121 @@ +/** + * Migration 085: Rename sandbox_runner_state.runner_kind → + * sandbox_provider_kind, rewrite vmMap JSON keys in virtualmcps, and drop + * threads.run_locally. + * + * (1) ALTER TABLE sandbox_runner_state RENAME COLUMN runner_kind TO + * sandbox_provider_kind — aligns the column name with the + * SandboxProviderKind type rename done in Task 1.1. + * + * (2) Walk every virtualmcps.metadata.vmMap[user][branch] entry and rename + * the JSON key `runnerKind` → `sandboxProviderKind`. The UPDATE is + * idempotent: it only rewrites entries where `runnerKind` exists and + * `sandboxProviderKind` does not, so re-running is safe. + * + * (3) DROP COLUMN threads.run_locally — the VM now owns runner-choice; the + * dispatch layer reads from VmMapEntry.sandboxProviderKind instead. + */ + +import { sql, type Kysely } from "kysely"; + +export async function up(db: Kysely): Promise { + await sql` + ALTER TABLE sandbox_runner_state + RENAME COLUMN runner_kind TO sandbox_provider_kind + `.execute(db); + + // Rewrite vmMap JSON keys. Wrapped in a DO block so it is a no-op in test + // environments where the virtualmcps table was never created (PGlite + // migration tests run in isolation and may not have the full schema). + await sql` + DO $$ BEGIN + UPDATE virtualmcps v + SET metadata = jsonb_set( + v.metadata, + '{vmMap}', + COALESCE( + ( + SELECT jsonb_object_agg( + user_key, + COALESCE( + ( + SELECT jsonb_object_agg( + branch_key, + CASE + WHEN entry ? 'runnerKind' AND NOT entry ? 
'sandboxProviderKind' + THEN jsonb_set(entry, '{sandboxProviderKind}', entry->'runnerKind') - 'runnerKind' + ELSE entry + END + ) + FROM jsonb_each(user_map) AS branches(branch_key, entry) + ), + '{}'::jsonb + ) + ) + FROM jsonb_each(v.metadata->'vmMap') AS users(user_key, user_map) + ), + '{}'::jsonb + ) + ) + WHERE v.metadata ? 'vmMap' + AND jsonb_typeof(v.metadata->'vmMap') = 'object'; + EXCEPTION WHEN undefined_table THEN + NULL; + END; $$ + `.execute(db); + + await db.schema.alterTable("threads").dropColumn("run_locally").execute(); +} + +export async function down(db: Kysely): Promise { + // Re-add threads.run_locally before the other reversals so that if + // anything downstream reads the column it still exists. + await db.schema + .alterTable("threads") + .addColumn("run_locally", "boolean", (col) => + col.notNull().defaultTo(false), + ) + .execute(); + + await sql` + ALTER TABLE sandbox_runner_state + RENAME COLUMN sandbox_provider_kind TO runner_kind + `.execute(db); + + await sql` + DO $$ BEGIN + UPDATE virtualmcps v + SET metadata = jsonb_set( + v.metadata, + '{vmMap}', + COALESCE( + ( + SELECT jsonb_object_agg( + user_key, + COALESCE( + ( + SELECT jsonb_object_agg( + branch_key, + CASE + WHEN entry ? 'sandboxProviderKind' AND NOT entry ? 'runnerKind' + THEN jsonb_set(entry, '{runnerKind}', entry->'sandboxProviderKind') - 'sandboxProviderKind' + ELSE entry + END + ) + FROM jsonb_each(user_map) AS branches(branch_key, entry) + ), + '{}'::jsonb + ) + ) + FROM jsonb_each(v.metadata->'vmMap') AS users(user_key, user_map) + ), + '{}'::jsonb + ) + ) + WHERE v.metadata ? 
'vmMap' + AND jsonb_typeof(v.metadata->'vmMap') = 'object'; + EXCEPTION WHEN undefined_table THEN + NULL; + END; $$ + `.execute(db); +} diff --git a/apps/mesh/migrations/086-thread-pins-and-vm-map-rekey.ts b/apps/mesh/migrations/086-thread-pins-and-vm-map-rekey.ts new file mode 100644 index 0000000000..6a86d4d40e --- /dev/null +++ b/apps/mesh/migrations/086-thread-pins-and-vm-map-rekey.ts @@ -0,0 +1,121 @@ +/** + * Migration 086: per-thread runner + harness pinning, and vmMap re-key. + * + * Adds: + * - `threads.sandbox_provider_kind` (nullable, backfilled on first message) + * - `threads.harness_id` (nullable, backfilled on first message) + * + * Re-keys `virtualmcps.metadata.vmMap` from 2-level to 3-level: + * vmMap[user][branch] = entry + * → + * vmMap[user][branch][entry.sandboxProviderKind ?? 'docker'] = entry + * Idempotent: only re-keys entries that still have `vmId` at the second level + * (which marks them as the 2-level legacy shape). The sandboxProviderKind + * field stays on the entry for one release as a tolerant-reader fallback. + */ + +import { sql, type Kysely } from "kysely"; + +export async function up(db: Kysely): Promise { + // 1. Thread columns (nullable; populated on first message by the + // POST /messages handler). + await db.schema + .alterTable("threads") + .addColumn("sandbox_provider_kind", "text") + .execute(); + await db.schema + .alterTable("threads") + .addColumn("harness_id", "text") + .execute(); + + // 2. vmMap re-key. Wrapped in a DO block so it no-ops if `virtualmcps` + // doesn't exist (PGlite test bootstrap order). + await sql` + DO $$ BEGIN + UPDATE virtualmcps v + SET metadata = jsonb_set( + v.metadata, + '{vmMap}', + ( + SELECT jsonb_object_agg( + user_key, + ( + SELECT jsonb_object_agg( + branch_key, + CASE + -- Already 3-level (entry is itself a map without vmId): pass through. + WHEN NOT (entry ? 'vmId') THEN entry + -- Legacy 2-level: wrap under sandboxProviderKind. 
+ ELSE jsonb_build_object( + COALESCE(entry->>'sandboxProviderKind', 'docker'), + entry + ) + END + ) + FROM jsonb_each(user_map) AS branches(branch_key, entry) + ) + ) + FROM jsonb_each(v.metadata->'vmMap') AS users(user_key, user_map) + ) + ) + WHERE v.metadata ? 'vmMap' + AND EXISTS ( + SELECT 1 + FROM jsonb_each(v.metadata->'vmMap') AS users(user_key, user_map) + JOIN jsonb_each(user_map) AS branches(branch_key, entry) ON true + WHERE entry ? 'vmId' + ); + EXCEPTION WHEN undefined_table THEN + -- PGlite migration order: virtualmcps not yet created. Safe no-op. + NULL; + END; $$ + `.execute(db); +} + +export async function down(db: Kysely): Promise { + // Drop the columns. + await db.schema.alterTable("threads").dropColumn("harness_id").execute(); + await db.schema + .alterTable("threads") + .dropColumn("sandbox_provider_kind") + .execute(); + + // Reverse the vmMap re-key: collapse each (user, branch, kind) back to + // (user, branch). When multiple kinds exist for the same branch, the first + // one (in JSON iteration order) wins; the others are dropped. Acceptable + // because down() is a manual recovery path, not a production rollback. + await sql` + DO $$ BEGIN + UPDATE virtualmcps v + SET metadata = jsonb_set( + v.metadata, + '{vmMap}', + ( + SELECT jsonb_object_agg( + user_key, + ( + SELECT jsonb_object_agg( + branch_key, + CASE + -- Already 2-level (entry has vmId): pass through. + WHEN entry ? 'vmId' THEN entry + -- 3-level: pick the first kind's entry. + ELSE ( + SELECT inner_entry + FROM jsonb_each(entry) AS kinds(kind_key, inner_entry) + LIMIT 1 + ) + END + ) + FROM jsonb_each(user_map) AS branches(branch_key, entry) + ) + ) + FROM jsonb_each(v.metadata->'vmMap') AS users(user_key, user_map) + ) + ) + WHERE v.metadata ? 
'vmMap'; + EXCEPTION WHEN undefined_table THEN + NULL; + END; $$ + `.execute(db); +} diff --git a/apps/mesh/migrations/087-fix-vm-map-rekey.test.ts b/apps/mesh/migrations/087-fix-vm-map-rekey.test.ts new file mode 100644 index 0000000000..d6888c8340 --- /dev/null +++ b/apps/mesh/migrations/087-fix-vm-map-rekey.test.ts @@ -0,0 +1,224 @@ +/** + * Integration test for migration 087. + * + * Migrations 085 and 086 silently no-op'd against the wrong table name. This + * test pins down that 087 actually rewrites the data on a `connections` row + * (where virtual MCPs live) and that re-running is a no-op — so a fresh + * install never regresses to the v1 shape, and a partial-prior-run state + * converges on a single re-run. + */ + +import { beforeEach, afterEach, describe, expect, it } from "bun:test"; +import { sql } from "kysely"; +import { + closeTestDatabase, + createTestDatabase, + type TestDatabase, +} from "../src/database/test-db"; +import { + createTestSchema, + seedCommonTestFixtures, +} from "../src/storage/test-helpers"; +import { up as up087 } from "./087-fix-vm-map-rekey"; + +const USER = "user_test"; +const ORG = "org_test"; + +interface ConnectionRow { + metadata: string | null; +} + +async function getMetadata( + database: TestDatabase, + id: string, +): Promise> { + const row = (await sql` + SELECT metadata FROM connections WHERE id = ${id} + `.execute(database.db)) as unknown as { rows: ConnectionRow[] }; + const raw = row.rows[0]?.metadata; + if (!raw) throw new Error(`connection ${id} not found`); + return JSON.parse(raw) as Record; +} + +async function insertVirtualConnection( + database: TestDatabase, + id: string, + metadata: Record, +): Promise { + const now = new Date().toISOString(); + // `connection_url` is NOT NULL even for VIRTUAL connections; tests use + // an inert placeholder URL since the migration only touches `metadata`. 
+ await sql` + INSERT INTO connections ( + id, organization_id, created_by, title, connection_type, + connection_url, metadata, status, created_at, updated_at + ) VALUES ( + ${id}, ${ORG}, ${USER}, 'test-vm', 'VIRTUAL', + 'virtual://test', ${JSON.stringify(metadata)}, + 'active', ${now}, ${now} + ) + `.execute(database.db); +} + +describe("migration 082 — fix vmMap rekey", () => { + let database: TestDatabase; + + beforeEach(async () => { + database = await createTestDatabase(); + await createTestSchema(database.db); + await seedCommonTestFixtures(database.db); + }); + + afterEach(async () => { + await closeTestDatabase(database); + }); + + it("wraps a v1 bare entry under its sandboxProviderKind", async () => { + await insertVirtualConnection(database, "vir_v1_kind", { + vmMap: { + [USER]: { + "deco/branch-a": { + vmId: "vm-a", + previewUrl: "http://x/preview", + sandboxProviderKind: "remote-user", + createdAt: 1779000000000, + }, + }, + }, + }); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up087(database.db as any); + + const meta = await getMetadata(database, "vir_v1_kind"); + expect(meta).toEqual({ + vmMap: { + [USER]: { + "deco/branch-a": { + "remote-user": { + vmId: "vm-a", + previewUrl: "http://x/preview", + sandboxProviderKind: "remote-user", + createdAt: 1779000000000, + }, + }, + }, + }, + }); + }); + + it("falls back to runnerKind, then docker, when sandboxProviderKind is absent", async () => { + await insertVirtualConnection(database, "vir_v1_runner", { + vmMap: { + [USER]: { + "deco/branch-runner": { + vmId: "vm-r", + previewUrl: null, + runnerKind: "agent-sandbox", + createdAt: 1779000000001, + }, + "deco/branch-default": { + vmId: "vm-d", + previewUrl: null, + }, + }, + }, + }); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up087(database.db as any); + + const meta = (await getMetadata(database, "vir_v1_runner")) as { + vmMap: Record>>; + 
}; + const userBranches = meta.vmMap[USER]!; + // The runnerKind branch was wrapped under "agent-sandbox" AND its inner + // entry's `runnerKind` key was renamed to `sandboxProviderKind`. + const runnerBranch = userBranches["deco/branch-runner"]!; + expect(Object.keys(runnerBranch)).toEqual(["agent-sandbox"]); + expect(runnerBranch["agent-sandbox"]).toEqual({ + vmId: "vm-r", + previewUrl: null, + sandboxProviderKind: "agent-sandbox", + createdAt: 1779000000001, + }); + // The bare-default branch (no kind hint) was wrapped under "docker". + expect(Object.keys(userBranches["deco/branch-default"]!)).toEqual([ + "docker", + ]); + }); + + it("renames runnerKind → sandboxProviderKind on already-v2 inner entries", async () => { + await insertVirtualConnection(database, "vir_v2_runner", { + vmMap: { + [USER]: { + "deco/branch-c": { + "agent-sandbox": { + vmId: "vm-c", + previewUrl: null, + runnerKind: "agent-sandbox", + createdAt: 1779000000002, + }, + }, + }, + }, + }); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up087(database.db as any); + + const meta = await getMetadata(database, "vir_v2_runner"); + expect(meta).toEqual({ + vmMap: { + [USER]: { + "deco/branch-c": { + "agent-sandbox": { + vmId: "vm-c", + previewUrl: null, + sandboxProviderKind: "agent-sandbox", + createdAt: 1779000000002, + }, + }, + }, + }, + }); + }); + + it("is idempotent — re-running on a clean row makes no change", async () => { + await insertVirtualConnection(database, "vir_idem", { + vmMap: { + [USER]: { + "deco/branch-i": { + "remote-user": { + vmId: "vm-i", + previewUrl: null, + sandboxProviderKind: "remote-user", + createdAt: 1779000000003, + }, + }, + }, + }, + }); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up087(database.db as any); + const after1 = await getMetadata(database, "vir_idem"); + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely 
instance + await up087(database.db as any); + const after2 = await getMetadata(database, "vir_idem"); + + expect(after2).toEqual(after1); + }); + + it("leaves rows without a vmMap untouched", async () => { + await insertVirtualConnection(database, "vir_no_map", { + instructions: "hello", + }); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up087(database.db as any); + + const meta = await getMetadata(database, "vir_no_map"); + expect(meta).toEqual({ instructions: "hello" }); + }); +}); diff --git a/apps/mesh/migrations/087-fix-vm-map-rekey.ts b/apps/mesh/migrations/087-fix-vm-map-rekey.ts new file mode 100644 index 0000000000..de7863ec9c --- /dev/null +++ b/apps/mesh/migrations/087-fix-vm-map-rekey.ts @@ -0,0 +1,150 @@ +/** + * Migration 087: Re-do the vmMap rewrites that migrations 085 and 086 + * silently skipped. + * + * Both prior migrations targeted a `virtualmcps` table that was dropped in + * migration 024 — virtual MCPs live in `connections` (with + * `connection_type = 'VIRTUAL'`) since then. The misnamed UPDATEs raised + * `undefined_table`, which both migrations caught with `EXCEPTION WHEN + * undefined_table THEN NULL`, so they no-op'd and were recorded as applied + * without rewriting any data. `connections.metadata` is also `text`, not + * `jsonb`, so the cast must be explicit. + * + * This migration performs the two rewrites that should have happened: + * + * (1) v1 → v2 rekey: legacy entries stored as + * vmMap[user][branch] = { vmId, previewUrl, ... } + * get wrapped under their sandboxProviderKind: + * vmMap[user][branch][kind] = { vmId, previewUrl, ... } + * Falls back to `runnerKind` (the pre-085 name), then `"docker"`. + * + * (2) Field rename: any inner entry that still has `runnerKind` and not + * `sandboxProviderKind` is rewritten to use the new key. + * + * Both passes are idempotent — re-running on an already-clean row is a + * no-op. 
The mesh-sdk `parseBranchMap` / `parseVmMapEntry` tolerant + * readers, plus the matching Zod preprocess adapters in `virtual-mcp.ts`, + * continue to accept either shape on read until this migration has run + * everywhere. + */ + +import { sql, type Kysely } from "kysely"; + +export async function up(db: Kysely): Promise { + // Pass 1: v1 (2-level, bare VmMapEntry at branch) → v2 (3-level, keyed + // by sandboxProviderKind). Only rewrites rows that still contain at least + // one branch whose value carries `vmId` directly (= legacy shape marker). + await sql` + UPDATE connections c + SET metadata = ( + jsonb_set( + c.metadata::jsonb, + '{vmMap}', + ( + SELECT jsonb_object_agg( + user_key, + COALESCE( + ( + SELECT jsonb_object_agg( + branch_key, + CASE + WHEN entry ? 'vmId' THEN + jsonb_build_object( + COALESCE( + entry->>'sandboxProviderKind', + entry->>'runnerKind', + 'docker' + ), + entry + ) + ELSE entry + END + ) + FROM jsonb_each(user_map) AS branches(branch_key, entry) + ), + '{}'::jsonb + ) + ) + FROM jsonb_each(c.metadata::jsonb -> 'vmMap') AS users(user_key, user_map) + ) + ) + )::text + WHERE c.connection_type = 'VIRTUAL' + AND c.metadata IS NOT NULL + AND c.metadata::jsonb ? 'vmMap' + AND EXISTS ( + SELECT 1 + FROM jsonb_each(c.metadata::jsonb -> 'vmMap') AS users(user_key, user_map) + JOIN jsonb_each(user_map) AS branches(branch_key, entry) ON true + WHERE entry ? 'vmId' + ); + `.execute(db); + + // Pass 2: rename `runnerKind` → `sandboxProviderKind` on every inner + // entry of the (now v2) 3-level structure. Only rewrites rows that + // still have at least one such inner entry. + await sql` + UPDATE connections c + SET metadata = ( + jsonb_set( + c.metadata::jsonb, + '{vmMap}', + ( + SELECT jsonb_object_agg( + user_key, + COALESCE( + ( + SELECT jsonb_object_agg( + branch_key, + COALESCE( + ( + SELECT jsonb_object_agg( + kind_key, + CASE + WHEN inner_entry ? 'runnerKind' + AND NOT inner_entry ? 
'sandboxProviderKind' + THEN jsonb_set( + inner_entry, + '{sandboxProviderKind}', + inner_entry->'runnerKind' + ) - 'runnerKind' + WHEN inner_entry ? 'runnerKind' + THEN inner_entry - 'runnerKind' + ELSE inner_entry + END + ) + FROM jsonb_each(branch_entry) AS kinds(kind_key, inner_entry) + ), + '{}'::jsonb + ) + ) + FROM jsonb_each(user_map) AS branches(branch_key, branch_entry) + ), + '{}'::jsonb + ) + ) + FROM jsonb_each(c.metadata::jsonb -> 'vmMap') AS users(user_key, user_map) + ) + ) + )::text + WHERE c.connection_type = 'VIRTUAL' + AND c.metadata IS NOT NULL + AND c.metadata::jsonb ? 'vmMap' + AND EXISTS ( + SELECT 1 + FROM jsonb_each(c.metadata::jsonb -> 'vmMap') AS users(user_key, user_map) + JOIN jsonb_each(user_map) AS branches(branch_key, branch_entry) ON true + JOIN jsonb_each(branch_entry) AS kinds(kind_key, inner_entry) ON true + WHERE inner_entry ? 'runnerKind' + ); + `.execute(db); +} + +export async function down(_db: Kysely): Promise { + // No-op. Reversing this would mean reintroducing two distinct legacy + // shapes (v1 layout AND `runnerKind` field) that the rest of the + // codebase has already moved past. Restoration is not useful — readers + // still tolerate both shapes via mesh-sdk's preprocess adapters and the + // `parseBranchMap` / `parseVmMapEntry` helpers, so a rollback is never + // needed to recover behavior. +} diff --git a/apps/mesh/migrations/088-purge-cli-activate-keys.test.ts b/apps/mesh/migrations/088-purge-cli-activate-keys.test.ts new file mode 100644 index 0000000000..c445b0a4fb --- /dev/null +++ b/apps/mesh/migrations/088-purge-cli-activate-keys.test.ts @@ -0,0 +1,89 @@ +/** + * Integration test for migration 088. + * + * Verifies that `up` deletes all rows where `provider_id` is `claude-code` + * or `codex` (the sentinel rows created by the deprecated + * `AI_PROVIDER_CLI_ACTIVATE` tool) and leaves all other rows untouched. 
+ */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { sql } from "kysely"; +import { + closeTestDatabase, + createTestDatabase, + type TestDatabase, +} from "../src/database/test-db"; +import { + createTestSchema, + seedCommonTestFixtures, +} from "../src/storage/test-helpers"; +import { up as up088 } from "./088-purge-cli-activate-keys"; + +const ORG = "org_test"; +const USER = "user_test"; + +async function insertProviderKey( + database: TestDatabase, + id: string, + providerId: string, +): Promise { + const now = new Date().toISOString(); + await sql` + INSERT INTO ai_provider_keys ( + id, organization_id, created_by, provider_id, label, encrypted_api_key, created_at + ) VALUES ( + ${id}, ${ORG}, ${USER}, ${providerId}, 'cli-local', 'cli-local', ${now} + ) + `.execute(database.db); +} + +async function countByProvider( + database: TestDatabase, + providerId: string, +): Promise { + const result = await sql<{ n: number }>` + SELECT count(*)::int AS n FROM ai_provider_keys WHERE provider_id = ${providerId} + `.execute(database.db); + return result.rows[0]?.n ?? 
0; +} + +describe("migration 083 — purge cli-activate sentinel keys", () => { + let database: TestDatabase; + + beforeEach(async () => { + database = await createTestDatabase(); + await createTestSchema(database.db); + await seedCommonTestFixtures(database.db); + }); + + afterEach(async () => { + await closeTestDatabase(database); + }); + + it("removes claude-code and codex rows", async () => { + await insertProviderKey(database, "apk_claude", "claude-code"); + await insertProviderKey(database, "apk_codex", "codex"); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up088(database.db as any); + + expect(await countByProvider(database, "claude-code")).toBe(0); + expect(await countByProvider(database, "codex")).toBe(0); + }); + + it("leaves rows with other provider_ids untouched", async () => { + await insertProviderKey(database, "apk_openai", "openai"); + await insertProviderKey(database, "apk_claude", "claude-code"); + + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await up088(database.db as any); + + expect(await countByProvider(database, "openai")).toBe(1); + expect(await countByProvider(database, "claude-code")).toBe(0); + }); + + it("is a no-op when no matching rows exist", async () => { + // biome-ignore lint/suspicious/noExplicitAny: migration accepts the test Kysely instance + await expect(up088(database.db as any)).resolves.toBeUndefined(); + }); +}); diff --git a/apps/mesh/migrations/088-purge-cli-activate-keys.ts b/apps/mesh/migrations/088-purge-cli-activate-keys.ts new file mode 100644 index 0000000000..a05c6842e8 --- /dev/null +++ b/apps/mesh/migrations/088-purge-cli-activate-keys.ts @@ -0,0 +1,26 @@ +import { sql, type Kysely } from "kysely"; + +/** + * Removes the sentinel rows created by the deprecated + * `AI_PROVIDER_CLI_ACTIVATE` tool. 
These rows always had + * `api_key = 'cli-local'` and never represented a real credential — + * they were a marker the dialog used to render a "Claude CLI" / + * "Codex CLI" entry in the connected-providers list. + * + * Capability discovery now lives on the laptop link, so the rows are + * vestigial. No downstream code reads them after the cli-activate path + * is deleted. + */ +export async function up(db: Kysely): Promise { + await sql` + DELETE FROM ai_provider_keys + WHERE provider_id IN ('claude-code', 'codex') + `.execute(db); +} + +export async function down(_db: Kysely): Promise { + // No rollback — the rows were sentinels and the upstream code that + // created them no longer exists. If a rollback is ever needed, the + // user can re-run AI Providers onboarding manually (against a build + // that still has the cli-activate path). +} diff --git a/apps/mesh/migrations/index.ts b/apps/mesh/migrations/index.ts index 593cbe39f9..9207bf856d 100644 --- a/apps/mesh/migrations/index.ts +++ b/apps/mesh/migrations/index.ts @@ -81,6 +81,12 @@ import * as migration079striplegacyfreestylevmmapentries from "./079-strip-legac import * as migration080asyncresearchjobs from "./080-async-research-jobs.ts"; import * as migration081asyncresearchjobsresultcontent from "./081-async-research-jobs-result-content.ts"; import * as migration082secrets from "./082-secrets.ts"; +import * as migration083threadrunlocally from "./083-thread-run-locally.ts"; +import * as migration084drophostsandboxrows from "./084-drop-host-sandbox-rows.ts"; +import * as migration085renamerunnerkindd from "./085-rename-runner-kind.ts"; +import * as migration086threadpinsandvmmaprekey from "./086-thread-pins-and-vm-map-rekey.ts"; +import * as migration087fixvmmaprekey from "./087-fix-vm-map-rekey.ts"; +import * as migration088purgecliactivatekeys from "./088-purge-cli-activate-keys.ts"; /** * Core migrations for the Mesh application. 
@@ -179,6 +185,12 @@ const migrations: Record = { "081-async-research-jobs-result-content": migration081asyncresearchjobsresultcontent, "082-secrets": migration082secrets, + "083-thread-run-locally": migration083threadrunlocally, + "084-drop-host-sandbox-rows": migration084drophostsandboxrows, + "085-rename-runner-kind": migration085renamerunnerkindd, + "086-thread-pins-and-vm-map-rekey": migration086threadpinsandvmmaprekey, + "087-fix-vm-map-rekey": migration087fixvmmaprekey, + "088-purge-cli-activate-keys": migration088purgecliactivatekeys, }; export default migrations; diff --git a/apps/mesh/package.json b/apps/mesh/package.json index 2946cc146e..c13339f132 100644 --- a/apps/mesh/package.json +++ b/apps/mesh/package.json @@ -33,9 +33,11 @@ "test:e2e": "playwright test", "test:e2e:ui": "playwright test --ui", "better-auth:migrate": "bunx --bun @better-auth/cli migrate -y --config src/auth/index.ts", + "smoke:link": "bun run scripts/smoke-link.ts", "prepublishOnly": "bun run build:client && bun run build:server" }, "optionalDependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.141", "@duckdb/node-api": "^1.5.0-r.1" }, "dependencies": { @@ -73,7 +75,6 @@ "devDependencies": { "@ai-sdk/provider": "^3.0.10", "@ai-sdk/react": "^3.0.184", - "@anthropic-ai/claude-agent-sdk": "^0.2.141", "@better-auth/sso": "1.4.1", "@daveyplate/better-auth-ui": "^3.2.7", "@deco/ui": "workspace:*", @@ -85,6 +86,7 @@ "@decocms/vite-plugin": "workspace:*", "@electric-sql/pglite": "^0.3.15", "@floating-ui/react": "^0.27.16", + "@happy-dom/global-registrator": "^20.9.0", "@hookform/resolvers": "^5.2.2", "@jitl/quickjs-wasmfile-release-sync": "0.31.0", "@modelcontextprotocol/sdk": "1.29.0", @@ -116,6 +118,8 @@ "@tailwindcss/vite": "^4.1.17", "@tanstack/react-query": "5.100.10", "@tanstack/react-router": "1.169.2", + "@testing-library/jest-dom": "^6.9.1", + "@testing-library/react": "^16.3.2", "@tiptap/core": "3.20.2", "@tiptap/extension-mention": "3.20.2", "@tiptap/extension-placeholder": 
"3.20.2", @@ -138,6 +142,7 @@ "croner": "^9.1.0", "date-fns": "^4.1.0", "degit": "^2.8.4", + "happy-dom": "^20.9.0", "hono": "^4.10.7", "input-otp": "^1.4.2", "jose": "^6.0.11", diff --git a/apps/mesh/scripts/smoke-link.ts b/apps/mesh/scripts/smoke-link.ts new file mode 100644 index 0000000000..f49f02c3ca --- /dev/null +++ b/apps/mesh/scripts/smoke-link.ts @@ -0,0 +1,51 @@ +/** + * smoke-link — verify that a local link daemon is registered and online. + * + * Hits `/api/links/me` on the local cluster with a bearer session and + * fails fast if the status isn't "online". Useful as a quick check + * before running an integration test that depends on a live link. + * + * Run with: `bun run smoke:link` from `apps/mesh/`. + * + * Required env: + * MESH_TEST_SESSION Bearer token for an authenticated session + * + * Optional env: + * MESH_BASE_URL Cluster base URL (default http://localhost:3000) + */ + +async function main(): Promise { + const baseUrl = process.env.MESH_BASE_URL ?? "http://localhost:3000"; + const token = process.env.MESH_TEST_SESSION ?? 
""; + if (!token) { + console.error( + "smoke: MESH_TEST_SESSION is not set — pass a bearer token for an authenticated session.", + ); + process.exit(2); + } + const res = await fetch(`${baseUrl}/api/links/me`, { + headers: { authorization: `Bearer ${token}` }, + }); + if (!res.ok) { + console.error( + `smoke: /api/links/me returned ${res.status} — start the link with \`bun run dev --local-sandbox-provider\` or \`deco link\``, + ); + process.exit(1); + } + const body = (await res.json()) as { + status?: string; + capabilities?: string[]; + }; + if (body.status !== "online") { + console.error( + "smoke: link is not online; start it with `bun run dev --local-sandbox-provider` or `deco link`", + ); + process.exit(1); + } + console.log("smoke: link online — capabilities", body.capabilities); +} + +main().catch((err) => { + console.error("smoke: unexpected error", err); + process.exit(1); +}); diff --git a/apps/mesh/src/ai-providers/adapters/claude-code-models.ts b/apps/mesh/src/ai-providers/adapters/claude-code-models.ts new file mode 100644 index 0000000000..2d3d319dbe --- /dev/null +++ b/apps/mesh/src/ai-providers/adapters/claude-code-models.ts @@ -0,0 +1,40 @@ +import type { ModelInfo } from "../types"; + +/** + * Browser-safe model list for the Claude Code laptop harness. Lives apart + * from `claude-code.ts` because that file re-exports `createClaudeCodeModel` + * from `../../harnesses/claude-code`, which transitively pulls + * `ai-sdk-provider-claude-code` (and Node's `crypto`) into any bundle that + * imports it. The chat model selector only needs the list — never the + * harness factory — so it imports from here to keep `node:crypto` out of + * the browser bundle. 
+ */ +export const CLAUDE_CODE_MODELS: ModelInfo[] = [ + { + providerId: "claude-code", + modelId: "claude-code:haiku", + title: "Claude Code Haiku", + description: "Fast and lightweight", + capabilities: ["text"], + limits: null, + costs: null, + }, + { + providerId: "claude-code", + modelId: "claude-code:sonnet", + title: "Claude Code Sonnet", + description: "Balanced performance", + capabilities: ["text", "reasoning"], + limits: null, + costs: null, + }, + { + providerId: "claude-code", + modelId: "claude-code:opus", + title: "Claude Code Opus", + description: "Most capable", + capabilities: ["text", "reasoning"], + limits: null, + costs: null, + }, +]; diff --git a/apps/mesh/src/ai-providers/adapters/claude-code.ts b/apps/mesh/src/ai-providers/adapters/claude-code.ts index e9aedc19a4..16bb077ec1 100644 --- a/apps/mesh/src/ai-providers/adapters/claude-code.ts +++ b/apps/mesh/src/ai-providers/adapters/claude-code.ts @@ -1,79 +1 @@ -import { createClaudeCode } from "ai-sdk-provider-claude-code"; -import type { MeshProvider, ModelInfo, ProviderAdapter } from "../types"; - -export { createClaudeCodeModel } from "../coding-agents/claude-code"; - -export const CLAUDE_CODE_MODELS: ModelInfo[] = [ - { - providerId: "claude-code", - modelId: "claude-code:haiku", - title: "Claude Code Haiku", - description: "Fast and lightweight", - capabilities: ["text"], - limits: null, - costs: null, - }, - { - providerId: "claude-code", - modelId: "claude-code:sonnet", - title: "Claude Code Sonnet", - description: "Balanced performance", - capabilities: ["text", "reasoning"], - limits: null, - costs: null, - }, - { - providerId: "claude-code", - modelId: "claude-code:opus", - title: "Claude Code Opus", - description: "Most capable", - capabilities: ["text", "reasoning"], - limits: null, - costs: null, - }, -]; - -/** Map composite model IDs (e.g. "claude-code:sonnet") to SDK model names. 
*/ -const CLAUDE_CODE_SDK_MODELS: Record = { - "claude-code:opus": "opus", - "claude-code:sonnet": "sonnet", - "claude-code:haiku": "haiku", -}; - -/** Resolve a composite claude-code model ID to the SDK model name. */ -export function resolveClaudeCodeModelId(modelId: string): string { - return CLAUDE_CODE_SDK_MODELS[modelId] ?? modelId; -} - -export const claudeCodeAdapter: ProviderAdapter = { - info: { - id: "claude-code", - name: "Claude Code", - description: "Use your Claude Pro or Max subscription", - logo: "https://assets.decocache.com/decocms/2b91e6f8-5151-4b4f-bdf9-037ee769e6ff/Claude_AI_symbol.svg.png", - }, - supportedMethods: ["cli-activate"], - create(_apiKey): MeshProvider { - // Claude Code doesn't use API keys, but we need to conform to the interface. - // The real model creation happens via createClaudeCodeModel() with mcpServers. - const provider = createClaudeCode({ - defaultSettings: { - permissionMode: "bypassPermissions", - disallowedTools: [ - "AskUserQuestion", - "ExitPlanMode", - "EnterWorktree", - "ExitWorktree", - "Config", - ], - }, - }); - return { - info: claudeCodeAdapter.info, - aiSdk: provider as any, - async listModels(): Promise { - return CLAUDE_CODE_MODELS; - }, - }; - }, -}; +export { CLAUDE_CODE_MODELS } from "./claude-code-models"; diff --git a/apps/mesh/src/ai-providers/adapters/codex-models.ts b/apps/mesh/src/ai-providers/adapters/codex-models.ts new file mode 100644 index 0000000000..0fe8bbaf4a --- /dev/null +++ b/apps/mesh/src/ai-providers/adapters/codex-models.ts @@ -0,0 +1,66 @@ +import type { ModelInfo } from "../types"; + +/** + * Browser-safe model list for the Codex laptop harness. Lives apart from + * `codex.ts` because that file re-exports `createCodexModel` from + * `../../harnesses/codex`, which transitively pulls Node-only crypto + * code into any bundle that imports it. The chat model selector only + * needs the list — never the harness factory — so it imports from here. 
+ */ +const CODEX_LOGO = + "https://assets.decocache.com/decocms/6ac44f1c-c0cf-4480-84b5-2ae6fe742d0b/codex-app.png.png"; + +export const CODEX_MODELS: ModelInfo[] = [ + { + providerId: "codex", + modelId: "codex:gpt-5.5", + title: "GPT-5.5", + description: + "Frontier model for complex coding, research, and real-world work", + capabilities: ["text", "reasoning"], + logo: CODEX_LOGO, + limits: null, + costs: null, + }, + { + providerId: "codex", + modelId: "codex:gpt-5.4", + title: "GPT-5.4", + description: "Strong model for everyday coding", + capabilities: ["text", "reasoning"], + logo: CODEX_LOGO, + limits: null, + costs: null, + }, + { + providerId: "codex", + modelId: "codex:gpt-5.4-mini", + title: "GPT-5.4 Mini", + description: + "Small, fast, and cost-efficient model for simpler coding tasks", + capabilities: ["text", "reasoning"], + logo: CODEX_LOGO, + limits: null, + costs: null, + }, + { + providerId: "codex", + modelId: "codex:gpt-5.3-codex", + title: "GPT-5.3 Codex", + description: "Coding-optimized model", + capabilities: ["text", "reasoning"], + logo: CODEX_LOGO, + limits: null, + costs: null, + }, + { + providerId: "codex", + modelId: "codex:gpt-5.2", + title: "GPT-5.2", + description: "Optimized for professional work and long-running agents", + capabilities: ["text", "reasoning"], + logo: CODEX_LOGO, + limits: null, + costs: null, + }, +]; diff --git a/apps/mesh/src/ai-providers/adapters/codex.ts b/apps/mesh/src/ai-providers/adapters/codex.ts index aefa00efa8..b8058b1397 100644 --- a/apps/mesh/src/ai-providers/adapters/codex.ts +++ b/apps/mesh/src/ai-providers/adapters/codex.ts @@ -1,98 +1 @@ -import type { MeshProvider, ModelInfo, ProviderAdapter } from "../types"; - -export { createCodexModel } from "../coding-agents/codex"; - -const CODEX_LOGO = - "https://assets.decocache.com/decocms/6ac44f1c-c0cf-4480-84b5-2ae6fe742d0b/codex-app.png.png"; - -export const CODEX_MODELS: ModelInfo[] = [ - { - providerId: "codex", - modelId: "codex:gpt-5.5", - 
title: "GPT-5.5", - description: - "Frontier model for complex coding, research, and real-world work", - capabilities: ["text", "reasoning"], - logo: CODEX_LOGO, - limits: null, - costs: null, - }, - { - providerId: "codex", - modelId: "codex:gpt-5.4", - title: "GPT-5.4", - description: "Strong model for everyday coding", - capabilities: ["text", "reasoning"], - logo: CODEX_LOGO, - limits: null, - costs: null, - }, - { - providerId: "codex", - modelId: "codex:gpt-5.4-mini", - title: "GPT-5.4 Mini", - description: - "Small, fast, and cost-efficient model for simpler coding tasks", - capabilities: ["text", "reasoning"], - logo: CODEX_LOGO, - limits: null, - costs: null, - }, - { - providerId: "codex", - modelId: "codex:gpt-5.3-codex", - title: "GPT-5.3 Codex", - description: "Coding-optimized model", - capabilities: ["text", "reasoning"], - logo: CODEX_LOGO, - limits: null, - costs: null, - }, - { - providerId: "codex", - modelId: "codex:gpt-5.2", - title: "GPT-5.2", - description: "Optimized for professional work and long-running agents", - capabilities: ["text", "reasoning"], - logo: CODEX_LOGO, - limits: null, - costs: null, - }, -]; - -/** Map composite model IDs to SDK model names. */ -const CODEX_SDK_MODELS: Record = { - "codex:gpt-5.5": "gpt-5.5", - "codex:gpt-5.4": "gpt-5.4", - "codex:gpt-5.4-mini": "gpt-5.4-mini", - "codex:gpt-5.3-codex": "gpt-5.3-codex", - "codex:gpt-5.2": "gpt-5.2", -}; - -/** Resolve a composite codex model ID to the SDK model name. 
*/ -export function resolveCodexModelId(modelId: string): string { - const resolved = CODEX_SDK_MODELS[modelId]; - if (!resolved) { - throw new Error(`Unknown Codex model ID: ${modelId}`); - } - return resolved; -} - -export const codexAdapter: ProviderAdapter = { - info: { - id: "codex", - name: "Codex", - description: "Use your ChatGPT Plus or Pro subscription", - logo: CODEX_LOGO, - }, - supportedMethods: ["cli-activate"], - create(_apiKey): MeshProvider { - return { - info: codexAdapter.info, - aiSdk: {} as any, - async listModels(): Promise { - return CODEX_MODELS; - }, - }; - }, -}; +export { CODEX_MODELS } from "./codex-models"; diff --git a/apps/mesh/src/ai-providers/agent-tiers.ts b/apps/mesh/src/ai-providers/agent-tiers.ts new file mode 100644 index 0000000000..d5f2f25e85 --- /dev/null +++ b/apps/mesh/src/ai-providers/agent-tiers.ts @@ -0,0 +1,45 @@ +import type { HarnessId } from "../harnesses"; +import type { ChatTier } from "@/tools/organization/schema"; + +/** + * Per-agent (laptop-CLI harness) tier → model mapping. + * + * Lives in the server-safe `ai-providers/` folder so both the cluster's + * dispatch path (`resolvePerRequestModels`) and the web `agent-models.ts` + * can read it. The cluster never has an `ai_provider_keys` row for these + * harnesses — capability and credential live on the user's laptop link. + */ +export interface AgentTierEntry { + modelId: string; + /** Short label shown in the chat input model trigger ("Haiku"). 
*/ + label: string; +} + +export type AgentTierMap = Record; + +const CLAUDE_CODE_TIERS: AgentTierMap = { + fast: { modelId: "claude-code:haiku", label: "Haiku" }, + smart: { modelId: "claude-code:sonnet", label: "Sonnet" }, + thinking: { modelId: "claude-code:opus", label: "Opus" }, +}; + +const CODEX_TIERS: AgentTierMap = { + fast: { modelId: "codex:gpt-5.4-mini", label: "GPT-5.4 Mini" }, + smart: { modelId: "codex:gpt-5.3-codex", label: "GPT-5.3 Codex" }, + thinking: { modelId: "codex:gpt-5.5", label: "GPT-5.5" }, +}; + +function getAgentTiers(agent: HarnessId): AgentTierMap | null { + if (agent === "claude-code") return CLAUDE_CODE_TIERS; + if (agent === "codex") return CODEX_TIERS; + return null; +} + +/** Returns the model the laptop harness should run for the given tier, + * or `null` when the harness is Decopilot (uses the AI provider path). */ +export function resolveAgentTier( + agent: HarnessId, + tier: ChatTier, +): AgentTierEntry | null { + return getAgentTiers(agent)?.[tier] ?? 
null; +} diff --git a/apps/mesh/src/ai-providers/registry.ts b/apps/mesh/src/ai-providers/registry.ts index 4aa709b5a9..fd71823fda 100644 --- a/apps/mesh/src/ai-providers/registry.ts +++ b/apps/mesh/src/ai-providers/registry.ts @@ -1,6 +1,4 @@ import { anthropicAdapter } from "./adapters/anthropic"; -import { claudeCodeAdapter } from "./adapters/claude-code"; -import { codexAdapter } from "./adapters/codex"; import { googleAdapter } from "./adapters/google"; import { openaiCompatibleAdapter } from "./adapters/openai-compatible"; import { openrouterAdapter } from "./adapters/openrouter"; @@ -13,10 +11,6 @@ export function getProviders(): Partial> { const settings = getSettings(); return { ...(settings.aiGatewayEnabled && { deco: decoAiGatewayAdapter }), - ...(settings.localMode && { - "claude-code": claudeCodeAdapter, - codex: codexAdapter, - }), anthropic: anthropicAdapter, google: googleAdapter, openrouter: openrouterAdapter, diff --git a/apps/mesh/src/ai-providers/types.ts b/apps/mesh/src/ai-providers/types.ts index 9c09f1923f..3b38644fa8 100644 --- a/apps/mesh/src/ai-providers/types.ts +++ b/apps/mesh/src/ai-providers/types.ts @@ -92,7 +92,7 @@ export interface MeshProvider { listModels(): Promise; } -export type ConnectionMethod = "api-key" | "oauth-pkce" | "cli-activate"; +export type ConnectionMethod = "api-key" | "oauth-pkce"; export interface OAuthPkceParams { callbackUrl: string; diff --git a/apps/mesh/src/api/app.ts b/apps/mesh/src/api/app.ts index 041b67c18f..a46ccda3ab 100644 --- a/apps/mesh/src/api/app.ts +++ b/apps/mesh/src/api/app.ts @@ -25,7 +25,10 @@ import { } from "../core/context-factory"; import type { MeshContext } from "../core/mesh-context"; import { closeDatabase, getDb, type MeshDatabase } from "../database"; -import { asDockerRunner, getSharedRunnerIfInit } from "../sandbox/lifecycle"; +import { + asDockerRunner, + getSharedSandboxProviderIfInit, +} from "../sandbox/lifecycle"; import { createEventBus, type EventBus } from "../event-bus"; 
import { flushMonitoringData, @@ -89,6 +92,12 @@ import { import { NatsCancelBroadcast } from "./routes/decopilot/nats-cancel-broadcast"; import type { StreamBuffer } from "./routes/decopilot/stream-buffer"; import { NatsStreamBuffer } from "./routes/decopilot/nats-stream-buffer"; +import { + createInMemoryLinkRegistry, + type LinkRegistry, + NatsLinkRegistry, +} from "../links/link-registry"; +import { registerLinksRoutes } from "../links/routes"; import { RunRegistry } from "./routes/decopilot/run-registry"; import type { RunReactorDeps } from "./routes/decopilot/run-reactor"; import { SqlThreadStorage } from "../storage/threads"; @@ -712,6 +721,7 @@ export async function createApp(options: CreateAppOptions = {}) { let modelListCache: ModelListCache; let cancelBroadcast: CancelBroadcast; let streamBuffer: StreamBuffer; + let linkRegistry: LinkRegistry; let natsProvider: NatsConnectionProvider | null = null; if (options.eventBus) { @@ -734,6 +744,11 @@ export async function createApp(options: CreateAppOptions = {}) { broadcast: () => {}, stop: async () => {}, }; + // Test/no-NATS branch: an in-memory link registry keeps the link routes + // testable without a live NATS cluster. 
+ linkRegistry = createInMemoryLinkRegistry({ + nowSeconds: () => Math.floor(Date.now() / 1000), + }); streamBuffer = { init: async () => {}, // Test/no-NATS stub: drain the stream so `createUIMessageStream`'s @@ -785,6 +800,12 @@ export async function createApp(options: CreateAppOptions = {}) { getJetStream: () => natsProvider!.getJetStream(), }); + const natsLinkRegistry = new NatsLinkRegistry({ + getJetStream: () => natsProvider!.getJetStream(), + }); + natsLinkRegistry.init().catch(() => {}); + linkRegistry = natsLinkRegistry; + eventBus = createEventBus(database, natsProvider); // When NATS connects, (re-)initialize all deferred consumers @@ -801,6 +822,12 @@ export async function createApp(options: CreateAppOptions = {}) { err, ); }); + natsLinkRegistry.init().catch((err: unknown) => { + console.warn( + "[LinkRegistry] Deferred init failed, link dispatch disabled:", + err, + ); + }); }); } @@ -1050,7 +1077,21 @@ export async function createApp(options: CreateAppOptions = {}) { // mounted via `createOrgScopedApi` below. const legacyWellKnownProtectedResource = createLegacyWellKnownProtectedResourceRoutes(); - legacyWellKnownProtectedResource.use("*", logDeprecatedRoute); + // Scope the deprecation log to the two specific legacy paths this sub-app + // owns, NOT `use("*", ...)`. Because this sub-app is mounted at `/`, a + // wildcard middleware fires for every request to the root app — and the + // suppression logic in `log-deprecated-route.ts` can't reliably tell + // root-app handlers (e.g. `/api/links/heartbeat`) apart from this + // sub-app's handlers via basePath alone. Pinning the middleware to the + // actual deprecated patterns avoids the false-positive entirely. 
+ legacyWellKnownProtectedResource.use( + "/.well-known/oauth-protected-resource/mcp/:connectionId", + logDeprecatedRoute, + ); + legacyWellKnownProtectedResource.use( + "/mcp/:connectionId/.well-known/oauth-protected-resource", + logDeprecatedRoute, + ); app.route("/", legacyWellKnownProtectedResource); // Well-known *prefix* discovery for the new org-scoped server URL shape. @@ -1132,6 +1173,7 @@ export async function createApp(options: CreateAppOptions = {}) { eventBus, modelListCache, memberRoleCache, + linkRegistry, }); ContextFactory.set(factory); @@ -1570,9 +1612,23 @@ export async function createApp(options: CreateAppOptions = {}) { cancelBroadcast, streamBuffer, runRegistry, + linkRegistry, }); app.route("/api", decopilotRoutes); + // `/api/links/*` — link daemon registration, heartbeat, status. + // Session auth uses the same Better-Auth flow as every other authed + // route: `meshContext.auth.user.id` is the userSub. Bearer auth on + // heartbeat is the `linkSecret` (verified inside the route). + registerLinksRoutes(app, { + linkRegistry, + getAuthenticatedUserSub: (c) => { + const ctx = c.get("meshContext"); + return ctx?.auth?.user?.id ?? null; + }, + allowLocalhostLinks: process.env.MESH_ALLOW_LOCALHOST_LINKS === "1", + }); + // Stable file redirect endpoint (resolves mesh-storage: URIs to presigned URLs) app.route("/api", filesRoutes); @@ -1783,10 +1839,10 @@ export async function createApp(options: CreateAppOptions = {}) { // Sweep sandbox containers — Docker only. Other runners' sandboxes // outlive mesh by design, so a generic sweep would nuke active user VMs. // Must run before NATS/DB close (sweep writes state). 
- const dockerRunner = asDockerRunner(getSharedRunnerIfInit()); + const dockerRunner = asDockerRunner(getSharedSandboxProviderIfInit()); if (dockerRunner) { const { sweepDockerOrphansOnShutdown } = await import( - "@decocms/sandbox/runner" + "@decocms/sandbox/provider" ); await sweepDockerOrphansOnShutdown(dockerRunner); } diff --git a/apps/mesh/src/api/routes/decopilot/dispatch-run.ts b/apps/mesh/src/api/routes/decopilot/dispatch-run.ts index 07227ffa72..5cbb84df91 100644 --- a/apps/mesh/src/api/routes/decopilot/dispatch-run.ts +++ b/apps/mesh/src/api/routes/decopilot/dispatch-run.ts @@ -25,16 +25,22 @@ import type { MeshContext } from "@/core/mesh-context"; import { posthog } from "@/posthog"; import { type UIMessageChunk, createUIMessageStream } from "ai"; -import { localDispatch } from "../../../harnesses"; +import { localDispatch } from "@/harnesses"; +import { + ensureRemoteCliSandbox, + remoteDispatch, +} from "@/harnesses/remote-dispatch"; +import { LinkOfflineError } from "../../../links/link-offline-error"; +import type { DispatchTarget } from "../../../links/resolve-dispatch-target"; import type { HarnessId, HarnessProcessLocal, HarnessStreamInput, -} from "../../../harnesses/types"; +} from "@/harnesses"; import { sanitizeStreamError, stringifyError, -} from "../../../harnesses/decopilot/stream-error"; +} from "@/harnesses/decopilot/stream-error"; import { DEFAULT_WINDOW_SIZE, generateMessageId } from "./constants"; import { loadAndMergeMessages } from "./conversation"; import { uploadFileParts, resolveStorageRefs } from "./file-materializer"; @@ -55,8 +61,8 @@ import type { StreamBuffer } from "./stream-buffer"; import type { ChatMessage, ModelsConfig } from "./types"; import type { CancelBroadcast } from "./cancel-broadcast"; import type { ThreadMessage } from "@/storage/types"; -import type { PendingImage } from "../../../harnesses/decopilot/built-in-tools"; -import { getInternalUrl } from "@/core/server-constants"; +import type { PendingImage } from 
"@/harnesses/decopilot/built-in-tools"; +import { getInternalUrl, getPublicUrl } from "@/core/server-constants"; import { traced } from "@/observability"; import { getPodId } from "@/core/pod-identity"; import type { SSEEvent } from "@/event-bus"; @@ -105,8 +111,12 @@ function classifyStreamError( * Anything that isn't a recognized CLI agent provider id maps to * decopilot — the native in-tree harness. The CLI agent providers each * own their own harness (see `apps/mesh/src/harnesses/{claude-code,codex}`). + * + * Exported so POST /messages can resolve the harness up-front (before + * enqueuing onto the thread gate) to decide whether the request needs + * a link daemon and which capability to check. */ -function resolveHarnessId(providerId: string | undefined): HarnessId { +export function resolveHarnessId(providerId: string | undefined): HarnessId { if (providerId === "claude-code") return "claude-code"; if (providerId === "codex") return "codex"; return "decopilot"; @@ -178,16 +188,30 @@ function lookupResumeSessionRef( * called for harnesses that actually open an HTTP MCP connection * (claude-code, codex); decopilot's in-process passthrough doesn't need * this. + * + * `targetKind` decides which base URL to mint: + * - `"local"` — `getInternalUrl()` (loopback; the harness runs inside + * the cluster pod alongside the API). + * - `"remote-cli"` — `getPublicUrl()` (the harness runs on the user's + * laptop and dials the cluster back over the public network — or, in + * dev mode, localhost via `MESH_ALLOW_LOCALHOST_LINKS=1`). 
*/ +const MCP_KEY_TTL_SECONDS = 3600; + async function mintMcpEndpoint( ctx: MeshContext, agentId: string, organization: { id: string; slug?: string; name?: string }, apiKeyName: string, -): Promise<{ url: string; headers: Record }> { + targetKind: DispatchTarget["kind"], +): Promise<{ + url: string; + headers: Record; + expiresAt: number; +}> { const apiKey = await ctx.boundAuth.apiKey.create({ name: apiKeyName, - expiresIn: 3600, + expiresIn: MCP_KEY_TTL_SECONDS, metadata: { organization: { id: organization.id, @@ -196,12 +220,18 @@ async function mintMcpEndpoint( }, }, }); + const baseUrl = + targetKind === "remote-cli" ? getPublicUrl() : getInternalUrl(); return { - url: `${getInternalUrl()}/mcp/virtual-mcp/${agentId}`, + url: `${baseUrl}/mcp/virtual-mcp/${agentId}`, headers: { Authorization: `Bearer ${apiKey.key}`, "x-org-id": organization.id, }, + // Wire-shape: HarnessStreamInputWire requires expiresAt for the + // remote-cli path so the daemon can pre-empt expiry with a refresh + // (v2 — currently only used for logging / forward-compat). + expiresAt: Date.now() + MCP_KEY_TTL_SECONDS * 1000, }; } @@ -230,6 +260,26 @@ export interface DispatchRunInput { isResume?: boolean; /** Persisted to the thread row on first-message creation. */ branch?: string | null; + /** + * Pre-resolved dispatch target. Set by POST /messages before enqueuing + * onto the per-thread gate so the workflow body never has to call + * `resolveDispatchTarget` itself (avoids replay-time drift if the link + * goes offline between enqueue and dispatch). Defaults to + * `{ kind: "local", sandbox: "default" }` when omitted, preserving the + * pre-Phase-4 behavior. + */ + target?: DispatchTarget; + /** + * Pre-resolved harness id (Decopilot / Claude Code / Codex) from POST + * /messages — taken from the thread's persisted pin or the request + * body. When omitted, falls back to deriving from the credential's + * provider id (legacy behavior; still correct for Decopilot). 
+ * + * Necessary because the laptop-CLI harnesses no longer have an + * `ai_provider_keys` row to drive the credential→harness lookup — + * their `credentialId` is the sentinel `laptop:`. + */ + harnessId?: HarnessId | null; } export interface DispatchRunDeps { @@ -384,9 +434,47 @@ async function prepareRun( const credentialKey = await ctx.storage.aiProviderKeys .findById(input.models.credentialId, input.organizationId) .catch(() => null); - const harnessId = resolveHarnessId(credentialKey?.providerId); + // Prefer the pre-resolved pin from POST /messages (covers laptop-CLI + // harnesses whose synthetic credentialId doesn't match any row); + // fall back to deriving from the credential's provider id for legacy + // callers (e.g. older automation paths) that don't set `harnessId`. + const harnessId: HarnessId = + input.harnessId ?? resolveHarnessId(credentialKey?.providerId); rootSpan.setAttribute("decopilot.harnessId", harnessId); + // Resolve the dispatch target. POST /messages already runs + // `resolveDispatchTarget` and forwards the result on `input.target`; + // we re-read it here (defaulting to local/default for any caller — + // e.g. legacy automation paths — that hasn't been migrated yet). + // + // An `error` target reaching this far means a request without a + // pre-resolved target somehow slipped past the 409 in POST /messages. + // Surface a typed exception so the gate workflow records the run as + // failed instead of a generic stream error. + const target: DispatchTarget = input.target ?? { + kind: "local", + sandbox: "default", + }; + if (target.kind === "error") { + throw new LinkOfflineError(target.reason, target.activeCapabilities); + } + + // Stash the resolved target on the context so downstream consumers + // (Phase 5's remote-user sandbox provider, Phase 6's remote-cli + // dispatch) can read it without re-querying the registry. 
+ if (target.kind === "local") { + ctx.sandboxPreference = target.sandbox; + ctx.linkForCurrentRun = target.link; + } else { + // remote-cli: no in-cluster sandbox runs, but we still hold the + // link reference for the eventual remoteDispatch call below. + ctx.linkForCurrentRun = target.link; + } + rootSpan.setAttribute("decopilot.dispatchTarget.kind", target.kind); + if (target.kind === "local") { + rootSpan.setAttribute("decopilot.dispatchTarget.sandbox", target.sandbox); + } + // 1. Check model permissions (decopilot-only; CLI harnesses run with // the user's own provider credential / local CLI binary, which is // already vetted at credential-creation time). @@ -631,7 +719,14 @@ async function prepareRun( // in-process), so we skip the API-key mint for that path. const mcp = harnessId === "decopilot" - ? { url: "", headers: {} as Record } + ? { + url: "", + headers: {} as Record, + // Sentinel for the in-process decopilot path — its + // passthrough client doesn't consume mcp.* but the + // shared HarnessStreamInput type requires the field. + expiresAt: 0, + } : await mintMcpEndpoint( ctx, input.agent.id, @@ -639,6 +734,7 @@ async function prepareRun( harnessId === "claude-code" ? "claude-code-session" : "codex-session", + target.kind, ); // Build the in-process extras that decopilot needs to participate @@ -700,6 +796,15 @@ async function prepareRun( processLocal, }; + // claude-code cwd resolution: with the `host` runner gone, the + // cluster never has a local on-disk workdir to point the CLI at, + // so the harness falls back to its own ambient cwd. Remote-user + // dispatch runs the harness inside the laptop daemon, where the + // daemon is spawned with workdir = sandbox path; remote-cli runs + // claude-code in-process on the user's machine (no resolver + // needed). Production runners (docker, agent-sandbox, freestyle) + // don't surface a local FS to mesh. + // Dispatch through the registry. 
The harness produces a stream // of UIMessageChunk; we adapt it to a ReadableStream so it can // flow through writer.merge(). When a streamBuffer is wired, its @@ -708,7 +813,33 @@ async function prepareRun( // tails. We do NOT pipe through the buffer here; the pump is // detached and consumes uiStream directly after prepareRun // returns. - const harnessChunks = localDispatch(harnessId, harnessInput, ctx); + // + // Branch on the resolved target: + // - `remote-cli` — the whole stream is delegated to the user's + // link daemon. We first POST `/api/sandboxes` with the runId + // as the handle; the link spawns (or reuses) a daemon for + // that handle and returns its `sandboxUrl` — a per-daemon + // tunnel that the cluster talks to directly (no link + // reverse-proxy hop). Without ensure() first there's no + // sandboxUrl to dispatch against. + // - `local` (default OR remote-user) — runs in-cluster. + // `remote-user` only changes where the sandbox tool calls go; + // the harness still runs here. 
+ let harnessChunks; + if (target.kind === "remote-cli") { + const { sandboxUrl } = await ensureRemoteCliSandbox( + target.link, + harnessInput.runId, + ); + harnessChunks = remoteDispatch( + harnessId, + harnessInput, + target.link, + sandboxUrl, + ); + } else { + harnessChunks = localDispatch(harnessId, harnessInput, ctx); + } const harnessStream = asReadableStream(harnessChunks); // Cast: the outer createUIMessageStream is typed via ChatMessage so diff --git a/apps/mesh/src/api/routes/decopilot/helpers.ts b/apps/mesh/src/api/routes/decopilot/helpers.ts index 22381d3774..3255de6718 100644 --- a/apps/mesh/src/api/routes/decopilot/helpers.ts +++ b/apps/mesh/src/api/routes/decopilot/helpers.ts @@ -26,7 +26,7 @@ import { MAX_RESULT_TOKENS, createOutputPreview, estimateJsonTokens, -} from "../../../harnesses/decopilot/built-in-tools/read-tool-output"; +} from "@/harnesses/decopilot/built-in-tools/read-tool-output"; /** * Tool approval levels determine which tools require user approval before executing diff --git a/apps/mesh/src/api/routes/decopilot/on-title-updated.test.ts b/apps/mesh/src/api/routes/decopilot/on-title-updated.test.ts index 7d914189f5..38bac32a6f 100644 --- a/apps/mesh/src/api/routes/decopilot/on-title-updated.test.ts +++ b/apps/mesh/src/api/routes/decopilot/on-title-updated.test.ts @@ -26,6 +26,8 @@ function makeThread(overrides: Partial = {}): Thread { trigger_id: null, context_start_message_id: null, branch: null, + sandbox_provider_kind: null, + harness_id: null, created_at: "2024-01-01T00:00:00.000Z", updated_at: "2024-01-02T00:00:00.000Z", run_owner_pod: null, diff --git a/apps/mesh/src/api/routes/decopilot/routes.test.ts b/apps/mesh/src/api/routes/decopilot/routes.test.ts index 7b61d3d9ea..e3852a8137 100644 --- a/apps/mesh/src/api/routes/decopilot/routes.test.ts +++ b/apps/mesh/src/api/routes/decopilot/routes.test.ts @@ -1,14 +1,14 @@ /** - * Tests for Decopilot route helpers. 
+ * Tests for Decopilot route helpers + POST /messages dispatch-target + * resolution. */ -import { describe, expect, test } from "bun:test"; +import { describe, expect, mock, test } from "bun:test"; import { Hono } from "hono"; import type { MeshContext } from "@/core/mesh-context"; -import { computeIdempotencyKey, createDecopilotRoutes } from "./routes"; -import type { CancelBroadcast } from "./cancel-broadcast"; -import type { RunRegistry } from "./run-registry"; -import type { StreamBuffer } from "./stream-buffer"; +import type { Capability } from "@/links/protocol"; +import { createInMemoryLinkRegistry } from "../../../links/link-registry"; +import { computeIdempotencyKey } from "./routes"; import type { ChatMessage } from "./types"; describe("computeIdempotencyKey", () => { @@ -96,128 +96,353 @@ describe("computeIdempotencyKey", () => { }); // ============================================================================ -// Stream Endpoint — pure live tail (no snapshot) +// POST /:org/decopilot/threads/:threadId/messages — VM-based dispatch // ============================================================================ +// +// Bun's mock.module is module-global within a shard. Register stubs for +// `resolveTier`, `model-permissions`, `dispatch-queue`, `ensureVm` and +// `resolveDefaultSandboxProviderKind` BEFORE importing routes so the route +// module captures the mocked implementations. Other tests in this file don't +// import the route factory, so the mocks don't bleed into them. 
-interface StreamTestSetup { - app: Hono<{ Variables: { meshContext: MeshContext } }>; - listMessagesCalls: number; -} +mock.module("@/core/resolve-tier", () => ({ + resolveTier: async () => ({ + credentialId: "cred_local", + modelId: "claude-3-5-sonnet", + modelMeta: { title: "Claude 3.5 Sonnet", capabilities: [], limits: null }, + }), + TierUnavailableError: class TierUnavailableError extends Error {}, +})); + +mock.module("./model-permissions", () => ({ + fetchModelPermissions: async () => undefined, // no restriction + checkModelPermission: () => true, + parseModelsToMap: () => ({}), +})); + +mock.module("@/dispatch-queue", () => ({ + enqueueThreadRun: async () => ({ workflowID: "wf_test" }), +})); + +// `./helpers` is deliberately NOT mocked — only `ensureOrganization` is +// exercised on the 409 path, and the real one already works against our stub +// context. Leaving the module real keeps the rest of its imports intact. + +// `ensureVm` is stubbed so tests don't try to provision real sandboxes. +// The stub returns a vmMap entry with the sandboxProviderKind requested by each +// test scenario via the module-level `vmKindForTest` variable. +type VmKind = "docker" | "agent-sandbox" | "remote-user"; +let vmKindForTest: VmKind = "docker"; +mock.module("@/tools/vm/start", () => ({ + ensureVm: async () => ({ + vmId: "vm_test", + previewUrl: null, + sandboxProviderKind: vmKindForTest, + }), +})); + +mock.module("@/sandbox/resolve-default-provider-kind", () => ({ + resolveDefaultSandboxProviderKind: async () => vmKindForTest, +})); + +const { createDecopilotRoutes } = await import("./routes"); + +const THREAD_ID = "thread_test_1"; +const AGENT_ID = "agent_1"; +const BRANCH = "main"; + +function buildApp(opts: { + vmKind: VmKind; + linkOnline: boolean; + linkCapabilities?: Capability[]; + userId?: string; + /** + * Controls what the `threads.get` stub returns for (sandbox_provider_kind, + * harness_id). 
Defaults to already-pinned values so existing "VM-based + * dispatch" tests continue to act like subsequent messages. Pass both as + * null to simulate a first-message scenario where no pins have been + * persisted yet. + */ + threadPins?: { + sandbox_provider_kind?: string | null; + harness_id?: string | null; + }; +}) { + vmKindForTest = opts.vmKind; + + const resolvedPins = opts.threadPins ?? { + sandbox_provider_kind: opts.vmKind, + harness_id: "claude-code", + }; + + const linkRegistry = createInMemoryLinkRegistry({ + nowSeconds: () => Math.floor(Date.now() / 1000), + }); -function makeStreamApp(opts: { - threadStatus?: "idle" | "in_progress"; - tailChunks?: ReadableStream | null; -}): StreamTestSetup { - let listMessagesCalls = 0; + const threadUpdateSpy = mock(async () => {}); const ctx = { - organization: { id: "org_1", slug: "acme" }, - auth: { user: { id: "user_1" } }, + organization: { id: "org_1", slug: "org_1" }, + auth: { user: { id: opts.userId ?? "user_1" } }, storage: { + aiProviderKeys: { + findById: mock(async () => ({ + id: "cred_local", + providerId: "claude-code", + })), + }, threads: { - get: async () => ({ - id: "thread_1", + get: mock(async () => ({ + id: THREAD_ID, + branch: BRANCH, + sandbox_provider_kind: resolvedPins.sandbox_provider_kind ?? null, + harness_id: resolvedPins.harness_id ?? null, + })), + update: threadUpdateSpy, + }, + virtualMcps: { + findById: mock(async () => ({ + id: AGENT_ID, organization_id: "org_1", - status: opts.threadStatus ?? "idle", - created_by: "user_1", - }), - listMessages: async () => { - listMessagesCalls += 1; - return { messages: [], total: 0 }; - }, + metadata: { + vmMap: { + user_1: { + [BRANCH]: { + vmId: "vm_test", + previewUrl: null, + sandboxProviderKind: opts.vmKind, + }, + }, + }, + }, + })), }, }, + linkRegistry, + db: {} as MeshContext["db"], } as unknown as MeshContext; - const tail = - opts.tailChunks === undefined - ? 
new ReadableStream({ - start(controller) { - controller.close(); - }, - }) - : opts.tailChunks; - - const streamBuffer = { - init: async () => {}, - pump: () => {}, - purge: () => {}, - teardown: () => {}, - createTailStream: async () => tail, - } as unknown as StreamBuffer; - - const cancelBroadcast = { - broadcast: () => {}, - } as unknown as CancelBroadcast; - const runRegistry = {} as unknown as RunRegistry; - const app = new Hono<{ Variables: { meshContext: MeshContext } }>(); app.use("*", async (c, next) => { c.set("meshContext", ctx); await next(); }); app.route( - "/", - createDecopilotRoutes({ cancelBroadcast, streamBuffer, runRegistry }), + "/api", + createDecopilotRoutes({ + cancelBroadcast: { + start: async () => {}, + broadcast: () => {}, + stop: async () => {}, + } as unknown as Parameters< + typeof createDecopilotRoutes + >[0]["cancelBroadcast"], + streamBuffer: {} as Parameters< + typeof createDecopilotRoutes + >[0]["streamBuffer"], + runRegistry: {} as Parameters< + typeof createDecopilotRoutes + >[0]["runRegistry"], + linkRegistry, + }), ); - return { - app, - get listMessagesCalls() { - return listMessagesCalls; - }, + // Populate the link registry if the test scenario requires an online link. + const seedLink = async () => { + if (opts.linkOnline) { + await linkRegistry.put(opts.userId ?? "user_1", { + machineId: "m1", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: + opts.linkCapabilities ?? 
(["claude-code"] as Capability[]), + tunnelUrl: "http://localhost:5174", + linkSecret: "secret-hash", + createdAt: new Date().toISOString(), + }); + } }; -} -async function readSseBody(res: Response): Promise { - const reader = res.body!.getReader(); - const decoder = new TextDecoder(); - let buf = ""; - while (true) { - const { done, value } = await reader.read(); - if (done) break; - buf += decoder.decode(value, { stream: true }); - } - buf += decoder.decode(); - return buf; + return { app, linkRegistry, ctx, seedLink, threadUpdateSpy }; } -describe("GET /:org/decopilot/threads/:threadId/stream", () => { - test("does not call listMessages or emit a snapshot frame", async () => { - const setup = makeStreamApp({}); +describe("POST /messages — VM-based dispatch", () => { + const validBody = { + messages: [ + { + role: "user", + parts: [{ type: "text", text: "hi" }], + }, + ], + agent: { id: AGENT_ID }, + branch: BRANCH, + temperature: 0.5, + }; - const res = await setup.app.request( - "/acme/decopilot/threads/thread_1/stream", + test("VM with remote-user kind + no online link → 409 link_offline", async () => { + const { app } = buildApp({ vmKind: "remote-user", linkOnline: false }); + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(validBody), + }, ); - expect(res.status).toBe(200); - expect(res.headers.get("content-type")).toContain("text/event-stream"); - - const body = await readSseBody(res); + expect(res.status).toBe(409); + const body = (await res.json()) as { error: string; code: string }; + expect(body.error).toBe("link_unavailable"); + expect(body.code).toBe("link_offline"); + }); - // No snapshot frame anywhere in the response — the client owns initial - // load via COLLECTION_THREAD_MESSAGES_LIST. 
- expect(body).not.toContain("event: snapshot"); + test("VM with remote-user kind + link missing capability → 409 capability_missing", async () => { + const { app, seedLink } = buildApp({ + vmKind: "remote-user", + linkOnline: true, + linkCapabilities: ["decopilot-sandbox"], + }); + await seedLink(); + // Link exists but only advertises decopilot-sandbox — claude-code + // provider expects "claude-code". + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(validBody), + }, + ); + expect(res.status).toBe(409); + const body = (await res.json()) as { + error: string; + code: string; + activeCapabilities: string[]; + }; + expect(body.code).toBe("capability_missing"); + expect(body.activeCapabilities).toEqual(["decopilot-sandbox"]); + }); - // Handler never touches storage.threads.listMessages. - expect(setup.listMessagesCalls).toBe(0); + test("VM with cloud kind → 202 (target is local/default)", async () => { + const { app } = buildApp({ vmKind: "docker", linkOnline: false }); + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(validBody), + }, + ); + expect(res.status).toBe(202); }); +}); + +// ============================================================================ +// POST /messages — first-message pinning +// ============================================================================ +// +// These tests exercise the logic added in Task 3.2: +// - First message (thread row has null pins) → derive + persist pins. +// - Subsequent message (thread row already has pins) → use them, no update. 
- test("forwards tail chunks from streamBuffer with no preceding snapshot", async () => { - const tail = new ReadableStream({ - start(controller) { - controller.enqueue({ type: "start" }); - controller.close(); +describe("POST /messages — first-message pinning", () => { + const validBody = { + messages: [ + { + role: "user", + parts: [{ type: "text", text: "hi" }], }, + ], + agent: { id: AGENT_ID }, + branch: BRANCH, + temperature: 0.5, + }; + + test("first message with explicit pins persists them and uses them", async () => { + const { app, seedLink, threadUpdateSpy } = buildApp({ + vmKind: "remote-user", + linkOnline: true, + threadPins: { sandbox_provider_kind: null, harness_id: null }, }); - const setup = makeStreamApp({ tailChunks: tail }); + await seedLink(); + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + ...validBody, + sandboxProviderKind: "remote-user", + harnessId: "claude-code", + }), + }, + ); + expect(res.status).toBe(202); + expect(threadUpdateSpy).toHaveBeenCalledWith( + THREAD_ID, + expect.objectContaining({ + sandbox_provider_kind: "remote-user", + harness_id: "claude-code", + }), + ); + }); - const res = await setup.app.request( - "/acme/decopilot/threads/thread_1/stream", + test("first message without explicit pins derives defaults and persists", async () => { + const { app, seedLink, threadUpdateSpy } = buildApp({ + vmKind: "remote-user", + linkOnline: true, + threadPins: { sandbox_provider_kind: null, harness_id: null }, + }); + await seedLink(); + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(validBody), + }, + ); + expect(res.status).toBe(202); + // link is online → resolveDefaultSandboxProviderKind returns vmKindForTest + // which is "remote-user" + 
expect(threadUpdateSpy).toHaveBeenCalledWith( + THREAD_ID, + expect.objectContaining({ + sandbox_provider_kind: "remote-user", + }), ); - expect(res.status).toBe(200); + }); - const body = await readSseBody(res); - expect(body).not.toContain("event: snapshot"); - expect(body).toContain('"type":"start"'); + test("subsequent message ignores request pins and uses thread row", async () => { + // Thread is pinned to (remote-user, claude-code). The request body sends + // harnessId: "decopilot" which would require the decopilot-sandbox + // capability. If the route mistakenly uses the body's harnessId, the link + // check fails with 409 capability_missing. Using the pinned harness + // (claude-code) instead → the link's claude-code capability matches → 202. + const { app, seedLink, threadUpdateSpy } = buildApp({ + vmKind: "remote-user", + linkOnline: true, + threadPins: { + sandbox_provider_kind: "remote-user", + harness_id: "claude-code", + }, + }); + await seedLink(); + const res = await app.request( + `/api/org_1/decopilot/threads/${THREAD_ID}/messages`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + ...validBody, + sandboxProviderKind: "docker", // should be ignored — thread row has remote-user + harnessId: "decopilot", // should be ignored — thread row has claude-code + }), + }, + ); + // 202 proves the pinned harness (claude-code) was used, not "decopilot" + // which would have produced a 409 capability_missing. + expect(res.status).toBe(202); + // Pins were already set — no update should be written. 
+ expect(threadUpdateSpy).not.toHaveBeenCalled(); }); }); diff --git a/apps/mesh/src/api/routes/decopilot/routes.ts b/apps/mesh/src/api/routes/decopilot/routes.ts index a8e3613077..1141f1d96d 100644 --- a/apps/mesh/src/api/routes/decopilot/routes.ts +++ b/apps/mesh/src/api/routes/decopilot/routes.ts @@ -8,7 +8,8 @@ import { createHash } from "node:crypto"; import type { MeshContext } from "@/core/mesh-context"; import { TierUnavailableError, resolveTier } from "@/core/resolve-tier"; -import type { SimpleModeTier } from "@/tools/organization/schema"; +import { resolveAgentTier } from "@/ai-providers/agent-tiers"; +import type { ChatTier, SimpleModeTier } from "@/tools/organization/schema"; import { posthog } from "@/posthog"; import { consumeStream, @@ -36,8 +37,18 @@ import { import { StreamRequestSchema } from "./schemas"; import type { ChatMessage, ModelsConfig } from "./types"; import type { DispatchRunInput } from "./dispatch-run"; +import { resolveHarnessId } from "./dispatch-run"; import { enqueueThreadRun } from "@/dispatch-queue"; import { wrapWithSseKeepalive } from "./sse-keepalive"; +import type { LinkRegistry } from "../../../links/link-registry"; +import { resolveDispatchTarget } from "../../../links/resolve-dispatch-target"; +import { ensureVm } from "@/tools/vm/start"; +import { + resolveSandboxProviderKindFromEnv, + type SandboxProviderKind, +} from "@decocms/sandbox/provider"; +import { resolveDefaultSandboxProviderKind } from "@/sandbox/resolve-default-provider-kind"; +import type { HarnessId } from "@/harnesses"; // ============================================================================ // Idempotency @@ -88,6 +99,29 @@ async function validateRequest( }; } +/** + * Look up the providerId for the credential the request would use, so + * POST /messages can pick the right harness (and therefore the right + * link capability) before enqueuing onto the thread gate. 
Returns + * undefined when the credential row isn't found — the caller falls back + * to "decopilot" (matches the existing prepareRun behavior). + */ +async function resolveProviderId( + ctx: MeshContext, + credentialId: string, + organizationId: string, +): Promise { + try { + const row = await ctx.storage.aiProviderKeys.findById( + credentialId, + organizationId, + ); + return row?.providerId; + } catch { + return undefined; + } +} + // ============================================================================ // Per-Request Model Resolution // ============================================================================ @@ -134,16 +168,51 @@ async function tryResolveTier(ctx: MeshContext, tier: SimpleModeTier) { } /** - * Resolves a tier (defaulting to "smart") to a full ModelsConfig via the - * shared resolveTier(), which falls back to curated provider defaults when - * the org's tier slot is unset. Also resolves the "image" and "web_research" - * tiers — when present they enable the generate_image and web_search - * built-in tools (registration is conditional in built-in-tools/index.ts). + * Resolves a tier (defaulting to "smart") to a full ModelsConfig. + * + * Two paths: + * + * - **Decopilot:** goes through `resolveTier()`, which consults the org's + * AI provider keys + simple-mode slot configuration. Also resolves + * `image` and `web_research` tiers — when present they enable the + * `generate_image` and `web_search` built-in tools. + * + * - **Laptop-CLI harnesses (`claude-code`, `codex`):** the model lives + * on the user's laptop, not in any AI provider key. We synthesize the + * ModelsConfig from the agent's hardcoded tier map (`agent-tiers.ts`). + * The `credentialId` is a sentinel — the harness reads `models.thinking.id` + * to know which CLI sub-command to invoke and ignores the credential. + * `image` / `web_research` are not supported in this path; the + * corresponding built-in tools stay unregistered. 
*/ async function resolvePerRequestModels( ctx: MeshContext, tier: SimpleModeTier | undefined, + harnessId: HarnessId | null | undefined, ): Promise { + if (harnessId === "claude-code" || harnessId === "codex") { + const chatTier: ChatTier = + tier === "fast" || tier === "smart" || tier === "thinking" + ? tier + : "smart"; + const entry = resolveAgentTier(harnessId, chatTier); + if (!entry) { + // Should be unreachable — resolveAgentTier returns non-null for + // both supported CLI harnesses and every ChatTier value. + throw new Error( + `No model mapping for harness "${harnessId}" tier "${chatTier}"`, + ); + } + return { + credentialId: `laptop:${harnessId}`, + thinking: { + id: entry.modelId, + title: entry.label, + provider: harnessId, + }, + }; + } + const [chat, image, webResearch] = await Promise.all([ resolveTier(ctx, tier ?? "smart"), tryResolveTier(ctx, "image"), @@ -177,7 +246,12 @@ async function resolvePerRequestModels( async function validate( c: Context<{ Variables: { meshContext: MeshContext } }>, threadIdParam: string | undefined, -): Promise { +): Promise< + DispatchRunInput & { + sandboxProviderKind?: SandboxProviderKind | null; + harnessId?: HarnessId | null; + } +> { const ctx = c.get("meshContext"); const { @@ -192,6 +266,8 @@ async function validate( branch, toolApprovalLevel, mode, + sandboxProviderKind, + harnessId, } = await validateRequest(c); const bodyThreadId = thread_id ?? memoryConfig?.thread_id; @@ -207,7 +283,7 @@ async function validate( throw new HTTPException(401, { message: "User ID is required" }); } - const models = await resolvePerRequestModels(ctx, tier); + const models = await resolvePerRequestModels(ctx, tier, harnessId); const allowedModels = await fetchModelPermissions( ctx.db, @@ -239,6 +315,8 @@ async function validate( taskId: taskIdInput, windowSize: memoryConfig?.windowSize ?? DEFAULT_WINDOW_SIZE, branch: branch ?? null, + sandboxProviderKind: sandboxProviderKind ?? null, + harnessId: harnessId ?? 
null, }; } @@ -250,10 +328,17 @@ export interface DecopilotDeps { cancelBroadcast: CancelBroadcast; streamBuffer: StreamBuffer; runRegistry: RunRegistry; + /** + * Used to resolve the user's link daemon. POST /messages calls + * `resolveDispatchTarget` against this registry before enqueuing onto + * the thread gate so the cluster can reject early with 409 instead of + * silently queueing a run that would have nowhere to go. + */ + linkRegistry: LinkRegistry; } export function createDecopilotRoutes(deps: DecopilotDeps) { - const { cancelBroadcast, streamBuffer, runRegistry } = deps; + const { cancelBroadcast, streamBuffer, runRegistry, linkRegistry } = deps; const app = new Hono<{ Variables: { meshContext: MeshContext } }>(); // ============================================================================ @@ -309,6 +394,7 @@ export function createDecopilotRoutes(deps: DecopilotDeps) { app.post("/:org/decopilot/threads/:threadId/messages", async (c) => { try { + const ctx = c.get("meshContext"); const input = await validate(c, c.req.param("threadId")); const taskId = input.taskId; if (!taskId) { @@ -317,7 +403,106 @@ export function createDecopilotRoutes(deps: DecopilotDeps) { throw new HTTPException(400, { message: "threadId is required" }); } - const { abortSignal: _ignored, ...serializableRequest } = input; + // Resolve the dispatch target up-front so we can reject a + // request with 409 *before* enqueuing it onto the thread gate. + // Holding the link-online decision at POST time also keeps DBOS + // replay from rerouting the run if the daemon disconnects between + // enqueue and dispatch (the workflow body reads target directly off + // the serialized request). + // + // The thread row's (sandbox_provider_kind, harness_id) are the + // single source of truth for routing. 
Tolerate storage failure when + // loading the thread row — a missing/erroring row just means we fall + // back to the request body / default helpers (the canonical thread row + // is created by COLLECTION_THREADS_CREATE before the first POST, but + // legacy callers and tests may skip it). + let existingThread: Awaited< + ReturnType + > | null = null; + try { + existingThread = (await ctx.storage.threads?.get?.(taskId)) ?? null; + } catch { + existingThread = null; + } + + const branch = existingThread?.branch ?? input.branch ?? null; + if (!branch) { + throw new HTTPException(400, { + message: "thread has no branch pinned", + }); + } + + // Determine the pinned (kind, harness). If the thread row has them, + // use those. Otherwise this is the first message — derive defaults and + // persist to the thread row. + let pinnedKind = (existingThread?.sandbox_provider_kind ?? + null) as SandboxProviderKind | null; + + const providerId = await resolveProviderId( + ctx, + input.models.credentialId, + input.organizationId, + ); + const credentialHarness = resolveHarnessId(providerId); + + let pinnedHarness = (existingThread?.harness_id ?? + null) as HarnessId | null; + + if (!pinnedKind || !pinnedHarness) { + pinnedKind = + pinnedKind ?? + input.sandboxProviderKind ?? + (await resolveDefaultSandboxProviderKind(input.userId, { + linkRegistry, + resolveEnvKind: resolveSandboxProviderKindFromEnv, + })); + pinnedHarness = pinnedHarness ?? input.harnessId ?? 
credentialHarness; + + if (existingThread) { + try { + await ctx.storage.threads?.update?.(taskId, { + sandbox_provider_kind: pinnedKind, + harness_id: pinnedHarness, + }); + } catch (err) { + console.warn( + "[decopilot:messages] failed to persist thread pins", + err, + ); + } + } + } + + const vm = await ensureVm( + { + virtualMcpId: input.agent.id, + branch, + sandboxProviderKind: pinnedKind, + }, + ctx, + ); + + const target = await resolveDispatchTarget( + { harnessId: pinnedHarness, vm, userId: input.userId }, + { linkRegistry }, + ); + if (target.kind === "error") { + return c.json( + { + error: "link_unavailable", + code: target.reason, + activeCapabilities: target.activeCapabilities, + }, + 409, + ); + } + + const { abortSignal: _ignored, ...rest } = input; + const serializableRequest = { + ...rest, + target, + harnessId: pinnedHarness, + }; const lastMsg = input.messages[input.messages.length - 1]; const idempotencyKey = computeIdempotencyKey(lastMsg); const workflowID = idempotencyKey diff --git a/apps/mesh/src/api/routes/decopilot/schemas.ts b/apps/mesh/src/api/routes/decopilot/schemas.ts index 92becbfff5..da7c69e7b1 100644 --- a/apps/mesh/src/api/routes/decopilot/schemas.ts +++ b/apps/mesh/src/api/routes/decopilot/schemas.ts @@ -20,7 +20,7 @@ const MemoryConfigSchema = z.object({ thread_id: z.string(), }); -export const StreamRequestSchema = z.object({ +const baseStreamRequestSchema = z.object({ messages: z .array(UIMessageSchema) .min(1) @@ -43,9 +43,38 @@ export const StreamRequestSchema = z.object({ */ branch: z.string().nullish(), toolApprovalLevel: z.enum(["auto", "readonly"]).default("auto"), + sandboxProviderKind: z + .enum(["docker", "agent-sandbox", "remote-user"]) + .nullish() + .describe( + "Pinned on first message. Subsequent messages ignore this field (the thread row carries the pinned value).", + ), + harnessId: z + .enum(["claude-code", "codex", "decopilot"]) + .nullish() + .describe( + "Pinned on first message. 
Subsequent messages ignore this field.", + ), mode: z .enum(["default", "plan", "web-search", "gen-image"]) .default("default"), }); +// TODO(2026-06-20): remove this preprocessor once all clients have shipped +// without the runLocally field. See spec +// docs/superpowers/specs/2026-05-20-vm-as-runtime-identity-design.md. +export const StreamRequestSchema = z.preprocess((raw) => { + if (raw && typeof raw === "object" && !Array.isArray(raw)) { + const obj = raw as Record; + if ("runLocally" in obj) { + console.log("deprecated field runLocally", { + thread_id: obj.thread_id, + }); + const { runLocally: _drop, ...rest } = obj; + return rest; + } + } + return raw; +}, baseStreamRequestSchema); + export type StreamRequest = z.infer; diff --git a/apps/mesh/src/api/routes/decopilot/types.ts b/apps/mesh/src/api/routes/decopilot/types.ts index c52b312506..1dd62080d7 100644 --- a/apps/mesh/src/api/routes/decopilot/types.ts +++ b/apps/mesh/src/api/routes/decopilot/types.ts @@ -11,7 +11,7 @@ import type { LanguageModelV2 } from "@ai-sdk/provider"; import type { InferUITool, UIMessage } from "ai"; import type { ToolDefinition, UsageStats } from "@decocms/mesh-sdk"; import type { Metadata } from "@/web/components/chat/types"; -import type { BuiltInToolSet } from "../../../harnesses/decopilot/built-in-tools"; +import type { BuiltInToolSet } from "@/harnesses/decopilot/built-in-tools"; // ============================================================================ // Stream API Message Types diff --git a/apps/mesh/src/api/routes/vm-events-handler.ts b/apps/mesh/src/api/routes/vm-events-handler.ts index fab43a2828..c9ed4f09ff 100644 --- a/apps/mesh/src/api/routes/vm-events-handler.ts +++ b/apps/mesh/src/api/routes/vm-events-handler.ts @@ -9,12 +9,15 @@ import type { Context } from "hono"; import { streamSSE } from "hono/streaming"; -import { resolveRunnerKindFromEnv } from "@decocms/sandbox/runner"; -import type { ClaimPhase } from "@decocms/sandbox/runner"; +import { + 
resolveSandboxProviderKindFromEnv, + type SandboxProviderKind, + type SandboxProvider, +} from "@decocms/sandbox/provider"; +import type { ClaimPhase } from "@decocms/sandbox/provider/agent-sandbox"; import { subscribeLifecycle } from "../../sandbox/lifecycle"; -import type { SandboxRunner } from "@decocms/sandbox/runner"; import type { MeshContext } from "../../core/mesh-context"; -import { KyselySandboxRunnerStateStore } from "../../storage/sandbox-runner-state"; +import { KyselySandboxProviderStateStore } from "../../storage/sandbox-runner-state"; import { readVmMap, resolveVm } from "../../tools/vm/vm-map"; import type { Env } from "../hono-env"; @@ -40,7 +43,7 @@ const PROXY_OPEN_RETRY_DELAY_MS = 500; export interface VmEventsHandlerArgs { ctx: MeshContext; claimName: string; - runner: SandboxRunner; + runner: SandboxProvider; branch: string; userId: string; projectRef: string; @@ -57,15 +60,19 @@ export function handleVmEvents(c: Context, args: VmEventsHandlerArgs) { projectRef, virtualMcpMetadata, } = args; - const runnerKind = resolveRunnerKindFromEnv(); + const providerKind = resolveSandboxProviderKindFromEnv(); const existingVmEntry = resolveVm( readVmMap(virtualMcpMetadata), userId, branch, + providerKind, ); const expectingHandle = existingVmEntry?.vmId === claimName; - const existingRunnerKind = existingVmEntry?.runnerKind ?? null; + // Coalesce legacy kinds ("host", "freestyle") to the current env kind. + const rawKind = existingVmEntry?.sandboxProviderKind; + const existingProviderKind: SandboxProviderKind | null = + rawKind === "host" || rawKind === "freestyle" ? null : (rawKind ?? null); c.header("X-Accel-Buffering", "no"); c.header("Content-Encoding", "identity"); @@ -92,7 +99,7 @@ export function handleVmEvents(c: Context, args: VmEventsHandlerArgs) { claimName, userId, projectRef, - runnerKind: existingRunnerKind ?? runnerKind, + sandboxProviderKind: existingProviderKind ?? 
providerKind, }); await stream.writeSSE({ event: "gone", data: "" }).catch(() => {}); return; @@ -122,7 +129,7 @@ export function handleVmEvents(c: Context, args: VmEventsHandlerArgs) { } async function isStaleHandle( - runner: SandboxRunner, + runner: SandboxProvider, claimName: string, ): Promise { try { @@ -140,13 +147,14 @@ async function isStaleHandle( async function cleanupStaleEntry(args: { ctx: MeshContext; - runner: SandboxRunner; + runner: SandboxProvider; claimName: string; userId: string; projectRef: string; - runnerKind: "host" | "docker" | "agent-sandbox"; + sandboxProviderKind: SandboxProviderKind; }): Promise { - const { ctx, runner, claimName, userId, projectRef, runnerKind } = args; + const { ctx, runner, claimName, userId, projectRef, sandboxProviderKind } = + args; try { await runner.delete(claimName); } catch (err) { @@ -157,11 +165,11 @@ async function cleanupStaleEntry(args: { ); } try { - const stateStore = new KyselySandboxRunnerStateStore(ctx.db); - await stateStore.delete({ userId, projectRef }, runnerKind); + const stateStore = new KyselySandboxProviderStateStore(ctx.db); + await stateStore.delete({ userId, projectRef }, sandboxProviderKind); } catch (err) { console.warn( - `[vm-events] sandbox_runner_state delete failed for ${userId}/${projectRef}/${runnerKind}: ${ + `[vm-events] sandbox_runner_state delete failed for ${userId}/${projectRef}/${sandboxProviderKind}: ${ err instanceof Error ? 
err.message : String(err) }`, ); @@ -171,7 +179,7 @@ async function cleanupStaleEntry(args: { async function emitLifecycle(args: { stream: import("hono/streaming").SSEStreamingApi; claimName: string; - runner: SandboxRunner; + runner: SandboxProvider; signal: AbortSignal; }): Promise { const { stream, claimName, runner, signal } = args; @@ -223,7 +231,7 @@ async function emitLifecycle(args: { async function proxyDaemonEvents(args: { stream: import("hono/streaming").SSEStreamingApi; - runner: SandboxRunner; + runner: SandboxProvider; claimName: string; signal: AbortSignal; }): Promise { diff --git a/apps/mesh/src/api/routes/vm-events.ts b/apps/mesh/src/api/routes/vm-events.ts index 98ed5fceff..8d0aa1bb95 100644 --- a/apps/mesh/src/api/routes/vm-events.ts +++ b/apps/mesh/src/api/routes/vm-events.ts @@ -38,12 +38,16 @@ import { Hono } from "hono"; import { streamSSE } from "hono/streaming"; import { composeSandboxRef, - resolveRunnerKindFromEnv, -} from "@decocms/sandbox/runner"; -import type { ClaimPhase } from "@decocms/sandbox/runner"; + resolveSandboxProviderKindFromEnv, +} from "@decocms/sandbox/provider"; +import type { + ClaimPhase, + SandboxProviderKind, +} from "@decocms/sandbox/provider"; import { computeClaimHandle } from "../../sandbox/claim-handle"; import { getOrInitSharedRunner, + getSharedSandboxProvider, subscribeLifecycle, } from "../../sandbox/lifecycle"; import { @@ -52,7 +56,7 @@ import { requireOrganization, type MeshContext, } from "../../core/mesh-context"; -import { KyselySandboxRunnerStateStore } from "../../storage/sandbox-runner-state"; +import { KyselySandboxProviderStateStore } from "../../storage/sandbox-runner-state"; import { readVmMap, resolveVm } from "../../tools/vm/vm-map"; import type { Env } from "../hono-env"; @@ -111,7 +115,7 @@ export const createVmEventsRoutes = () => { branch, }); const claimName = computeClaimHandle({ userId, projectRef }, branch); - const runnerKind = resolveRunnerKindFromEnv(); + const providerKind = 
resolveSandboxProviderKindFromEnv(); // Snapshot vmMap from the same metadata read used for the org-ownership // check. Used below to gate the stale-handle probe: we only run it when @@ -125,14 +129,32 @@ export const createVmEventsRoutes = () => { readVmMap(virtualMcp.metadata as Record | null), userId, branch, + providerKind, ); const expectingHandle = existingVmEntry?.vmId === claimName; - const existingRunnerKind = existingVmEntry?.runnerKind ?? null; - const runner = await getOrInitSharedRunner(); + // User-scoped resolution: for `remote-user` this picks the acting + // user's link daemon; for other kinds it falls through to the env + // singleton. Wrapped in a try because the remote-user path throws a + // typed error when no link is registered — in that case we want to + // emit a `failed` phase with a user-actionable message, NOT fall + // through to `getOrInitSharedRunner()` (which would re-throw the + // same error from `instantiate("remote-user")`). + let runner: Awaited> | null; + let resolveError: Error | null = null; + try { + runner = await getSharedSandboxProvider(ctx); + } catch (err) { + resolveError = err instanceof Error ? err : new Error(String(err)); + // For non-remote-user kinds, try the env singleton as a last + // resort. For remote-user the throw IS the answer. + if (providerKind !== "remote-user") { + runner = await getOrInitSharedRunner().catch(() => null); + } else { + runner = null; + } + } - // No runner configured at all → can't proxy daemon SSE. Surface a failed - // phase rather than a silent close so the UI shows a meaningful error. if (!runner) { return streamSSE(c, async (stream) => { await stream.writeSSE({ @@ -140,7 +162,9 @@ export const createVmEventsRoutes = () => { data: JSON.stringify({ kind: "failed", reason: "unknown", - message: "No sandbox runner configured on this mesh.", + message: + resolveError?.message ?? 
+ "No sandbox runner configured on this mesh.", } satisfies ClaimPhase), }); }); @@ -178,7 +202,7 @@ export const createVmEventsRoutes = () => { claimName, userId, projectRef, - runnerKind: existingRunnerKind ?? runnerKind, + sandboxProviderKind: providerKind, }); await stream.writeSSE({ event: "gone", data: "" }).catch(() => {}); return; @@ -239,7 +263,7 @@ async function isStaleHandle( * * `runner.delete` is idempotent: it 404-tolerantly tries to delete the * SandboxClaim, closes any forwarder, drops in-memory + state-store rows. - * The runner-kind dispatch matches the *prior* kind (existingRunnerKind) + * The provider-kind dispatch matches the *prior* kind (existingRunnerKind) * so we don't leave behind rows in the wrong table when the env's runner * has flipped between starts and stops. * @@ -263,9 +287,16 @@ async function cleanupStaleEntry(args: { claimName: string; userId: string; projectRef: string; - runnerKind: "host" | "docker" | "agent-sandbox"; + sandboxProviderKind: SandboxProviderKind; }): Promise { - const { ctx, runner, claimName, userId, projectRef, runnerKind } = args; + const { + ctx, + runner, + claimName, + userId, + projectRef, + sandboxProviderKind: providerKind, + } = args; try { await runner.delete(claimName); } catch (err) { @@ -276,11 +307,11 @@ async function cleanupStaleEntry(args: { ); } try { - const stateStore = new KyselySandboxRunnerStateStore(ctx.db); - await stateStore.delete({ userId, projectRef }, runnerKind); + const stateStore = new KyselySandboxProviderStateStore(ctx.db); + await stateStore.delete({ userId, projectRef }, providerKind); } catch (err) { console.warn( - `[vm-events] sandbox_runner_state delete failed for ${userId}/${projectRef}/${runnerKind}: ${ + `[vm-events] sandbox_runner_state delete failed for ${userId}/${projectRef}/${providerKind}: ${ err instanceof Error ? 
err.message : String(err) }`, ); diff --git a/apps/mesh/src/api/routes/vm-proxy.ts b/apps/mesh/src/api/routes/vm-proxy.ts index 0a78692dbf..28ed9da125 100644 --- a/apps/mesh/src/api/routes/vm-proxy.ts +++ b/apps/mesh/src/api/routes/vm-proxy.ts @@ -14,10 +14,20 @@ import { Hono, type Context } from "hono"; import { streamSSE } from "hono/streaming"; import { createMiddleware } from "hono/factory"; -import { composeSandboxRef } from "@decocms/sandbox/runner"; -import type { ClaimPhase, SandboxRunner } from "@decocms/sandbox/runner"; +import { + composeSandboxRef, + resolveSandboxProviderKindFromEnv, +} from "@decocms/sandbox/provider"; +import type { + SandboxProvider, + SandboxProviderKind, +} from "@decocms/sandbox/provider"; +import type { ClaimPhase } from "@decocms/sandbox/provider/agent-sandbox"; import { computeClaimHandle } from "../../sandbox/claim-handle"; -import { getOrInitSharedRunner } from "../../sandbox/lifecycle"; +import { + getOrInitSharedRunner, + getSharedSandboxProvider, +} from "../../sandbox/lifecycle"; import { getUserId, requireAuth, @@ -33,7 +43,7 @@ import { readValidatedRuntimeEnv } from "../../tools/vm/helpers"; interface VmClaim { claimName: string; /** Null when no sandbox runner is configured on this mesh instance. */ - runner: SandboxRunner | null; + runner: SandboxProvider | null; virtualMcpId: string; branch: string; userId: string; @@ -84,7 +94,29 @@ const resolveVmClaim = createMiddleware(async (c, next) => { branch, }); const claimName = computeClaimHandle({ userId, projectRef }, branch); - const runner = await getOrInitSharedRunner(); + + // User-scoped resolution: for `remote-user` this picks the acting user's + // link daemon via `ctx.linkRegistry`; for `docker` / `agent-sandbox` it + // returns the cached env-resolved singleton. 
Falls back to the env + // singleton ONLY for non-`remote-user` kinds — for `remote-user` the + // throw IS the answer (no link daemon → `requireRunner`/events handler + // surface the 503/`failed` phase). Pattern mirrors the legacy + // `/api/vm-events` handler at vm-events.ts:143-156. + let providerKind: SandboxProviderKind | null; + try { + providerKind = resolveSandboxProviderKindFromEnv(); + } catch { + providerKind = null; + } + let runner: SandboxProvider | null; + try { + runner = await getSharedSandboxProvider(ctx); + } catch { + runner = + providerKind === "remote-user" + ? null + : await getOrInitSharedRunner().catch(() => null); + } c.set("vmClaim", { claimName, @@ -100,7 +132,7 @@ const resolveVmClaim = createMiddleware(async (c, next) => { }); /** Guard for routes that need a non-null runner. Returns the runner or a 503. */ -function requireRunner(c: Context): SandboxRunner | Response { +function requireRunner(c: Context): SandboxProvider | Response { const { runner } = c.get("vmClaim"); if (!runner) { return c.json({ error: "No sandbox runner configured" }, 503); diff --git a/apps/mesh/src/api/watch.test.ts b/apps/mesh/src/api/watch.test.ts index 928c2e112a..fea0ad51cb 100644 --- a/apps/mesh/src/api/watch.test.ts +++ b/apps/mesh/src/api/watch.test.ts @@ -137,6 +137,8 @@ function makeThread(overrides: Partial): Thread { run_started_at: overrides.run_started_at ?? null, virtual_mcp_id: overrides.virtual_mcp_id ?? "", branch: overrides.branch ?? null, + sandbox_provider_kind: overrides.sandbox_provider_kind ?? null, + harness_id: overrides.harness_id ?? null, metadata: overrides.metadata ?? {}, created_at: overrides.created_at ?? "2026-01-01T00:00:00.000Z", updated_at: overrides.updated_at ?? 
"2026-01-01T00:00:00.000Z", diff --git a/apps/mesh/src/auth/dev-link-session.ts b/apps/mesh/src/auth/dev-link-session.ts new file mode 100644 index 0000000000..c660ecf37f --- /dev/null +++ b/apps/mesh/src/auth/dev-link-session.ts @@ -0,0 +1,150 @@ +/** + * Dev-only helper to bootstrap an OAuth-ish session file for the + * laptop-side link daemon when it auto-spawns out of `bun run dev`. + * + * In production the link reads a real OAuth session minted by + * `decocms auth login`. Locally we have no such login flow — the dev + * cluster boots in `localMode`, seeds an admin user, and is the only + * thing the link talks to. To avoid asking the developer to manually + * `auth login` against their own dev cluster, we mint a Better Auth + * API key for the local admin user at first dev boot and serialize it + * in the `Session` shape `apps/mesh/src/link-daemon/session.ts` expects. + * + * The link then presents that API key as a Bearer token to + * `POST /api/links`. The cluster's `getAuthenticatedUserSub` resolves + * it via `auth.api.verifyApiKey` and returns the local admin's userSub. + * + * Idempotent — exits early if the session file already exists. + * + * Only runs when `MESH_ALLOW_LOCALHOST_LINKS=1` is set, which we + * default to in `bun run dev`. Production never sets that flag. 
+ */ + +import { existsSync } from "node:fs"; +import { chmod, mkdir, rename, writeFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import { getDb } from "@/database"; +import { auth } from "./index"; + +export interface DevLinkSession { + target: string; + clientId: string; + user: { sub: string; email?: string; name?: string }; + accessToken: string; + createdAt: string; +} + +function devLinkDataDir(homeDir: string): string { + return join(homeDir, "dev-link"); +} + +function devLinkSessionPath(homeDir: string): string { + // `DEV_LINK_SESSION_PATH` is set by the dev CLI (`apps/mesh/src/cli/commands/dev.ts`) + // so the cluster writes session.json to the same tmpdir-based location + // the auto-spawned link daemon reads from. Falls back to the legacy + // `/dev-link/session.json` for callers that haven't migrated + // yet (manual dev:server invocations, tests). + const override = process.env.DEV_LINK_SESSION_PATH; + if (override && override.trim() !== "") return override; + return join(devLinkDataDir(homeDir), "session.json"); +} + +/** + * Mint a dev session for the link daemon so it can register against the + * local cluster without a real OAuth flow. Writes to + * `/dev-link/session.json` (mode 0600). Idempotent — returns + * the existing path if the file is already there. + * + * Returns the session path on success, or null when bootstrap could not + * complete (no admin user yet, API key mint failed). The caller is + * expected to surface a warning and skip auto-spawning the link rather + * than crashing dev boot. + */ +export async function bootstrapDevLinkSession( + homeDir: string, + clusterBaseUrl: string, +): Promise<{ path: string; userSub: string } | null> { + const path = devLinkSessionPath(homeDir); + if (existsSync(path)) { + // Re-use the existing session file iff its API key still verifies. + // If the key was deleted/rotated/expired (e.g. 
DB wipe, manual + // cleanup, expiresIn lapsed) the link daemon fails to register with + // an opaque 500 "no session". Verifying up front and re-minting on + // failure makes restarts self-healing without the developer having + // to manually delete the file. + try { + const file = Bun.file(path); + const json = (await file.json()) as { + user?: { sub?: string }; + accessToken?: string; + }; + const sub = json.user?.sub; + const key = json.accessToken; + if ( + typeof sub === "string" && + sub.length > 0 && + typeof key === "string" + ) { + const verified = await auth.api + .verifyApiKey({ body: { key } }) + .then((r: { valid?: boolean } | null) => r?.valid === true) + .catch(() => false); + if (verified) return { path, userSub: sub }; + } + } catch { + // fall through and re-mint + } + } + + const db = getDb().db; + // Most-recently-created admin user — local-mode seeds exactly one, + // but we don't pin to the local-mode email so a hand-created admin + // also works when DECOCMS_LOCAL_MODE is off. + const user = await db + .selectFrom("user") + .select(["id", "email", "name"]) + .orderBy("createdAt", "desc") + .executeTakeFirst(); + if (!user?.id) return null; + + let apiKey: { key?: string } | null = null; + try { + apiKey = (await auth.api.createApiKey({ + body: { + name: "dev-link (auto-minted by bun run dev)", + userId: user.id, + // 30 days — re-minted on file deletion, far longer than any + // single dev session. + expiresIn: 60 * 60 * 24 * 30, + rateLimitEnabled: false, + }, + })) as { key?: string } | null; + } catch (err) { + console.warn( + "[dev-link] failed to mint API key:", + err instanceof Error ? err.message : String(err), + ); + return null; + } + if (!apiKey?.key) return null; + + const session: DevLinkSession = { + target: clusterBaseUrl, + clientId: "dev-link", + user: { + sub: user.id, + ...(user.email ? { email: user.email } : {}), + ...(user.name ? 
{ name: user.name } : {}), + }, + accessToken: apiKey.key, + createdAt: new Date().toISOString(), + }; + + await mkdir(dirname(path), { recursive: true }); + const tmp = `${path}.tmp`; + await writeFile(tmp, JSON.stringify(session, null, 2), { mode: 0o600 }); + await chmod(tmp, 0o600); + await rename(tmp, path); + + return { path, userSub: user.id }; +} diff --git a/apps/mesh/src/cli.ts b/apps/mesh/src/cli.ts index c8b13e9ce1..8918b6c934 100644 --- a/apps/mesh/src/cli.ts +++ b/apps/mesh/src/cli.ts @@ -56,6 +56,14 @@ const { values, positionals } = parseArgs({ type: "boolean", default: false, }, + "local-sandbox-provider": { + type: "boolean", + default: false, + }, + "no-tunnel": { + type: "boolean", + default: false, + }, vibe: { type: "boolean", default: false, @@ -77,7 +85,7 @@ Usage: deco services Manage services (Postgres, NATS) deco init Scaffold a new MCP app deco auth Manage CLI authentication - deco link [options] [-- ] Tunnel a local port to a stable deco.host URL + deco link [options] Start the laptop-side link daemon deco completion [shell] Install shell completions Server Options: @@ -91,17 +99,17 @@ Server Options: -v, --version Show version Dev Options: - --vite-port Vite dev server port (default: 4000) - --base-url Base URL for the server + --vite-port Vite dev server port (default: 4000) + --base-url Base URL for the server + --local-sandbox-provider Auto-spawn the local link daemon (remote-user sandbox provider) Auth Options: --target Decocms target (default: https://studio.decocms.com) Link Options: - -p, --port Local port to tunnel (default: 8787) - -e, --env Env var to inject the tunnel URL into when spawning - a child command (default: BASE_URL) - -- Optional command to spawn after the tunnel opens + --port Local port for the daemon (default: 5174) + --no-tunnel Skip Warp tunnel (requires MESH_ALLOW_LOCALHOST_LINKS=1 + on the cluster) Environment Variables: PORT Port to listen on (default: 3000) @@ -118,8 +126,6 @@ Examples: deco init my-app 
Scaffold a new MCP app deco auth login Log in to studio.decocms.com deco auth whoami Show current session - deco link -p 3000 -- bun dev Tunnel localhost:3000, run "bun dev" - deco link -p 8787 Tunnel an already-running service on 8787 Documentation: https://decocms.com/studio @@ -229,34 +235,23 @@ if (command === "auth") { // ── Link command ─────────────────────────────────────────────────────── if (command === "link") { - const dataDir = resolveDataDir(); - const port = Number(values.port); - if (!Number.isInteger(port) || port <= 0) { - console.error(`Invalid --port value: ${values.port}`); - process.exit(1); - } - const env = values.env ?? "BASE_URL"; - - // Trailing args after `--` are the run command. parseArgs gives us positionals - // including everything after `--`; we re-derive the boundary from the raw argv. - const dashDashIdx = process.argv.indexOf("--"); - const runCommand = - dashDashIdx >= 0 ? process.argv.slice(dashDashIdx + 1) : []; - - const { linkCommand } = await import("./cli/commands/link"); - const result = linkCommand({ - cwd: process.cwd(), - dataDir, - port, - env, - runCommand, + const { runLinkCommand } = await import("./cli/commands/link"); + // The top-level `parseArgs` declares `--port` with a default of 3000 + // (for the server command). Only honor it for `deco link` if the user + // actually passed `--port`/`-p` on the command line — otherwise + // `runLinkCommand` falls back to the daemon's own default of 5174. + const portExplicit = process.argv.some( + (a) => + a === "--port" || + a === "-p" || + a.startsWith("--port=") || + a.startsWith("-p="), + ); + const code = await runLinkCommand({ + port: portExplicit ? Number(values.port) : undefined, + noTunnel: values["no-tunnel"] === true, }); - - // Forward Ctrl-C to the link command for graceful shutdown. 
- process.on("SIGINT", () => void result.cancel()); - process.on("SIGTERM", () => void result.cancel()); - - process.exit(await result.exit); + process.exit(code); } // ── Dev command (Ink TUI + dev servers) ───────────────────────────────── @@ -269,6 +264,7 @@ if (command === "dev") { const noTui = values["no-tui"] === true || !process.stdout.isTTY; + const localSandboxProvider = values["local-sandbox-provider"] === true; const devOptions = { port: values.port!, vitePort: values["vite-port"]!, @@ -277,6 +273,7 @@ if (command === "dev") { skipMigrations: values["skip-migrations"] === true, noTui, localMode: values["no-local-mode"] !== true, + localSandboxProvider, }; if (noTui) { @@ -305,7 +302,7 @@ if (command === "dev") { const { setDevMode, setVibe, setDataDir } = await import("./cli/cli-store"); const displayHome = decoHome.replace(homedir(), "~"); - setDevMode(); + setDevMode({ localSandboxProvider }); setDataDir(decoHome); render(createElement(App, { home: displayHome }), { patchConsole: false, diff --git a/apps/mesh/src/cli/cli-store.ts b/apps/mesh/src/cli/cli-store.ts index dde4eae625..2a806a336d 100644 --- a/apps/mesh/src/cli/cli-store.ts +++ b/apps/mesh/src/cli/cli-store.ts @@ -82,10 +82,20 @@ export function addLogEntry(entry: LogEntry) { emit(); } -export function setDevMode() { +export function setDevMode(opts: { localSandboxProvider?: boolean } = {}) { state = { ...state, - services: [...state.services, { name: "Vite", status: "pending", port: 0 }], + services: [ + ...state.services, + { name: "Vite", status: "pending", port: 0 }, + // Auto-spawned by `bun run dev --local-sandbox-provider` after the + // cluster is up — see apps/mesh/src/cli/commands/dev.ts. The + // remote-user sandbox provider routes through this. Marked ready + // once the link binary's HTTP server begins accepting connections. + ...(opts.localSandboxProvider + ? 
[{ name: "Sandbox", status: "pending" as const, port: 0 }] + : []), + ], }; emit(); } diff --git a/apps/mesh/src/cli/commands/completion.ts b/apps/mesh/src/cli/commands/completion.ts index 2be1fc1f14..723e3baeaf 100644 --- a/apps/mesh/src/cli/commands/completion.ts +++ b/apps/mesh/src/cli/commands/completion.ts @@ -23,7 +23,7 @@ _deco_completion() { return 0 ;; *) - COMPREPLY=($(compgen -W "init completion --help --version --port --home --skip-migrations --no-tui --no-local-mode" -- "$cur")) + COMPREPLY=($(compgen -W "init completion --help --version --port --home --skip-migrations --no-tui --no-local-mode --local-sandbox-provider" -- "$cur")) ;; esac } @@ -48,6 +48,7 @@ _deco() { '--skip-migrations[Skip database migrations]' \\ '--no-tui[Disable Ink UI]' \\ '--no-local-mode[Disable auto-login]' \\ + '--local-sandbox-provider[Auto-spawn the local link daemon]' \\ '-h[Show help]' \\ '--help[Show help]' \\ '-v[Show version]' \\ diff --git a/apps/mesh/src/cli/commands/dev.ts b/apps/mesh/src/cli/commands/dev.ts index f72af1cddd..c8fa125522 100644 --- a/apps/mesh/src/cli/commands/dev.ts +++ b/apps/mesh/src/cli/commands/dev.ts @@ -5,6 +5,7 @@ * buildSettings(). Spawns dev servers and reports progress via the CLI * store so the Ink UI can update live. */ +import { tmpdir } from "node:os"; import { join } from "path"; import type { Subprocess } from "bun"; import { buildSettings } from "../../settings/pipeline"; @@ -16,6 +17,7 @@ import { updateService, } from "../cli-store"; import { findAvailablePort } from "../find-available-port"; +import { waitForPort } from "../lib/port-wait"; export interface DevOptions { port: string; @@ -25,6 +27,10 @@ export interface DevOptions { skipMigrations: boolean; noTui?: boolean; localMode: boolean; + /** When true, auto-spawn the link daemon (`deco link`) so the + * remote-user sandbox provider has a live target. Default false — + * `dev:conductor` opts in. 
*/ + localSandboxProvider: boolean; } // Strip ANSI escape codes from a string @@ -114,6 +120,20 @@ export async function startDevServer( // import.meta.dir = apps/mesh/src/cli/commands → go up 5 levels to repo root const repoRoot = join(import.meta.dir, "..", "..", "..", "..", ".."); + // Pre-compute the link's data dir so the cluster's `bootstrapDevLinkSession` + // can write `session.json` to the exact path the link will read. The dir + // lives in tmpdir — NOT under settings.dataDir, which is inside the + // mesh repo. Sandbox clones go into `/.deco/link/sandboxes//repo`; + // when that parent is itself a git repo (e.g. `~/code/mesh/.deco/...`) + // git's parent-walk hits the outer .git, refuses to clone, and the + // daemon crashes mid-bootstrap. Keying by workspace slug isolates + // concurrent worktrees. + const slug = + process.env.WORKTREE_SLUG ?? + process.env.CONDUCTOR_WORKSPACE_NAME ?? + "default"; + const linkDataDir = join(tmpdir(), `decocms-dev-link-${slug}`); + // When TUI is active, pipe stdout/stderr so child output doesn't corrupt // Ink's cursor-based rendering. Lines are fed into the CLI store instead. const useInherit = noTui === true; @@ -130,6 +150,13 @@ export async function startDevServer( DECOCMS_HOME: settings.dataDir, DATA_DIR: settings.dataDir, DECO_CLI: "1", + // Auto-enable the link's localhost registration path. The cluster + // route honors `tunnelUrl: http://localhost:*` only when this flag + // is set. Production never sets it. + MESH_ALLOW_LOCALHOST_LINKS: "1", + // Tell the cluster where to write the dev-link session file so the + // auto-spawned link binary finds it at boot. + DEV_LINK_SESSION_PATH: join(linkDataDir, "session.json"), ...(settings.baseUrl ? 
{ BASE_URL: settings.baseUrl } : {}), }, stdio: [ @@ -148,13 +175,109 @@ export async function startDevServer( setServerUrl(serverUrl); updateService({ name: "Vite", status: "ready", port: Number(vitePort) }); + // ── Auto-spawn `deco link --no-tunnel` (opt-in) ─────────────────── + // Gated on --local-sandbox-provider. When set, once the cluster is up + // on :PORT, spawn the link daemon so the dev session exercises the + // remote-cli + remote-user code paths end-to-end. The link reads its + // session from /dev-link/session.json (auto-minted by the + // cluster on first boot — see apps/mesh/src/auth/dev-link-session.ts). + const linkPort = 5174; + const linkChild: Promise = !options.localSandboxProvider + ? Promise.resolve(null) + : (async (): Promise => { + try { + await waitForPort(Number(settings.port), { intervalMs: 500 }); + } catch (err) { + addLogEntry({ + method: "", + path: "", + status: 0, + duration: 0, + timestamp: new Date(), + rawLine: `[link] gave up waiting for cluster on :${settings.port}: ${ + err instanceof Error ? err.message : String(err) + }`, + }); + return null; + } + const proc = Bun.spawn( + [ + "bun", + "run", + "--cwd=apps/mesh", + "src/cli.ts", + "link", + "--no-tunnel", + "--port", + String(linkPort), + ], + { + cwd: repoRoot, + env: { + ...process.env, + MESH_CLUSTER_URL: serverUrl, + MESH_ALLOW_LOCALHOST_LINKS: "1", + // DATA_DIR lives OUTSIDE the mesh repo. The daemon clones + // user repos into `/.deco/link/sandboxes//repo`; + // if that path is nested under another git repo (this one) + // git's parent-walk hits the outer .git, refuses to clone, + // and the daemon crashes mid-bootstrap. Use a tmpdir-rooted + // path keyed by the workspace slug so concurrent worktrees + // don't fight over the same sandboxes dir. + DATA_DIR: linkDataDir, + DECOCMS_HOME: linkDataDir, + }, + stdio: [ + "inherit", + useInherit ? "inherit" : "pipe", + useInherit ? 
"inherit" : "pipe", + ], + }, + ); + if (!useInherit) { + pipeToLogStore(proc.stdout as ReadableStream); + pipeToLogStore(proc.stderr as ReadableStream); + } + // Mark Sandbox ready once the link binary's HTTP server accepts + // connections on its port. Fire-and-forget; if the link never + // comes up (e.g. no admin user yet for session bootstrap), the + // status stays "pending" and the user sees a spinner — useful + // signal that something's wrong rather than silent failure. + void waitForPort(linkPort, { intervalMs: 500 }) + .then(() => { + updateService({ name: "Sandbox", status: "ready", port: linkPort }); + }) + .catch(() => { + /* link never came up — leave status pending as a signal */ + }); + return proc; + })(); + const shutdown = async (signal: NodeJS.Signals) => { + // Kill the link child first — it talks to the cluster on shutdown + // (DELETE /api/links/me), so giving it a window before we tear down + // the API server reduces orphaned registry entries. + const link = await linkChild.catch(() => null); + if (link) { + try { + link.kill(signal); + } catch { + /* already gone */ + } + } child.kill(signal); // Wait for the server to finish graceful shutdown before killing shared // services. Otherwise pg dies mid-flight and DBOS / app.shutdown error // out connecting to a dead system DB. The server has its own 55s force- // exit timer, so this won't hang indefinitely. 
await child.exited; + if (link) { + try { + await link.exited; + } catch { + /* ignore */ + } + } if (managedServiceNames.length > 0) { const { stopServices } = await import("../../services/ensure-services"); await stopServices(settings.dataDir); diff --git a/apps/mesh/src/cli/commands/link.test.ts b/apps/mesh/src/cli/commands/link.test.ts deleted file mode 100644 index db5ad4f50d..0000000000 --- a/apps/mesh/src/cli/commands/link.test.ts +++ /dev/null @@ -1,346 +0,0 @@ -import { - afterEach, - beforeEach, - describe, - expect, - it, - mock, - spyOn, -} from "bun:test"; -import { mkdtemp, rm, writeFile } from "node:fs/promises"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { computeAppDomain } from "../lib/app-domain"; -import { writeSession } from "../lib/session"; -import { linkCommand, type SpawnFn, type TunnelOpener } from "./link"; - -let dir: string; -let cwdDir: string; -let logSpy: ReturnType; - -async function makeProject(name: string): Promise { - const projectDir = await mkdtemp(join(tmpdir(), "deco-link-cwd-")); - await writeFile( - join(projectDir, "package.json"), - JSON.stringify({ name }, null, 2), - ); - return projectDir; -} - -beforeEach(async () => { - dir = await mkdtemp(join(tmpdir(), "deco-link-")); - logSpy = spyOn(console, "log").mockImplementation(() => {}); -}); - -afterEach(async () => { - logSpy.mockRestore(); - await rm(dir, { recursive: true, force: true }); - if (cwdDir) await rm(cwdDir, { recursive: true, force: true }); -}); - -describe("linkCommand", () => { - it("opens a tunnel to localhost-.deco.host with the session token", async () => { - cwdDir = await makeProject("my-app"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_abc", - user: { sub: "u_1", email: "u@x" }, - accessToken: "tok_link", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - const expectedDomain = computeAppDomain("u_1", "my-app"); - const tunnelOpener = mock(async (params) => { - 
expect(params.domain).toBe(expectedDomain); - expect(params.localAddr).toBe("http://127.0.0.1:8787"); - expect(params.apiKey).toBe("tok_link"); - expect(params.server).toBe(`wss://${expectedDomain}`); - return { closed: new Promise(() => {}), close: () => {} }; - }); - - const port = 8787; - // Pretend the port is already listening so waitForPort returns instantly. - const portWaiter = mock(async () => "127.0.0.1"); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port, - env: "BASE_URL", - runCommand: [], - tunnelOpener, - portWaiter, - copyClipboard: async () => true, - ensureSession: async () => null, // session is already present - }); - - // Give the command a tick to call tunnelOpener and reach the await on closed. - await new Promise((r) => setTimeout(r, 30)); - - expect(tunnelOpener).toHaveBeenCalledTimes(1); - - // Cleanup so the test actually finishes. - await result.cancel(); - }); - - it("auto-triggers ensureSession when no session is present", async () => { - cwdDir = await makeProject("my-app"); - const ensureSession = mock(async () => ({ - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: "u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - })); - const tunnelOpener = mock(async () => ({ - closed: new Promise(() => {}), - close: () => {}, - })); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "BASE_URL", - runCommand: [], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession, - }); - - await new Promise((r) => setTimeout(r, 30)); - expect(ensureSession).toHaveBeenCalledTimes(1); - expect(tunnelOpener).toHaveBeenCalledTimes(1); - await result.cancel(); - }); - - it("reconnects when the tunnel closes mid-session", async () => { - cwdDir = await makeProject("my-app"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: 
"u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - let openCount = 0; - const tunnelOpener = mock(async () => { - openCount += 1; - // First call: a tunnel that closes immediately. Second: never closes. - if (openCount === 1) { - return { closed: Promise.resolve(), close: () => {} }; - } - return { closed: new Promise(() => {}), close: () => {} }; - }); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "BASE_URL", - runCommand: [], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession: async () => null, - reconnectDelayMs: 5, - }); - - // Allow time for the first tunnel to close and reconnect. - await new Promise((r) => setTimeout(r, 60)); - expect(openCount).toBeGreaterThanOrEqual(2); - await result.cancel(); - }); - - it("logs and retries when tunnelOpener throws (e.g. registration timeout)", async () => { - cwdDir = await makeProject("my-app"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: "u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - const errMessages: string[] = []; - const errSpy = spyOn(console, "error").mockImplementation( - (msg: unknown) => { - errMessages.push(String(msg)); - }, - ); - - let openCount = 0; - const tunnelOpener = mock(async () => { - openCount += 1; - if (openCount === 1) { - throw new Error("Tunnel registration timed out after 15s"); - } - return { closed: new Promise(() => {}), close: () => {} }; - }); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "BASE_URL", - runCommand: [], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession: async () => null, - reconnectDelayMs: 5, - }); - - await new Promise((r) => setTimeout(r, 50)); - expect(openCount).toBeGreaterThanOrEqual(2); - expect( - errMessages.some((m) => - m.includes( - 
"Tunnel connect failed, retrying: Tunnel registration timed out", - ), - ), - ).toBe(true); - await result.cancel(); - errSpy.mockRestore(); - }); - - it("returns non-zero when package.json is missing a name", async () => { - cwdDir = await mkdtemp(join(tmpdir(), "deco-link-noname-")); - await writeFile(join(cwdDir, "package.json"), "{}"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: "u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - const tunnelOpener = mock(async () => ({ - closed: new Promise(() => {}), - close: () => {}, - })); - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "BASE_URL", - runCommand: [], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession: async () => null, - }); - expect(await result.exit).not.toBe(0); - expect(tunnelOpener).toHaveBeenCalledTimes(0); - }); - - it("uses BASE_URL by default and respects the -e flag", async () => { - cwdDir = await makeProject("my-app"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: "u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - let envSeen: NodeJS.ProcessEnv | undefined; - const childSpawn = mock((_cmd, _args, opts) => { - envSeen = opts.env; - return { - on: () => {}, - kill: () => {}, - exitCode: null, - } as unknown as import("node:child_process").ChildProcess; - }); - - const tunnelOpener = mock(async () => ({ - closed: new Promise(() => {}), - close: () => {}, - })); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "MY_PUBLIC_URL", - runCommand: ["node", "server.js"], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession: async () => null, - spawn: childSpawn, - }); - - await new Promise((r) => setTimeout(r, 30)); - 
expect(childSpawn).toHaveBeenCalledTimes(1); - expect(envSeen?.MY_PUBLIC_URL).toMatch( - /^https:\/\/localhost-[0-9a-f]{8}\.deco\.host$/, - ); - expect(envSeen?.BASE_URL).toBeUndefined(); - await result.cancel(); - }); - - it("stops reconnecting when the spawned child exits", async () => { - cwdDir = await makeProject("my-app"); - await writeSession(dir, { - target: "https://studio.decocms.com", - clientId: "client_x", - user: { sub: "u", email: "u@x" }, - accessToken: "tok", - createdAt: "2026-05-04T00:00:00.000Z", - }); - - const childExitHandlers: Array<(code: number | null) => void> = []; - const childSpawn: SpawnFn = mock(() => { - const fakeChild = { - on: (event: string, handler: (code: number | null) => void) => { - if (event === "exit") childExitHandlers.push(handler); - }, - kill: () => {}, - exitCode: null, - }; - return fakeChild as unknown as import("node:child_process").ChildProcess; - }); - - let openCount = 0; - const tunnelOpener: TunnelOpener = mock(async () => { - openCount += 1; - let resolveClosed!: () => void; - const closed = new Promise((r) => { - resolveClosed = r; - }); - return { closed, close: () => resolveClosed() }; - }); - - const result = linkCommand({ - cwd: cwdDir, - dataDir: dir, - port: 8787, - env: "BASE_URL", - runCommand: ["node", "server.js"], - tunnelOpener, - portWaiter: async () => "127.0.0.1", - copyClipboard: async () => false, - ensureSession: async () => null, - spawn: childSpawn, - reconnectDelayMs: 5, - }); - - // Wait for the first tunnel to open. - await new Promise((r) => setTimeout(r, 30)); - expect(openCount).toBe(1); - expect(childExitHandlers.length).toBe(1); - - // Simulate child crash. - childExitHandlers[0]?.(42); - - // The tunnel should close, the reconnect loop should NOT iterate again, - // and the exit code should be the child's exit code. - expect(await result.exit).toBe(42); - // Confirm we did not re-open after the child died. 
- expect(openCount).toBe(1); - }); -}); diff --git a/apps/mesh/src/cli/commands/link.ts b/apps/mesh/src/cli/commands/link.ts index 48e8234d7a..b933b072f0 100644 --- a/apps/mesh/src/cli/commands/link.ts +++ b/apps/mesh/src/cli/commands/link.ts @@ -1,252 +1,53 @@ -import { spawn as nodeSpawn, type ChildProcess } from "node:child_process"; -import { readFile } from "node:fs/promises"; +/** + * `deco link` — the laptop-side link daemon command. + * + * Boots a local Bun.serve on `--port` (default 5174), opens a Warp + * tunnel to deco.host so the cluster can reach it, registers with the + * cluster's `/api/links` to receive a `linkSecret`, then exposes the + * control-plane HMAC handler (sandbox lifecycle + reverse-proxy). + * + * `--no-tunnel` skips Warp and registers `tunnelUrl=http://localhost:<port>` + * — only honored when the cluster has `MESH_ALLOW_LOCALHOST_LINKS=1`. + */ +import { homedir } from "node:os"; import { join } from "node:path"; -import { computeAppDomain } from "../lib/app-domain"; -import { copyToClipboard } from "../lib/clipboard"; -import { waitForPort } from "../lib/port-wait"; -import { readSession, type Session } from "../lib/session"; -import { loginCommand } from "./auth/login"; +import { startLinkDaemon } from "../../link-daemon"; -export interface TunnelHandle { - closed: Promise; - close: () => void; - // TODO: surface auth failure separately so the caller can show the - // "session may be expired" hint described in the spec. +export interface LinkCommandOptions { + port?: number; + noTunnel?: boolean; + clusterBaseUrl?: string; + dataDir?: string; } -export type TunnelOpener = (params: { - domain: string; - localAddr: string; - apiKey: string; - server: string; -}) => Promise; +export async function runLinkCommand( + opts: LinkCommandOptions = {}, +): Promise { + const port = opts.port ?? 5174; + const noTunnel = opts.noTunnel ?? 
false; + // Matches the dataDir convention the rest of the CLI uses (auth login + // writes session to ~/deco/...), so a fresh `deco link` after `deco + // auth login` finds the session without DATA_DIR being set explicitly. + const dataDir = + opts.dataDir ?? + process.env.DATA_DIR ?? + process.env.DECOCMS_HOME ?? + join(homedir(), "deco"); + const clusterBaseUrl = + opts.clusterBaseUrl ?? + process.env.MESH_CLUSTER_URL ?? + "https://studio.decocms.com"; -/** Minimal spawn signature used by linkCommand — compatible with node:child_process spawn. */ -export type SpawnFn = ( - command: string, - args: string[], - options: { stdio: "inherit"; shell: boolean; env: NodeJS.ProcessEnv }, -) => ChildProcess; - -export interface LinkOptions { - cwd: string; - dataDir: string; - port: number; - env: string; - runCommand: string[]; - /** Injectable: defaults to defaultTunnelOpener (dynamic import of @deco-cx/warp-node). */ - tunnelOpener?: TunnelOpener; - /** Injectable: defaults to waitForPort. */ - portWaiter?: (port: number) => Promise; - /** Injectable: defaults to copyToClipboard. */ - copyClipboard?: (text: string) => Promise; - /** Called when no session is present. Returns the new session or null on failure. */ - ensureSession?: () => Promise; - /** Injectable: defaults to node:child_process spawn. */ - spawn?: SpawnFn; - /** Reconnect delay after a tunnel disconnect (default 500ms, matches legacy). 
*/ - reconnectDelayMs?: number; -} - -export interface LinkRunResult { - exit: Promise; - cancel: () => Promise; -} - -export function linkCommand(options: LinkOptions): LinkRunResult { - let resolveExit!: (n: number) => void; - const exit = new Promise((r) => { - resolveExit = r; - }); - - let child: ChildProcess | undefined; - let tunnel: TunnelHandle | undefined; - let cancelled = false; - - const cancel = async () => { - cancelled = true; - try { - child?.kill("SIGTERM"); - } catch {} - try { - tunnel?.close(); - } catch {} - resolveExit(0); - }; - - void (async () => { - try { - let session = await readSession(options.dataDir); - if (!session) { - const ensure = - options.ensureSession ?? defaultEnsureSession(options.dataDir); - console.log("No session found — opening login..."); - session = await ensure(); - if (!session) { - console.error("Login failed; cannot open tunnel."); - resolveExit(1); - return; - } - } - - const appName = await readPackageName(options.cwd); - if (!appName) { - console.error( - "Could not read `name` from package.json. Run `decocms link` from a project directory.", - ); - resolveExit(1); - return; - } - - const domain = computeAppDomain(session.user.sub, appName); - const publicUrl = `https://${domain}`; - - const spawnImpl: SpawnFn = options.spawn ?? nodeSpawn; - if (options.runCommand.length > 0) { - const [cmd, ...args] = options.runCommand; - if (!cmd) { - console.error("runCommand must not be empty"); - resolveExit(1); - return; - } - console.log(`Starting: ${cmd} ${args.join(" ")}`); - const spawned = spawnImpl(cmd, args, { - stdio: "inherit", - shell: true, - env: { ...process.env, [options.env]: publicUrl }, - }); - child = spawned; - spawned.on("exit", (code) => { - if (cancelled) return; - cancelled = true; - try { - tunnel?.close(); - } catch {} - resolveExit(code ?? 0); - }); - } else { - console.log( - `Tunnel will connect to existing service on port ${options.port}.`, - ); - } - - const wait = options.portWaiter ?? 
((p: number) => waitForPort(p)); - const opener = options.tunnelOpener ?? defaultTunnelOpener; - const copy = options.copyClipboard ?? copyToClipboard; - const reconnectDelay = options.reconnectDelayMs ?? 500; - - // Loop: open tunnel, wait for it to close, reconnect after a small delay. - // Matches legacy behavior — exits only when the user cancels. - let firstOpen = true; - while (!cancelled) { - const host = await wait(options.port); - try { - tunnel = await opener({ - domain, - localAddr: `http://${host}:${options.port}`, - apiKey: session.accessToken, - server: `wss://${domain}`, - }); - } catch (err) { - console.error( - `Tunnel connect failed, retrying: ${err instanceof Error ? err.message : String(err)}`, - ); - await sleep(reconnectDelay); - continue; - } - - if (firstOpen) { - console.log(`Tunnel open: ${publicUrl}`); - if (await copy(publicUrl)) { - console.log("(URL copied to clipboard)"); - } - firstOpen = false; - } else { - console.log("Tunnel reconnected."); - } - - await tunnel.closed; - if (cancelled) break; - console.log("Tunnel closed, reconnecting..."); - await sleep(reconnectDelay); - } - - if (!cancelled) resolveExit(0); - } catch (err) { - console.error( - `Link failed: ${err instanceof Error ? err.message : String(err)}`, - ); - resolveExit(1); - } - })(); - - return { exit, cancel }; -} - -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -async function readPackageName(cwd: string): Promise { try { - const raw = await readFile(join(cwd, "package.json"), "utf8"); - const parsed = JSON.parse(raw) as { name?: unknown }; - return typeof parsed.name === "string" && parsed.name.length > 0 - ? parsed.name - : null; - } catch { - return null; + const handle = await startLinkDaemon({ + port, + noTunnel, + clusterBaseUrl, + dataDir, + }); + return handle.stopped; + } catch (err) { + console.error(err instanceof Error ? 
err.message : String(err)); + return 1; } } - -function defaultEnsureSession(dataDir: string): () => Promise { - return async () => { - const code = await loginCommand({ dataDir }); - if (code !== 0) return null; - return readSession(dataDir); - }; -} - -// The Warp tunnel server still expects the legacy shared key — it does not -// yet verify OAuth bearer tokens. Until that lands, fall back to this -// hardcoded value (overridable via DECO_TUNNEL_SERVER_TOKEN) so `link` -// works end-to-end. The session's OAuth access token from `params.apiKey` -// is intentionally ignored here for now; we keep storing it on the -// session so we can flip the source back in one line once Warp is ready. -const LEGACY_TUNNEL_TOKEN = "c309424a-2dc4-46fe-bfc7-a7c10df59477"; - -// If `tunnel.registered` doesn't resolve within this window, the Warp -// server most likely silently rejected the auth. Surface that as an -// error instead of hanging indefinitely. -const REGISTRATION_TIMEOUT_MS = 15_000; - -const defaultTunnelOpener: TunnelOpener = async (params) => { - const { connect } = await import("@deco-cx/warp-node"); - const tunnel = await connect({ - domain: params.domain, - localAddr: params.localAddr, - server: params.server, - apiKey: process.env.DECO_TUNNEL_SERVER_TOKEN ?? LEGACY_TUNNEL_TOKEN, - }); - await Promise.race([ - tunnel.registered, - new Promise((_, reject) => { - setTimeout(() => { - reject( - new Error( - `Tunnel registration timed out after ${REGISTRATION_TIMEOUT_MS / 1000}s — Warp server may have rejected the auth. Try upgrading the CLI.`, - ), - ); - }, REGISTRATION_TIMEOUT_MS); - }), - ]); - return { - // Connected.closed resolves with Error | undefined; we discard the value - // to satisfy TunnelHandle.closed: Promise. - closed: tunnel.closed.then(() => undefined), - close: () => { - // @deco-cx/warp-node Connected has no close() method; the connection - // closes on its own when the server drops it. 
- }, - }; -}; diff --git a/apps/mesh/src/cli/lib/clipboard.ts b/apps/mesh/src/cli/lib/clipboard.ts deleted file mode 100644 index 379fab5ecc..0000000000 --- a/apps/mesh/src/cli/lib/clipboard.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { spawn } from "node:child_process"; - -/** - * Best-effort copy of `text` to the system clipboard. Returns true on success, - * false if the platform tool is missing or fails. Never throws. - */ -export function copyToClipboard(text: string): Promise { - let command: string; - let args: string[] = []; - switch (process.platform) { - case "darwin": - command = "pbcopy"; - break; - case "win32": - command = "clip"; - break; - case "linux": - command = "xclip"; - args = ["-selection", "clipboard"]; - break; - default: - return Promise.resolve(false); - } - - return new Promise((resolve) => { - try { - const child = spawn(command, args, { stdio: "pipe" }); - child.on("error", () => resolve(false)); - child.on("close", (code) => resolve(code === 0)); - child.stdin.write(text); - child.stdin.end(); - } catch { - resolve(false); - } - }); -} diff --git a/apps/mesh/src/core/context-factory.ts b/apps/mesh/src/core/context-factory.ts index ea3fd2b3e0..0dff334888 100644 --- a/apps/mesh/src/core/context-factory.ts +++ b/apps/mesh/src/core/context-factory.ts @@ -60,6 +60,7 @@ import type { import type { EventBus } from "../event-bus/interface"; import type { MemberRoleCache } from "../auth/member-role-cache"; +import type { LinkRegistry } from "../links/link-registry"; // ============================================================================ // Helper Functions @@ -112,6 +113,8 @@ export interface MeshContextConfig { eventBus: EventBus; modelListCache?: ModelListCache; memberRoleCache?: MemberRoleCache; + /** Required for remote-user sandbox auto-resolution; tests may omit. 
*/ + linkRegistry?: LinkRegistry; } // ============================================================================ @@ -1192,6 +1195,7 @@ export async function createMeshContextFactory( ), }, eventBus: config.eventBus, + linkRegistry: config.linkRegistry, aiProviders: aiProviderFactory, createMCPProxy: async (conn: string | ConnectionEntity) => { return await createMCPProxy(conn, ctx); diff --git a/apps/mesh/src/core/harness-context.test.ts b/apps/mesh/src/core/harness-context.test.ts new file mode 100644 index 0000000000..5621cd96ef --- /dev/null +++ b/apps/mesh/src/core/harness-context.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from "bun:test"; +import type { HarnessContext } from "./harness-context"; +import type { MeshContext } from "./mesh-context"; + +describe("HarnessContext", () => { + it("MeshContext is assignable to HarnessContext (narrowing)", () => { + const mesh: MeshContext = null as never; + const harness: HarnessContext = mesh; + expect(harness).toBe(mesh); + }); + + it("exposes exactly the narrow surface CLI harnesses need", () => { + // Listing the keys as a tuple pins the public shape — adding or + // removing a field on HarnessContext requires updating this test, + // which forces a review of whether the change belongs on the + // narrow surface. The compile-time assertion via the `Keys` tuple + // is what does the real work; the `expect` keeps the test runner + // happy. + type Keys = keyof HarnessContext; + const expected: Keys[] = ["tracer", "meter", "metadata", "aiProviders"]; + expect(expected).toEqual(["tracer", "meter", "metadata", "aiProviders"]); + }); +}); diff --git a/apps/mesh/src/core/harness-context.ts b/apps/mesh/src/core/harness-context.ts new file mode 100644 index 0000000000..588f1dff21 --- /dev/null +++ b/apps/mesh/src/core/harness-context.ts @@ -0,0 +1,7 @@ +/** + * `HarnessContext` is defined in `apps/mesh/src/harnesses` so the laptop + * link daemon can construct one without depending on cluster modules. 
+ * This file re-exports it for cluster-side consumers that still import + * via the historical `@/core/harness-context` path. + */ +export type { HarnessContext } from "../harnesses"; diff --git a/apps/mesh/src/core/mesh-context.ts b/apps/mesh/src/core/mesh-context.ts index 4818452e05..7216a20769 100644 --- a/apps/mesh/src/core/mesh-context.ts +++ b/apps/mesh/src/core/mesh-context.ts @@ -13,9 +13,12 @@ import type { Meter, Tracer } from "@opentelemetry/api"; import type { Kysely } from "kysely"; +import type { LinkEntry } from "../links/protocol"; +import type { LinkRegistry } from "@/links/link-registry"; import type { CredentialVault } from "../encryption/credential-vault"; import type { Database, Permission } from "../storage/types"; import type { AccessControl } from "./access-control"; +import type { HarnessContext } from "./harness-context"; export type { BetterAuthInstance } from "@/auth"; // Re-export for consumers export type { AccessControl, CredentialVault }; @@ -318,7 +321,7 @@ export interface Timings { * This provides access to all necessary services without coupling * to implementation details. */ -export interface MeshContext { +export interface MeshContext extends HarnessContext { // Connection ID (from url) connectionId?: string; @@ -395,6 +398,36 @@ export interface MeshContext { orgId: string, userId: string, ) => Promise; + + /** + * Sandbox dispatch preference for the in-flight run, populated by + * `prepareRun` from the resolved `DispatchTarget`: + * - `"default"` — cluster sandbox (today's behavior). + * - `"remote-user"` — decopilot still runs in the cluster, but its + * Code Sandbox tool calls are forwarded to the user's link daemon. + * Unset for non-decopilot harnesses (`remote-cli` runs never enter the + * sandbox tool path on the cluster side). + */ + sandboxPreference?: "default" | "remote-user"; + + /** + * Link entry for the user this run is dispatched on behalf of, if any. 
+ * Set by `prepareRun` when the resolved `DispatchTarget` references a + * link (either `local/remote-user` or `remote-cli`). The remote-user + * sandbox provider reads this to know which daemon URL + secret to talk + * to without re-querying the registry. Unset for `local/default` runs. + */ + linkForCurrentRun?: LinkEntry; + + /** + * Cluster-wide LinkRegistry, injected by the context factory. Tools that + * touch the sandbox provider outside the decopilot dispatch path (e.g. + * `VM_START`, the always-on VM auto-provisioner) read this to resolve the + * acting user's link on demand — there is no `prepareRun` to pre-populate + * `linkForCurrentRun` for them. Undefined in test contexts that don't + * supply a registry. + */ + linkRegistry?: LinkRegistry; } // ============================================================================ diff --git a/apps/mesh/src/core/server-constants.ts b/apps/mesh/src/core/server-constants.ts index 4579234425..a9c42da3a4 100644 --- a/apps/mesh/src/core/server-constants.ts +++ b/apps/mesh/src/core/server-constants.ts @@ -30,3 +30,31 @@ export function getBaseUrl(): string { export function getInternalUrl(): string { return `http://localhost:${getSettings().port ?? 3000}`; } + +/** + * Get the cluster's externally reachable URL. + * + * Used when minting URLs that need to be resolvable from outside the + * cluster — e.g. the MCP endpoint URL handed to a remote link daemon + * (Phase 9 remote harness dispatch), which talks back to the cluster + * over HTTP from the user's laptop. + * + * In dev mode (`MESH_ALLOW_LOCALHOST_LINKS=1`) we deliberately advertise + * a localhost URL so a link daemon running on the same machine can dial + * the cluster without a public hostname. Honors + * `MESH_LOCAL_CLUSTER_URL` to allow per-developer overrides. + * + * Otherwise we use the `MESH_PUBLIC_URL` environment variable when it is + * set (an explicit override for the public URL). 
Falls back to `getBaseUrl()` so + * production deployments without a separate public-URL setting still + * work. + */ +export function getPublicUrl(): string { + if (process.env.MESH_ALLOW_LOCALHOST_LINKS === "1") { + return ( + process.env.MESH_LOCAL_CLUSTER_URL ?? + `http://localhost:${getSettings().port ?? 3000}` + ); + } + return process.env.MESH_PUBLIC_URL ?? getBaseUrl(); +} diff --git a/apps/mesh/src/dispatch-queue/thread-gate-workflow.ts b/apps/mesh/src/dispatch-queue/thread-gate-workflow.ts index adf88c19c5..e106cd13c6 100644 --- a/apps/mesh/src/dispatch-queue/thread-gate-workflow.ts +++ b/apps/mesh/src/dispatch-queue/thread-gate-workflow.ts @@ -219,8 +219,15 @@ async function threadGateWorkflowFn( name: "trackMessageStarted", }); try { + // The dispatch step is non-retriable for v1. If a pod dies mid-stream, + // the laptop daemon (if remote-cli) keeps running, and a DBOS replay + // would open a second concurrent dispatch against the same workdir — + // racing on git state and tool output. Marking the step non-retriable + // converts pod death into a clean "run failed" rather than a corruption + // hazard. Re-attach semantics (stable runId, daemon-side dedupe) are v2. await DBOS.runStep(() => dispatchRunAndWaitStep(ctx), { name: "dispatchRunAndWait", + retriesAllowed: false, }); } catch (err) { // Setup errors (prepareRun) propagate out of `dispatchRunAndWait`; in-flight diff --git a/apps/mesh/src/harnesses/claude-code/index.test.ts b/apps/mesh/src/harnesses/claude-code/index.test.ts index e83b0b680e..fc8c6ba85c 100644 --- a/apps/mesh/src/harnesses/claude-code/index.test.ts +++ b/apps/mesh/src/harnesses/claude-code/index.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from "bun:test"; import { claudeCodeHarnessFactory } from "./index"; -import type { MeshContext } from "../../core/mesh-context"; +import type { HarnessContext } from "../types"; /** * Contract tests for the Claude Code harness factory. 
@@ -18,7 +18,7 @@ describe("claudeCodeHarnessFactory", () => { }); test("create() returns a Harness with id 'claude-code' and a stream() method", () => { - const harness = claudeCodeHarnessFactory.create({} as MeshContext); + const harness = claudeCodeHarnessFactory.create({} as HarnessContext); expect(harness.id).toBe("claude-code"); expect(typeof harness.stream).toBe("function"); }); diff --git a/apps/mesh/src/harnesses/claude-code/index.ts b/apps/mesh/src/harnesses/claude-code/index.ts index 985a2e68ef..0e56d78476 100644 --- a/apps/mesh/src/harnesses/claude-code/index.ts +++ b/apps/mesh/src/harnesses/claude-code/index.ts @@ -11,13 +11,12 @@ * `finish-step.providerMetadata["claude-code"].sessionId`. The harness * just forwards that opaque token to the SDK's `resume` setting. * - * Working-directory resolution mirrors the inline original at - * `apps/mesh/src/api/routes/decopilot/stream-core.ts` lines ~864–886: - * github-linked virtual MCPs get a per-branch sandbox handle; the - * underlying `host` runner exposes `localWorkdir(handle)` to map that - * handle to a real filesystem path. Ephemeral agents (no `githubRepo`) - * fall through to `undefined`, which means the SDK defaults to - * `process.cwd()` — same as the inline original. + * Working-directory resolution: the cluster used to inject a + * `processLocal.resolveCwd` callback that mapped to the `host` runner's + * `localWorkdir(handle)`. That runner has been retired; the cluster no + * longer supplies a resolver, and this harness falls through to the + * `repo` checkout under the sandbox workdir on the laptop daemon, or to + * `undefined` (SDK default) inside the cluster. 
* * Behavior parity with stream-core: the inline call at lines 888–906 * passes `mcpServers` (single `cms` entry), `toolApprovalLevel`, @@ -31,30 +30,38 @@ */ import { streamText, type UIMessageChunk } from "ai"; -import { - createClaudeCodeModel, - resolveClaudeCodeModelId, -} from "../../ai-providers/adapters/claude-code"; -import type { MeshContext } from "../../core/mesh-context"; -import { getSharedRunner } from "../../sandbox/lifecycle"; +import { createClaudeCodeModel, resolveClaudeCodeModelId } from "./model"; import { prepCliMessages } from "../cli-message-prep"; -import type { Harness, HarnessFactory, HarnessStreamInput } from "../types"; +import type { + Harness, + HarnessContext, + HarnessFactory, + HarnessStreamInput, +} from "../types"; import { createUsageAccumulator } from "../usage-accumulator"; /** * Compute the Claude Code working directory. * - * Mirrors stream-core.ts lines ~864–886. Returns `undefined` when: - * - The agent has no githubRepo (ephemeral agent → use SDK default cwd). - * - No userId is available (defensive — branch resolution needs it). - * - The shared runner is not the local `host` kind (Docker / remote - * runners don't expose a local filesystem path). - * - `localWorkdir(handle)` returns null (the handle isn't materialized - * on this pod yet). + * Returns `undefined` when the agent has no `githubRepo` (ephemeral + * agent → SDK default cwd) or no userId is available (defensive). + * + * Otherwise: + * - Laptop daemon (no `processLocal`): the sandbox daemon is spawned + * with `cwd = <workdir>`, but the cloned repo lives at + * `<workdir>/repo` (see `packages/sandbox/daemon/entry.ts` — it + * joins APP_ROOT with "repo" to form `repoDir`). Prefer the env + * vars the sandbox sets (`WORKDIR` / `APP_ROOT`) and fall through + * to `<cwd>/repo` so Claude Code actually runs inside the + * checkout. The final fallback base is `process.cwd()` (still suffixed + * with `/repo`) for non-sandbox environments (e.g. tests, ad-hoc invocations). 
+ * - Cluster: no on-disk sandbox to point at after the host runner was + * retired, so fall through to `undefined` (SDK default). The + * `processLocal.resolveCwd` callback is kept as an extension point + * for future cluster-side runners that materialize files locally. */ async function resolveClaudeCodeCwd( input: HarnessStreamInput, - ctx: MeshContext, ): Promise { const vmMetadata = input.virtualMcp.metadata as { githubRepo?: unknown; @@ -62,32 +69,20 @@ async function resolveClaudeCodeCwd( if (!vmMetadata?.githubRepo) return undefined; if (!input.user?.id) return undefined; - const isEphemeralAgent = !vmMetadata.githubRepo; - const branch = isEphemeralAgent - ? "ephemeral" - : (input.branch ?? `thread:${input.threadId}`); - - const runner = await getSharedRunner(ctx); - if (runner.kind !== "host") return undefined; + if (!input.processLocal) { + const appRoot = + process.env.WORKDIR || process.env.APP_ROOT || process.cwd(); + return `${appRoot.replace(/\/$/, "")}/repo`; + } - const { computeHandle, composeSandboxRef } = await import( - "@decocms/sandbox/runner" - ); - const projectRef = composeSandboxRef({ - orgId: input.organizationId, - virtualMcpId: input.agent.id, - branch, - }); - const handle = computeHandle({ userId: input.user.id, projectRef }, branch); - const hostRunner = runner as unknown as { - localWorkdir(h: string): Promise; - }; - return (await hostRunner.localWorkdir(handle)) ?? undefined; + const resolveCwd = input.processLocal.resolveCwd; + if (!resolveCwd) return undefined; + return await resolveCwd(); } export const claudeCodeHarnessFactory: HarnessFactory = { id: "claude-code", - create(ctx: MeshContext): Harness { + create(_ctx: HarnessContext): Harness { return { id: "claude-code", async *stream(input: HarnessStreamInput): AsyncIterable { @@ -99,7 +94,7 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // 2. 
Compute the working directory for the CLI subprocess — // github-linked agents get a per-branch sandbox path, ephemeral // agents fall through to undefined (SDK default). - const cwd = await resolveClaudeCodeCwd(input, ctx); + const cwd = await resolveClaudeCodeCwd(input); // 3. Build the Claude Code language model. The MCP URL + headers // are already minted by the shared layer (it owns the diff --git a/apps/mesh/src/ai-providers/coding-agents/claude-code/index.ts b/apps/mesh/src/harnesses/claude-code/model/index.ts similarity index 76% rename from apps/mesh/src/ai-providers/coding-agents/claude-code/index.ts rename to apps/mesh/src/harnesses/claude-code/model/index.ts index 8f6f6d6715..a40fe6abea 100644 --- a/apps/mesh/src/ai-providers/coding-agents/claude-code/index.ts +++ b/apps/mesh/src/harnesses/claude-code/model/index.ts @@ -1,5 +1,6 @@ +import type { LanguageModelV3 } from "@ai-sdk/provider"; import { createClaudeCode } from "ai-sdk-provider-claude-code"; -import type { ToolApprovalLevel } from "@/api/routes/decopilot/helpers"; +import type { ToolApprovalLevel } from "../../types"; /** * Create a Claude Code language model with MCP servers attached. @@ -24,7 +25,7 @@ export function createClaudeCodeModel( /** Working directory for Claude Code's subprocess. Defaults to mesh's cwd. */ cwd?: string; }, -) { +): LanguageModelV3 { // Tools that require a TTY, manage local state, or are not useful in headless mode const HEADLESS_DISALLOWED_TOOLS = [ "AskUserQuestion", @@ -67,3 +68,15 @@ export function createClaudeCodeModel( }); return provider(modelId); } + +/** Map composite model IDs (e.g. "claude-code:sonnet") to SDK model names. */ +const CLAUDE_CODE_SDK_MODELS: Record = { + "claude-code:opus": "opus", + "claude-code:sonnet": "sonnet", + "claude-code:haiku": "haiku", +}; + +/** Resolve a composite claude-code model ID to the SDK model name. */ +export function resolveClaudeCodeModelId(modelId: string): string { + return CLAUDE_CODE_SDK_MODELS[modelId] ?? 
modelId; +} diff --git a/apps/mesh/src/harnesses/cli-message-prep.test.ts b/apps/mesh/src/harnesses/cli-message-prep.test.ts index d029fc40f0..01da26a3a3 100644 --- a/apps/mesh/src/harnesses/cli-message-prep.test.ts +++ b/apps/mesh/src/harnesses/cli-message-prep.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import type { ChatMessage } from "../api/routes/decopilot/types"; +import type { ChatMessage } from "./types"; import { prepCliMessages } from "./cli-message-prep"; describe("prepCliMessages", () => { diff --git a/apps/mesh/src/harnesses/cli-message-prep.ts b/apps/mesh/src/harnesses/cli-message-prep.ts index 402ea3b271..91db800cde 100644 --- a/apps/mesh/src/harnesses/cli-message-prep.ts +++ b/apps/mesh/src/harnesses/cli-message-prep.ts @@ -27,7 +27,7 @@ */ import { convertToModelMessages, type ModelMessage } from "ai"; -import type { ChatMessage } from "../api/routes/decopilot/types"; +import type { ChatMessage } from "./types"; /** Convert harness UIMessages to ModelMessages for CLI harness streamText * calls. See file-level comment for the why. 
*/ diff --git a/apps/mesh/src/harnesses/codex/index.test.ts b/apps/mesh/src/harnesses/codex/index.test.ts index 1c18f7103a..107cc6ff86 100644 --- a/apps/mesh/src/harnesses/codex/index.test.ts +++ b/apps/mesh/src/harnesses/codex/index.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import type { MeshContext } from "../../core/mesh-context"; +import type { HarnessContext } from "../types"; import { codexHarnessFactory } from "./index"; /** @@ -23,7 +23,7 @@ describe("codexHarnessFactory", () => { }); test("create() returns a Harness with id 'codex' and a stream() method", () => { - const harness = codexHarnessFactory.create({} as MeshContext); + const harness = codexHarnessFactory.create({} as HarnessContext); expect(harness.id).toBe("codex"); expect(typeof harness.stream).toBe("function"); }); diff --git a/apps/mesh/src/harnesses/codex/index.ts b/apps/mesh/src/harnesses/codex/index.ts index 45d26fbea6..137280eebd 100644 --- a/apps/mesh/src/harnesses/codex/index.ts +++ b/apps/mesh/src/harnesses/codex/index.ts @@ -37,18 +37,19 @@ */ import { streamText, type UIMessageChunk } from "ai"; -import { - createCodexModel, - resolveCodexModelId, -} from "../../ai-providers/adapters/codex"; -import type { MeshContext } from "../../core/mesh-context"; +import { createCodexModel, resolveCodexModelId } from "./model"; import { prepCliMessages } from "../cli-message-prep"; -import type { Harness, HarnessFactory, HarnessStreamInput } from "../types"; +import type { + Harness, + HarnessContext, + HarnessFactory, + HarnessStreamInput, +} from "../types"; import { createUsageAccumulator } from "../usage-accumulator"; export const codexHarnessFactory: HarnessFactory = { id: "codex", - create(_ctx: MeshContext): Harness { + create(_ctx: HarnessContext): Harness { return { id: "codex", async *stream(input: HarnessStreamInput): AsyncIterable { diff --git a/apps/mesh/src/ai-providers/coding-agents/codex/index.ts b/apps/mesh/src/harnesses/codex/model/index.ts 
/**
 * Map composite model IDs (the `codex:`-prefixed IDs used by callers) to
 * the model names the Codex SDK expects.
 */
const CODEX_SDK_MODELS: Record<string, string> = {
  "codex:gpt-5.5": "gpt-5.5",
  "codex:gpt-5.4": "gpt-5.4",
  "codex:gpt-5.4-mini": "gpt-5.4-mini",
  "codex:gpt-5.3-codex": "gpt-5.3-codex",
  "codex:gpt-5.2": "gpt-5.2",
};

/**
 * Resolve a composite codex model ID to the SDK model name.
 *
 * @param modelId Composite ID such as `"codex:gpt-5.4"`.
 * @returns The SDK model name registered for `modelId`.
 * @throws Error when `modelId` is not one of the IDs listed in
 *   `CODEX_SDK_MODELS`.
 */
export function resolveCodexModelId(modelId: string): string {
  // Own-property check: a bare `CODEX_SDK_MODELS[modelId]` lookup also
  // resolves keys inherited from `Object.prototype` ("constructor",
  // "toString", "__proto__", …) to truthy non-string values, which would
  // slip past the guard below and be returned as if they were model names.
  const resolved = Object.prototype.hasOwnProperty.call(
    CODEX_SDK_MODELS,
    modelId,
  )
    ? CODEX_SDK_MODELS[modelId]
    : undefined;
  if (!resolved) {
    throw new Error(`Unknown Codex model ID: ${modelId}`);
  }
  return resolved;
}
virtualMcpId: vmContext.virtualMcpId, + branch: vmContext.branch, + sandboxProviderKind, + }, + ctx, + ), + ) + .then((entry) => entry.vmId); // Reset on failure so the next tool call retries instead of // permanently caching a rejected promise. cached.catch(() => { diff --git a/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/index.ts b/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/index.ts index 4363977975..088e8dacf9 100644 --- a/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/index.ts +++ b/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/index.ts @@ -2,13 +2,13 @@ * VM File Tools — runner-agnostic. * * Registers the six LLM-visible tools (read/write/edit/grep/glob/bash) on - * top of any `SandboxRunner.proxyDaemonRequest`. All runners speak the + * top of any `SandboxProvider.proxyDaemonRequest`. All runners speak the * unified `/_decopilot_vm/*` surface with plain JSON bodies. */ import { tool, zodSchema } from "ai"; import path from "node:path"; -import type { SandboxRunner } from "@decocms/sandbox/runner"; +import type { SandboxProvider } from "@decocms/sandbox/provider"; import { maybeTruncate } from "./common"; import { buildBashDescription, @@ -103,7 +103,7 @@ function toFileDownloadUrl( export type { VmToolsParams } from "./types"; async function daemonRequest( - runner: SandboxRunner, + runner: SandboxProvider, handle: string, path: string, body: Record | null, diff --git a/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/types.ts b/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/types.ts index 82a69670d8..8701a4d18b 100644 --- a/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/types.ts +++ b/apps/mesh/src/harnesses/decopilot/built-in-tools/vm-tools/types.ts @@ -1,9 +1,9 @@ -import type { SandboxRunner } from "@decocms/sandbox/runner"; +import type { SandboxProvider } from "@decocms/sandbox/provider"; import type { MeshContext } from "@/core/mesh-context"; import type { PendingImage } 
from "../take-screenshot"; export interface VmToolsParams { - readonly runner: SandboxRunner; + readonly runner: SandboxProvider; /** * Lazy handle resolver. Invoked on every tool call; caller is expected * to memoise so the first invocation provisions and later calls reuse. diff --git a/apps/mesh/src/harnesses/decopilot/index.ts b/apps/mesh/src/harnesses/decopilot/index.ts index faf63ad420..086a574d8c 100644 --- a/apps/mesh/src/harnesses/decopilot/index.ts +++ b/apps/mesh/src/harnesses/decopilot/index.ts @@ -28,22 +28,80 @@ * leaking raw JSON into every subsequent turn. */ -import type { UIMessageChunk } from "ai"; +import type { UIMessageChunk, UIMessageStreamWriter } from "ai"; +import type { HarnessContext } from "../../core/harness-context"; import type { MeshContext } from "../../core/mesh-context"; import type { Harness, HarnessFactory, HarnessStreamInput } from "../types"; +import type { MeshProvider } from "../../ai-providers/types"; +import type { RunRegistry } from "../../api/routes/decopilot/run-registry"; +import type { ChatMessage } from "../../api/routes/decopilot/types"; +import type { ChatMode } from "../../api/routes/decopilot/mode-config"; +import type { VirtualMCPEntity } from "@decocms/mesh-sdk"; import { processConversation } from "../../api/routes/decopilot/conversation"; import { DEFAULT_WINDOW_SIZE } from "../../api/routes/decopilot/constants"; import { assembleDecopilotTools } from "./tools"; import { assembleDecopilotPrompt } from "./prompt"; import { runDecopilotStream } from "./run-stream"; +import type { PendingImage } from "./built-in-tools"; + +/** Narrowed view of `HarnessStreamInput.processLocal` for the cluster + * decopilot harness. The package types those structurally-deep fields + * as `unknown` so the package stays portable to the laptop daemon; the + * cluster knows it builds richer values and narrows here at the + * harness boundary. 
*/ +interface ClusterProcessLocal { + writer: UIMessageStreamWriter; + toolOutputMap: Map; + pendingImages: PendingImage[]; + threadId: string; + currentThreadTitle: string; + registrySignal: AbortSignal; + runRegistry: RunRegistry; + provider: MeshProvider | null; + registerPendingOp: (op: Promise) => void; + isStreamFinished: () => boolean; + onUsageAggregated: (totalUsage: { + inputTokens: number; + outputTokens: number; + totalTokens: number; + }) => void; + onTitleUpdated?: (title: string) => void | Promise; +} + +/** Narrowed view of the cluster's richer input fields, mirroring what + * `dispatch-run.ts` actually builds. */ +interface ClusterInputView { + messages: ChatMessage[]; + mode: ChatMode; + virtualMcp: VirtualMCPEntity; +} export const decopilotHarnessFactory: HarnessFactory = { id: "decopilot", - create(ctx: MeshContext): Harness { + create(harnessCtx: HarnessContext): Harness { + // `stream()` refuses to run without processLocal, so any cluster-only + // ctx field reads only happen on a real MeshContext value. The widening + // cast here is a TS-level erasure; the defensive check below catches a + // narrow HarnessContext smuggled in via misuse (e.g. a non-decopilot + // caller mistakenly invoking this factory on the laptop). + // + // `storage` and `db` are required fields on MeshContext but absent + // from HarnessContext, so their presence reliably distinguishes the + // two at runtime. + if (!("storage" in harnessCtx) || !("db" in harnessCtx)) { + throw new Error( + "decopilot harness requires MeshContext (cluster-side only); " + + "got narrow HarnessContext", + ); + } + const ctx = harnessCtx as MeshContext; return { id: "decopilot", async *stream(input: HarnessStreamInput): AsyncIterable { - const pl = input.processLocal; + // Package types are intentionally loose so the harness package + // is daemon-portable; narrow back to cluster-rich types here. 
+ const pl = input.processLocal as ClusterProcessLocal | undefined; + const clusterInput = input as HarnessStreamInput & ClusterInputView; if (!pl) { throw new Error( "Decopilot harness requires HarnessStreamInput.processLocal in this build. " + @@ -80,7 +138,7 @@ export const decopilotHarnessFactory: HarnessFactory = { systemMessages: processedSystemMessages, messages: processedMessages, originalMessages, - } = await processConversation(input.messages, { + } = await processConversation(clusterInput.messages, { windowSize: DEFAULT_WINDOW_SIZE, models: input.models, tools: tools.tools, diff --git a/apps/mesh/src/harnesses/decopilot/tools.ts b/apps/mesh/src/harnesses/decopilot/tools.ts index 4b72547b79..854d82ba99 100644 --- a/apps/mesh/src/harnesses/decopilot/tools.ts +++ b/apps/mesh/src/harnesses/decopilot/tools.ts @@ -127,7 +127,10 @@ export async function assembleDecopilotTools( // tool (GitHub, Slack, etc.) for users who don't have explicit per-tool // permissions configured — the wrong enforcement layer for chat. const passthroughClient = await createVirtualClientFrom( - input.virtualMcp, + // Cluster-side: `virtualMcp` is the real `VirtualMCPEntity`; the + // package widens the field to a loose bag so the daemon can ship + // without the cluster's storage types. Narrow back here. 
+ input.virtualMcp as Parameters[0], ctx, "passthrough", true, diff --git a/apps/mesh/src/harnesses/index.test.ts b/apps/mesh/src/harnesses/index.test.ts index 84ec07f4e3..dd82213cbf 100644 --- a/apps/mesh/src/harnesses/index.test.ts +++ b/apps/mesh/src/harnesses/index.test.ts @@ -1,8 +1,8 @@ import { beforeAll, describe, expect, test } from "bun:test"; import { claudeCodeHarnessFactory } from "./claude-code"; import { codexHarnessFactory } from "./codex"; -import { decopilotHarnessFactory } from "./decopilot"; import { getHarnessFactory, registerHarnessFactory } from "./registry"; +import { decopilotHarnessFactory } from "./decopilot"; describe("harness registration", () => { // Re-register explicitly here so the test doesn't depend on test-file diff --git a/apps/mesh/src/harnesses/index.ts b/apps/mesh/src/harnesses/index.ts index 1e8f01640e..2f365cf2b1 100644 --- a/apps/mesh/src/harnesses/index.ts +++ b/apps/mesh/src/harnesses/index.ts @@ -5,14 +5,25 @@ import { registerHarnessFactory } from "./registry"; // Side-effect registration. Importing this module wires up the three // in-tree harnesses. Out-of-tree harnesses register themselves the same way. +// +// CLI harnesses (claude-code, codex) are also imported by the laptop link +// daemon; decopilot pulls in cluster-only modules (RunRegistry, run-stream, +// mesh tools) and is only usable on the cluster side. 
registerHarnessFactory(decopilotHarnessFactory); registerHarnessFactory(claudeCodeHarnessFactory); registerHarnessFactory(codexHarnessFactory); export { localDispatch } from "./local-dispatch"; export type { + ChatMessage, + ChatMode, Harness, + HarnessContext, HarnessFactory, HarnessId, + HarnessProcessLocal, HarnessStreamInput, + ModelSelection, + ModelsConfig, + ToolApprovalLevel, } from "./types"; diff --git a/apps/mesh/src/harnesses/local-dispatch.test.ts b/apps/mesh/src/harnesses/local-dispatch.test.ts index 860aced19e..9719db6afe 100644 --- a/apps/mesh/src/harnesses/local-dispatch.test.ts +++ b/apps/mesh/src/harnesses/local-dispatch.test.ts @@ -1,9 +1,13 @@ import { describe, expect, test } from "bun:test"; import type { UIMessageChunk } from "ai"; +import { registerHarnessFactory, resetRegistryForTests } from "./registry"; +import type { + HarnessContext, + HarnessFactory, + HarnessStreamInput, +} from "./types"; import type { MeshContext } from "../core/mesh-context"; import { localDispatch } from "./local-dispatch"; -import { registerHarnessFactory, resetRegistryForTests } from "./registry"; -import type { HarnessFactory, HarnessStreamInput } from "./types"; const makeInput = (): HarnessStreamInput => ({ threadId: "t1", @@ -13,7 +17,7 @@ const makeInput = (): HarnessStreamInput => ({ credentialId: "cred-1", thinking: { id: "m-thinking", name: "Thinking", contextWindow: 0 }, } as unknown as HarnessStreamInput["models"], - mcp: { url: "http://localhost/mcp", headers: {} }, + mcp: { url: "http://localhost/mcp", headers: {}, expiresAt: 0 }, mode: "default", temperature: 0, toolApprovalLevel: "auto", @@ -43,7 +47,7 @@ describe("localDispatch", () => { { type: "start" } as UIMessageChunk, { type: "finish" } as UIMessageChunk, ]; - let capturedCtx: MeshContext | undefined; + let capturedCtx: HarnessContext | undefined; const factory: HarnessFactory = { id: "decopilot", create(ctx) { diff --git a/apps/mesh/src/harnesses/local-dispatch.ts 
b/apps/mesh/src/harnesses/local-dispatch.ts index 127c1a1884..330ae96dd8 100644 --- a/apps/mesh/src/harnesses/local-dispatch.ts +++ b/apps/mesh/src/harnesses/local-dispatch.ts @@ -1,7 +1,7 @@ import type { UIMessageChunk } from "ai"; -import type { MeshContext } from "../core/mesh-context"; import { getHarnessFactory } from "./registry"; import type { HarnessId, HarnessStreamInput } from "./types"; +import type { MeshContext } from "../core/mesh-context"; /** Invoke a harness in-process. Looks up the factory, creates a harness with * the provided `ctx`, and returns its stream. Throws synchronously if the id diff --git a/apps/mesh/src/harnesses/registry.ts b/apps/mesh/src/harnesses/registry.ts index 27012e3008..e0e250ee40 100644 --- a/apps/mesh/src/harnesses/registry.ts +++ b/apps/mesh/src/harnesses/registry.ts @@ -3,7 +3,8 @@ import type { HarnessFactory, HarnessId } from "./types"; const registry = new Map(); /** Register a harness factory. Called once per harness at module load by - * `apps/mesh/src/harnesses/index.ts` (the barrel) — see Task 11. */ + * `apps/mesh/src/harnesses/index.ts` (the barrel), which imports each + * harness module for its registration side effect. 
*/ export function registerHarnessFactory(factory: HarnessFactory): void { registry.set(factory.id, factory); } diff --git a/apps/mesh/src/harnesses/remote-dispatch.test.ts b/apps/mesh/src/harnesses/remote-dispatch.test.ts new file mode 100644 index 0000000000..1cbb72187a --- /dev/null +++ b/apps/mesh/src/harnesses/remote-dispatch.test.ts @@ -0,0 +1,305 @@ +import { describe, it, expect } from "bun:test"; +import { fixtures, verifyRequest } from "../links/protocol"; +import { parseSSEStream, remoteDispatch } from "./remote-dispatch"; +import type { HarnessStreamInput } from "./types"; + +function eventsToSSEBody(events: readonly unknown[]): string { + return events.map((e) => `data: ${JSON.stringify(e)}\n\n`).join(""); +} + +function bodyFromString(s: string): ReadableStream { + return new Response(s).body!; +} + +function makeInput( + overrides: Partial = {}, +): HarnessStreamInput { + const ctrl = new AbortController(); + return { + threadId: "thr-1", + runId: "run-1", + taskId: "thr-1", + messages: [], + models: { + credentialId: "cred-1", + thinking: { id: "claude-code:opus", title: "Opus" }, + }, + mcp: { + url: "https://mesh.example.com/mcp/virtual-mcp/agent-1", + headers: { Authorization: "Bearer mcp-token" }, + }, + mode: "default", + temperature: 0.7, + toolApprovalLevel: "auto", + user: { id: "user-1", email: "u@example.com" }, + organizationId: "org-1", + virtualMcp: { + id: "agent-1", + } as unknown as HarnessStreamInput["virtualMcp"], + agent: { id: "agent-1" }, + signal: ctrl.signal, + ...overrides, + } as HarnessStreamInput; +} + +describe("parseSSEStream", () => { + it("yields UIMessageChunks from a happy-path stream", async () => { + const sseBody = eventsToSSEBody(fixtures.FIXTURE_SSE_HAPPY_PATH); + const out: unknown[] = []; + for await (const chunk of parseSSEStream(bodyFromString(sseBody))) { + out.push(chunk); + } + // happy-path fixture has 4 ui-message-chunk events + 1 done. + // parser yields only the chunks and returns on `done`. 
+ expect(out.length).toBe(4); + }); + + it("throws on an error event after yielding earlier chunks", async () => { + const sseBody = eventsToSSEBody(fixtures.FIXTURE_SSE_HARNESS_CRASH); + const out: unknown[] = []; + let caught: Error | null = null; + try { + for await (const chunk of parseSSEStream(bodyFromString(sseBody))) { + out.push(chunk); + } + } catch (e) { + caught = e as Error; + } + expect(caught).not.toBeNull(); + expect(caught!.message).toContain("harness_crashed"); + // the start chunk arrived before the error + expect(out.length).toBe(1); + }); + + it("tolerates events split across read chunks", async () => { + // Manually construct a stream that emits the SSE bytes in two + // chunks, splitting in the middle of an event. The parser must + // buffer until the next \n\n boundary. + const full = eventsToSSEBody(fixtures.FIXTURE_SSE_HAPPY_PATH); + const half = Math.floor(full.length / 2); + const enc = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(enc.encode(full.slice(0, half))); + controller.enqueue(enc.encode(full.slice(half))); + controller.close(); + }, + }); + const out: unknown[] = []; + for await (const chunk of parseSSEStream(stream)) { + out.push(chunk); + } + expect(out.length).toBe(4); + }); + + it("ignores unknown event types (forward-compat)", async () => { + const body = [ + `data: ${JSON.stringify({ type: "ping" })}\n\n`, + `data: ${JSON.stringify({ type: "ui-message-chunk", chunk: { type: "start", id: "m1" } })}\n\n`, + `data: ${JSON.stringify({ type: "done" })}\n\n`, + ].join(""); + const out: unknown[] = []; + for await (const chunk of parseSSEStream(bodyFromString(body))) { + out.push(chunk); + } + expect(out.length).toBe(1); + }); +}); + +describe("remoteDispatch", () => { + const link = { + tunnelUrl: "https://link-x.example.com", + linkSecret: "secret-for-tests-padding-padding-padding", + }; + + it("posts to the right URL with a valid HMAC signature", async () => { + let 
captured: { + url: string; + method: string; + headers: Record; + body: string; + } | null = null; + + const fetchImpl = (async ( + url: string | URL | Request, + init?: RequestInit, + ): Promise => { + const headers: Record = {}; + const hdrs = new Headers(init?.headers ?? {}); + hdrs.forEach((v, k) => { + headers[k] = v; + }); + captured = { + url: typeof url === "string" ? url : url.toString(), + method: init?.method ?? "GET", + headers, + body: (init?.body as string) ?? "", + }; + // Reply with the happy-path SSE body. + return new Response(eventsToSSEBody(fixtures.FIXTURE_SSE_HAPPY_PATH), { + status: 200, + headers: { "content-type": "text/event-stream" }, + }); + }) as typeof fetch; + + const input = makeInput({ runId: "run-abc" }); + // Per-daemon tunnel URL (what the link's POST /api/sandboxes returns) + // — the cluster dispatches directly against the daemon, no link hop. + const sandboxUrl = `https://${input.runId}.deco.host`; + const out: unknown[] = []; + for await (const chunk of remoteDispatch( + "claude-code", + input, + link, + sandboxUrl, + { fetchImpl }, + )) { + out.push(chunk); + } + expect(out.length).toBe(4); + + expect(captured).not.toBeNull(); + const cap = captured!; + const expectedPath = "/_decopilot_vm/dispatch"; + expect(cap.url).toBe(`${sandboxUrl}${expectedPath}`); + expect(cap.method).toBe("POST"); + + // Verify HMAC headers round-trip through verifyRequest with the + // same secret. This is the same check the daemon does on the + // receiving end, so a green test here proves the wire contract. + const v = verifyRequest({ + secret: link.linkSecret, + method: cap.method, + path: expectedPath, + body: cap.body, + headers: cap.headers, + seenNonce: () => false, + }); + expect(v.valid).toBe(true); + + // Body shape: harnessId + wireInput; the AbortSignal must be + // stripped (otherwise JSON.stringify would have crashed before we + // ever got here). 
+ const parsedBody = JSON.parse(cap.body) as { + harnessId: string; + input: Record; + }; + expect(parsedBody.harnessId).toBe("claude-code"); + expect(parsedBody.input.runId).toBe("run-abc"); + expect("signal" in parsedBody.input).toBe(false); + expect("processLocal" in parsedBody.input).toBe(false); + + // No Authorization header — HMAC alone authenticates (Task 5.1 + // decision; bearer would leak the signing key in HTTP logs). + expect(cap.headers["authorization"]).toBeUndefined(); + }); + + it("fires a HMAC-signed DELETE on consumer abort", async () => { + const calls: Array<{ + url: string; + method: string; + headers: Record; + }> = []; + + // The dispatch fetch returns a stream we never drain past the + // first chunk; we abort the consumer signal and exit the loop. + const fetchImpl = (async ( + url: string | URL | Request, + init?: RequestInit, + ): Promise => { + const headers: Record = {}; + const hdrs = new Headers(init?.headers ?? {}); + hdrs.forEach((v, k) => { + headers[k] = v; + }); + calls.push({ + url: typeof url === "string" ? url : url.toString(), + method: init?.method ?? "GET", + headers, + }); + if (init?.method === "POST") { + // SSE body with multiple chunks; the consumer will abort + // after the first one. 
+ const sseBody = eventsToSSEBody(fixtures.FIXTURE_SSE_HAPPY_PATH); + return new Response(sseBody, { + status: 200, + headers: { "content-type": "text/event-stream" }, + }); + } + // DELETE + return new Response(null, { status: 204 }); + }) as typeof fetch; + + const ctrl = new AbortController(); + const input = makeInput({ runId: "run-xyz", signal: ctrl.signal }); + const sandboxUrl = `https://${input.runId}.deco.host`; + const out: unknown[] = []; + let firstChunkSeen = false; + try { + for await (const chunk of remoteDispatch( + "codex", + input, + link, + sandboxUrl, + { fetchImpl }, + )) { + out.push(chunk); + if (!firstChunkSeen) { + firstChunkSeen = true; + ctrl.abort(); + break; + } + } + } catch { + // The DELETE fires from `finally`, so we tolerate any error + // raised by abort-during-read here. + } + + // Give the unawaited cancel-fetch a tick to land in `calls`. + await new Promise((r) => setTimeout(r, 0)); + + const deleteCall = calls.find((c) => c.method === "DELETE"); + expect(deleteCall).toBeDefined(); + const expectedCancelPath = "/_decopilot_vm/runs/run-xyz"; + expect(deleteCall!.url).toBe(`${sandboxUrl}${expectedCancelPath}`); + + // HMAC signature on the DELETE verifies against the same secret. 
+ const v = verifyRequest({ + secret: link.linkSecret, + method: "DELETE", + path: expectedCancelPath, + body: "", + headers: deleteCall!.headers, + seenNonce: () => false, + }); + expect(v.valid).toBe(true); + }); + + it("throws when the daemon returns a non-2xx response", async () => { + const fetchImpl = (async ( + _url: string | URL | Request, + _init?: RequestInit, + ): Promise => { + return new Response("nope", { status: 502, statusText: "Bad Gateway" }); + }) as typeof fetch; + + const input = makeInput(); + const sandboxUrl = `https://${input.runId}.deco.host`; + let caught: Error | null = null; + try { + for await (const _ of remoteDispatch( + "claude-code", + input, + link, + sandboxUrl, + { fetchImpl }, + )) { + // consume + } + } catch (e) { + caught = e as Error; + } + expect(caught).not.toBeNull(); + expect(caught!.message).toContain("502"); + }); +}); diff --git a/apps/mesh/src/harnesses/remote-dispatch.ts b/apps/mesh/src/harnesses/remote-dispatch.ts new file mode 100644 index 0000000000..6619d27be3 --- /dev/null +++ b/apps/mesh/src/harnesses/remote-dispatch.ts @@ -0,0 +1,290 @@ +/** + * Remote Dispatch. + * + * The cluster-side counterpart to `localDispatch` for runs that resolve + * to a `remote-cli` target — the entire harness stream is delegated to + * the user's link daemon over the registered tunnel URL. The cluster + * keeps producing UI chunks back to the chat client by tailing the + * daemon's SSE response and re-emitting `UIMessageChunk`s. + * + * Wire shape (per `harnessStreamInputSchema.strip()`): non-serializable + * fields (`signal`, `processLocal`) are stripped before signing. The + * body is `{ harnessId, input: wireInput }` JSON; HMAC headers + * authenticate (no Bearer header — sending the signing key plaintext in + * a header would defeat the point of HMAC, see Task 5.1). 
+ * + * URL: `<sandboxUrl>/_decopilot_vm/dispatch` — the per-daemon tunnel URL + * (`https://<handle>.deco.host` in prod, `http://127.0.0.1:<port>` in dev + * `--no-tunnel`) returned by the link's `POST /api/sandboxes`. The cluster + * talks to the daemon directly; the link no longer reverse-proxies + * `/_sandbox/<handle>/*` (that route was deleted with the per-daemon-tunnel + * migration). + * + * Handle vs runId: for remote-cli (whole-harness dispatch), the laptop + * spins up an ephemeral per-run sandbox; the handle == `input.runId`. + * This differs from the remote-user provider path (Task 5.1), where the + * handle is `computeHandle(sandboxId, branch)` because the sandbox is + * long-lived and shared across runs. The daemon's cancel route + * (`/_decopilot_vm/runs/<runId>`) confirms this — it matches on runId + * directly. + * + * Cancellation: on consumer abort, fires a DELETE to the runs endpoint + * independent of SSE close so the laptop can abort its CLI subprocess + * promptly even if the response stream is still draining. + */ +import type { UIMessageChunk } from "ai"; +import { + signRequest, + dispatchSSEEventSchema, + type LinkEntry, +} from "../links/protocol"; +import type { HarnessId, HarnessStreamInput } from "./types"; + +/** + * Parse the daemon's SSE response into a stream of UIMessageChunks. + * + * Buffers incoming bytes, splits on the `\n\n` event boundary, and + * decodes each event from one or more `data: …` lines (joined by + * `\n`). Events are validated through `dispatchSSEEventSchema`: + * + * - `ui-message-chunk` → yields the chunk + * - `error` → throws (carrying code + message) + * - `done` → returns + * - anything else → silently skipped (forward-compat) + * + * Unparseable JSON or schema-mismatched events are also skipped — the + * parser is permissive on purpose. The daemon controls both ends of the + * wire, so the failure mode that matters is a future protocol bump + * adding a new event type; we don't want old clients to crash.
/** Blank-line event separator; accepts CRLF as well as LF line endings. */
const SSE_EVENT_BOUNDARY = /\r?\n\r?\n/;

/**
 * Parse the daemon's SSE response into a stream of UIMessageChunks.
 *
 * Buffers incoming bytes, splits on the blank-line event boundary, and
 * decodes each event from one or more `data:` lines (joined by `\n`).
 * Events are validated through `dispatchSSEEventSchema`:
 *
 *   - `ui-message-chunk` → yields the chunk
 *   - `error`            → throws (carrying code + message)
 *   - `done`             → stops parsing; later events are not yielded
 *   - anything else      → silently skipped (forward-compat)
 *
 * Unparseable JSON and schema-mismatched events are skipped on purpose so
 * that a protocol bump adding a new event type does not crash old clients.
 *
 * @param body Byte stream of the SSE response.
 * @returns The UI message chunks carried by the stream, in order.
 * @throws Error when the stream carries an `error` event.
 */
export async function* parseSSEStream(
  body: ReadableStream<Uint8Array>,
): AsyncIterable<UIMessageChunk> {
  const decoder = new TextDecoder();
  const reader = body.getReader();
  let buffer = "";
  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        // Flush any trailing bytes that did not end with the separator:
        // be lenient rather than drop a final event.
        buffer += decoder.decode();
        const trailing = buffer.trim();
        if (trailing.length > 0) {
          for (const payload of extractEvents(trailing)) {
            if (yield* handleEvent(payload)) return;
          }
        }
        return;
      }
      // `stream: true` keeps a multi-byte character that straddles two
      // reads intact until its remaining bytes arrive.
      buffer += decoder.decode(value, { stream: true });

      let boundary = SSE_EVENT_BOUNDARY.exec(buffer);
      while (boundary !== null) {
        const eventBlock = buffer.slice(0, boundary.index);
        buffer = buffer.slice(boundary.index + boundary[0].length);
        for (const payload of extractEvents(eventBlock)) {
          // `handleEvent` reports `true` for a `done` event; stop here so
          // nothing after `done` is ever yielded.
          if (yield* handleEvent(payload)) return;
        }
        boundary = SSE_EVENT_BOUNDARY.exec(buffer);
      }
    }
  } finally {
    // Early exits (`done`, an `error` event, the consumer breaking out)
    // can leave the body unread; cancel it so the underlying connection
    // is released. Best-effort: an already-errored stream rejects here.
    void reader.cancel().catch(() => {});
    reader.releaseLock();
  }
}

/**
 * Extract the `data:` payload from a single SSE event block.
 *
 * One event may carry several `data:` lines; they are joined with `\n`
 * to form the payload. A single space after the colon is optional and is
 * removed when present. Other line types (`event:`, `id:`, comments
 * starting with `:`) are ignored.
 *
 * @param block Text of one event, without the blank-line separator.
 * @returns A one-element array holding the joined payload, or an empty
 *   array when the block has no `data:` line.
 */
function extractEvents(block: string): string[] {
  const dataLines: string[] = [];
  for (const line of block.split(/\r?\n/)) {
    if (!line.startsWith("data:")) continue;
    const value = line.slice("data:".length);
    dataLines.push(value.startsWith(" ") ? value.slice(1) : value);
  }
  return dataLines.length > 0 ? [dataLines.join("\n")] : [];
}

/**
 * Decode one SSE payload and act on it.
 *
 * Yields the chunk of a `ui-message-chunk` event, throws for an `error`
 * event, and ignores anything that is not valid JSON or does not match
 * `dispatchSSEEventSchema`.
 *
 * @param dataJson JSON text of a single event payload.
 * @returns `true` when the payload is a `done` event (the caller must
 *   stop reading), `false` otherwise.
 * @throws Error carrying `code: message` for an `error` event.
 */
async function* handleEvent(
  dataJson: string,
): AsyncGenerator<UIMessageChunk, boolean> {
  let parsedJson: unknown;
  try {
    parsedJson = JSON.parse(dataJson);
  } catch {
    return false; // ignore garbage
  }
  const parsed = dispatchSSEEventSchema.safeParse(parsedJson);
  if (!parsed.success) return false; // unknown event shapes (forward-compat)

  const event = parsed.data;
  switch (event.type) {
    case "ui-message-chunk":
      yield event.chunk as UIMessageChunk;
      return false;
    case "error":
      throw new Error(`[remoteDispatch] ${event.code}: ${event.message}`);
    case "done":
      return true;
    default:
      return false;
  }
}

/** Injectable dependencies, so tests can substitute `fetch`. */
export interface RemoteDispatchDeps {
  fetchImpl?: typeof fetch;
}

/**
 * Subset of `LinkEntry` actually needed by `remoteDispatch`. Accepting
 * the smaller shape makes test fakes cheaper to construct.
 */
export type RemoteDispatchLink = Pick<LinkEntry, "tunnelUrl" | "linkSecret">;
/** Join a base URL and an absolute path without doubling the slash. */
function joinUrl(base: string, path: string): string {
  return `${base.replace(/\/+$/, "")}${path}`;
}

/**
 * Best-effort read of an error response body for diagnostics, truncated
 * to 200 characters. Returns an empty string when the body cannot be read.
 */
async function readErrorDetail(res: Response): Promise<string> {
  try {
    const text = await res.text();
    return text.length > 200 ? `${text.slice(0, 200)}…` : text;
  } catch {
    return "";
  }
}

/**
 * Ensure the laptop link has a sandbox registered at `handle` before the
 * cluster fires `remoteDispatch`. The link's `POST /api/sandboxes`
 * spawns (or reuses) a daemon for `handle` and returns the daemon's
 * `sandboxUrl` — a per-daemon tunnel (`https://<handle>.deco.host` in
 * prod, `http://127.0.0.1:<port>` in dev `--no-tunnel`). The cluster then
 * talks to the daemon directly at that URL.
 *
 * Used by `remote-cli` dispatch, where the handle equals `input.runId`
 * (a fresh, ephemeral per-run sandbox). No `repo` is sent: the run
 * operates on an empty per-handle workdir that the link creates under
 * `.deco/link/sandboxes/`. Repeated POSTs with the same handle are
 * idempotent on the link side.
 *
 * @param link Tunnel URL and HMAC secret of the user's link.
 * @param handle Sandbox handle to create or reuse.
 * @param deps Optional `fetch` override for tests.
 * @returns The daemon URL reported by the link.
 * @throws Error on a non-2xx response (status plus a truncated body), or
 *   when the response does not carry a string `sandboxUrl`.
 */
export async function ensureRemoteCliSandbox(
  link: RemoteDispatchLink,
  handle: string,
  deps: RemoteDispatchDeps = {},
): Promise<{ sandboxUrl: string }> {
  const fetcher = deps.fetchImpl ?? fetch;
  const path = "/api/sandboxes";
  const body = JSON.stringify({ handle });
  const sigHeaders = signRequest({
    secret: link.linkSecret,
    method: "POST",
    path,
    body,
  });
  // `joinUrl` keeps the requested path identical to the signed `path`
  // even when `tunnelUrl` was registered with a trailing slash.
  const res = await fetcher(joinUrl(link.tunnelUrl, path), {
    method: "POST",
    body,
    headers: {
      ...sigHeaders,
      "Content-Type": "application/json",
    },
  });
  if (!res.ok) {
    const detail = await readErrorDetail(res);
    throw new Error(
      `ensureRemoteCliSandbox HTTP ${res.status}${detail ? ` ${detail}` : ""}`,
    );
  }

  // Validate before touching fields: a `null` or non-JSON body must end
  // in the descriptive error below, not a TypeError/SyntaxError.
  let parsed: unknown;
  try {
    parsed = await res.json();
  } catch {
    parsed = undefined;
  }
  const sandboxUrl =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { sandboxUrl?: unknown }).sandboxUrl
      : undefined;
  if (typeof sandboxUrl !== "string") {
    throw new Error(
      "ensureRemoteCliSandbox: link did not return a sandboxUrl string",
    );
  }
  return { sandboxUrl };
}

/**
 * Dispatch a whole harness run to the daemon at `sandboxUrl` and re-emit
 * its SSE response as `UIMessageChunk`s.
 *
 * The request body is `{ harnessId, input }` with the non-serializable
 * `signal` and `processLocal` fields removed; it is authenticated by the
 * HMAC headers from `signRequest` (signed over the URL pathname), with no
 * `Authorization` header. Nothing is sent until the returned iterable is
 * iterated.
 *
 * When `input.signal` is aborted by the time iteration ends — including
 * an abort that lands while the dispatch POST is still in flight — a
 * best-effort, un-awaited HMAC-signed `DELETE /_decopilot_vm/runs/<runId>`
 * is fired so the daemon can stop its subprocess promptly.
 *
 * @param id Harness to run on the daemon.
 * @param input Stream input; `signal` and `processLocal` stay local.
 * @param link Supplies the HMAC signing secret.
 * @param sandboxUrl Base URL of the daemon, as returned by
 *   `ensureRemoteCliSandbox`.
 * @param deps Optional `fetch` override for tests.
 * @throws Error (on iteration) for a non-2xx response, a response without
 *   a body, or an `error` event in the stream.
 */
export function remoteDispatch(
  id: HarnessId,
  input: HarnessStreamInput,
  link: RemoteDispatchLink,
  sandboxUrl: string,
  deps: RemoteDispatchDeps = {},
): AsyncIterable<UIMessageChunk> {
  const fetcher = deps.fetchImpl ?? fetch;
  const dispatchTarget = joinUrl(sandboxUrl, "/_decopilot_vm/dispatch");
  const cancelTarget = joinUrl(
    sandboxUrl,
    `/_decopilot_vm/runs/${input.runId}`,
  );
  // HMAC signs the pathname only (not the full URL).
  const dispatchPath = new URL(dispatchTarget).pathname;
  const cancelPath = new URL(cancelTarget).pathname;

  // Strip the two non-serializable fields so the signed body is plain
  // JSON (an `AbortSignal` cannot be meaningfully serialized).
  const { signal, processLocal: _processLocal, ...wireInput } = input;

  // Best-effort cancel RPC: never awaited, never allowed to throw, so it
  // cannot mask whatever error is already propagating.
  const fireCancel = (): void => {
    try {
      const cancelSig = signRequest({
        secret: link.linkSecret,
        method: "DELETE",
        path: cancelPath,
        body: "",
      });
      void fetcher(cancelTarget, {
        method: "DELETE",
        headers: { ...cancelSig },
      }).catch(() => {});
    } catch {
      // ignore — the consumer has already moved on
    }
  };

  return {
    async *[Symbol.asyncIterator]() {
      const bodyString = JSON.stringify({ harnessId: id, input: wireInput });
      const sigHeaders = signRequest({
        secret: link.linkSecret,
        method: "POST",
        path: dispatchPath,
        body: bodyString,
      });
      try {
        // Inside the `try` so an abort during the POST itself still
        // reaches the cancel RPC in `finally`.
        const res = await fetcher(dispatchTarget, {
          method: "POST",
          body: bodyString,
          headers: {
            ...sigHeaders,
            "Content-Type": "application/json",
            Accept: "text/event-stream",
          },
          signal,
        });
        if (!res.ok) {
          const detail = await readErrorDetail(res);
          throw new Error(
            `remoteDispatch HTTP ${res.status}${res.statusText ? ` ${res.statusText}` : ""}${detail ? `: ${detail}` : ""}`,
          );
        }
        if (!res.body) {
          throw new Error(
            `remoteDispatch HTTP ${res.status}: response has no body`,
          );
        }
        yield* parseSSEStream(res.body);
      } finally {
        if (signal?.aborted) {
          fireCancel();
        }
      }
    },
  };
}
The CLI harnesses only care about + * "plan" (sets `isPlanMode` for read-only restrictions); decopilot + * interprets the rest internally. Mirrors + * `apps/mesh/src/api/routes/decopilot/mode-config.ts:CHAT_MODES`. */ +export type ChatMode = "default" | "plan" | "web-search" | "gen-image"; + +/** Per-model selection passed in the wire input. Cluster has a richer + * `ModelInfo` (capabilities, etc.); the package mirrors the minimal + * fields the harness package itself reads, plus `limits` which the + * cluster's decopilot run-stream reads for `maxOutputTokens`. */ +export interface ModelSelection { + id: string; + title?: string; + provider?: string | null; + limits?: { contextWindow?: number; maxOutputTokens?: number }; +} + +export interface ModelsConfig { + credentialId: string; + thinking: ModelSelection; + coding?: ModelSelection; + fast?: ModelSelection; + image?: ModelSelection; + deepResearch?: ModelSelection; +} + +/** UI-shape message a harness receives. Structurally compatible with the + * cluster's richer `ChatMessage` (which carries Metadata + builtin tool + * types). The package only needs the `parts` + `role` + `id` shape, which + * the AI SDK's generic `UIMessage` already provides. */ +export type ChatMessage = UIMessage; + /** In-process-only extras that don't survive remote-dispatch serialization. * * Decopilot consumes these; CLI harnesses ignore them entirely. The fields - * are produced by `prepareRun`'s outer scope (the `createUIMessageStream` - * execute callback's `writer`, the `RunRegistry` instance, etc.) and - * forwarded into the decopilot harness so the streamText loop can wire - * into the surrounding request-level state. + * are produced by `prepareRun`'s outer scope on the cluster side (the + * `createUIMessageStream` execute callback's `writer`, the `RunRegistry` + * instance, etc.) and forwarded into the decopilot harness so the + * streamText loop can wire into the surrounding request-level state. 
* - * In a future remote-dispatch pass this field is stripped before the - * payload crosses the wire — the in-process extras simply have no remote - * equivalent (the writer's UI message stream layer would live on the - * caller side). The decopilot harness MUST refuse to run without it; the - * CLI harnesses MUST NOT depend on any of these fields. */ + * Strongly-typed members live on the cluster side. To keep the package + * portable, we type the structurally-deep cluster fields as `unknown` and + * let the cluster cast at the call site. CLI harnesses MUST NOT depend on + * any of these fields. */ export interface HarnessProcessLocal { /** UI message stream writer from the surrounding * `createUIMessageStream({ execute: ({ writer }) => ... })`. Built-in @@ -38,11 +82,8 @@ export interface HarnessProcessLocal { * execution. Mutated in place by the built-in tool and by * `prepareStep` inside `runDecopilotStream` (which splices images out * to embed in the next user message). MUST be the same array - * reference passed to `assembleDecopilotTools` and `runDecopilotStream` - * — otherwise the screenshot tool writes to one array and - * `prepareStep` reads from another, and the images never reach the - * model. */ - pendingImages: import("./decopilot/built-in-tools/take-screenshot").PendingImage[]; + * reference passed to `assembleDecopilotTools` and `runDecopilotStream`. */ + pendingImages: unknown[]; /** Thread id (equals `mem.thread.id`). Also lives on * `HarnessStreamInput.threadId`; kept duplicated here so the harness @@ -51,11 +92,7 @@ export interface HarnessProcessLocal { /** Initial value of `mem.thread.title` at request entry. Title * generation only kicks off when this equals `DEFAULT_THREAD_TITLE` - * ("New chat") — the convention for an unrenamed thread. - * - * Identical to `HarnessStreamInput.currentThreadTitle` in well-formed - * callers; the duplication exists because the surrounding stream-core - * code loads the title from the `Memory` object today. 
*/ + * ("New chat") — the convention for an unrenamed thread. */ currentThreadTitle: string; /** Run-registry abort signal for this run. Listened to by streamText @@ -66,14 +103,15 @@ export interface HarnessProcessLocal { /** The run-registry itself, used by `streamText.onFinish` to dispatch * a deferred `FINISH` event when the HTTP consumer cut early but the - * model has now actually completed server-side. */ - runRegistry: import("../api/routes/decopilot/run-registry").RunRegistry; + * model has now actually completed server-side. Cluster-only type; + * the package treats it as opaque. */ + runRegistry: unknown; /** Already-activated MeshProvider — the caller has resolved the * credential id to a key/headers and called `ctx.aiProviders.activate` * before invoking us. The decopilot harness rejects `null`; CLI - * harnesses don't read this field. */ - provider: import("../ai-providers/types").MeshProvider | null; + * harnesses don't read this field. Cluster-only type. */ + provider: unknown | null; /** Push callback for title-generation work. The streamText loop * registers `titleHandle.promise.then(...)` as a pending op so the @@ -102,6 +140,12 @@ export interface HarnessProcessLocal { * Optional — callers that cannot supply sseHub (e.g. orphan-recovery * path without a buffer) may omit it; the omission is safe and silent. */ onTitleUpdated?: (title: string) => void | Promise<void>; + + /** Cluster-side cwd resolver for github-linked agents. Returns the + * per-branch sandbox workdir; CLI harnesses use this when running + * in-cluster. When undefined (or returns undefined), the harness + * falls back to `process.cwd()`. */ + resolveCwd?: () => Promise<string | undefined>; } /** Input passed to every Harness.stream() call. Fully serializable except @@ -121,10 +165,16 @@ export interface HarnessStreamInput { models: ModelsConfig; // ===== Tool gateway ===== - /** MCP endpoint the harness should connect to.
Today this is the in-process - * `getInternalUrl()/mcp/virtual-mcp/`; in a future remote-dispatch - * pass it will be the public mesh URL. The Bearer token is a 1h-TTL temp key. */ - mcp: { url: string; headers: Record<string, string> }; + /** MCP endpoint the harness should connect to. In-process (decopilot, + * cluster-side claude-code/codex) uses `getInternalUrl()/mcp/virtual-mcp/`; + * remote-cli (laptop daemon dispatch) uses the cluster's public URL. + * The Bearer token is a 1h-TTL temp key — `expiresAt` carries its + * absolute deadline so remote daemons can refresh proactively. */ + mcp: { + url: string; + headers: Record<string, string>; + expiresAt: number; + }; // ===== Mode (forwarded; each harness interprets independently) ===== mode: ChatMode; @@ -140,8 +190,10 @@ export interface HarnessStreamInput { projectSlug?: string; /** Loaded VirtualMcp entity (the agent definition). Decopilot reads metadata, - * connection list, and github-repo info from this; CLI harnesses use only `id`. */ - virtualMcp: VirtualMCPEntity; + * connection list, and github-repo info from this; CLI harnesses use only `id`. + * Typed as a permissive bag in the package — the cluster passes its richer + * `VirtualMCPEntity` shape and TS accepts the widening. */ + virtualMcp: { id: string; metadata?: unknown; [k: string]: unknown }; /** Convenience: same as `virtualMcp.id`. Kept separate to avoid forcing CLI * harnesses to destructure the full entity. */ agent: { id: string }; @@ -183,12 +235,42 @@ export interface Harness { stream(input: HarnessStreamInput): AsyncIterable<UIMessageChunk>; } -/** A factory binds in-process dependencies (MeshContext) into a Harness +/** Narrow context interface every Harness factory takes. Cluster-specific + * surface (DB, vault, auth, MCP gateway internals) lives on the wider + * MeshContext and is only safe to read inside a harness that gates its + * cluster-side code path on `HarnessStreamInput.processLocal` (today, + * only decopilot does this).
+ * + * The laptop's daemon constructs a HarnessContext directly to invoke + * `claudeCodeHarnessFactory.create()` / `codexHarnessFactory.create()` + * without depending on cluster-only modules. + * + * Re-declared here (mirroring `apps/mesh/src/core/harness-context.ts`) so + * the package stays portable. The cluster's richer `MeshContext` is + * structurally assignable to this shape. */ +export interface HarnessContext { + tracer: import("@opentelemetry/api").Tracer; + meter: import("@opentelemetry/api").Meter; + metadata: { + threadId?: string; + orgId?: string; + userId?: string; + }; + /** Optional — only decopilot uses this; CLI harnesses never read it. */ + aiProviders?: { + activate( + credentialId: string, + organizationId: string, + ): Promise<unknown>; + }; +} + +/** A factory binds in-process dependencies (HarnessContext) into a Harness * instance. The registry stores factories rather than singletons because * the harnesses need per-request access to storage, providers, and tracing * via `ctx`. Keeping ctx out of `HarnessStreamInput` means the input shape * stays serializable for a future remote transport. */ export interface HarnessFactory { id: HarnessId; - create(ctx: import("../core/mesh-context").MeshContext): Harness; + create(ctx: HarnessContext): Harness; } diff --git a/apps/mesh/src/harnesses/usage-accumulator.ts b/apps/mesh/src/harnesses/usage-accumulator.ts index 61c3e59eae..29d868986b 100644 --- a/apps/mesh/src/harnesses/usage-accumulator.ts +++ b/apps/mesh/src/harnesses/usage-accumulator.ts @@ -37,7 +37,38 @@ import { sanitizeProviderMetadata } from "@decocms/mesh-sdk"; import type { LanguageModelUsage } from "ai"; -import { addCacheStep } from "../api/routes/decopilot/cache-instrumentation"; + +/** + * Per-step cache-token accumulator. Mirrors + * `apps/mesh/src/api/routes/decopilot/cache-instrumentation.ts:addCacheStep` — + * inlined here so the package stays free of cluster-side dependencies.
+ * Both implementations must stay in sync; the cluster's version is also + * the source of truth for OTel attribute emission on the dispatch-run + * path. + */ +interface CacheAccumulator { + read: number; + write: number; + input: number; +} + +interface CacheStepUsage { + inputTokens?: number; + inputTokenDetails?: { + cacheReadTokens?: number; + cacheWriteTokens?: number; + }; +} + +function addCacheStep( + acc: CacheAccumulator, + usage: CacheStepUsage | undefined, +): void { + if (!usage) return; + acc.read += usage.inputTokenDetails?.cacheReadTokens ?? 0; + acc.write += usage.inputTokenDetails?.cacheWriteTokens ?? 0; + acc.input += usage.inputTokens ?? 0; +} // ───────────────────────────────────────────────────────────────────── // Types diff --git a/apps/mesh/src/index.ts b/apps/mesh/src/index.ts index 6940425206..40c26885e5 100644 --- a/apps/mesh/src/index.ts +++ b/apps/mesh/src/index.ts @@ -114,48 +114,48 @@ const previewProxyDeps = { if (!runner || runner.kind !== "agent-sandbox") return null; // The agent-sandbox runner is the only one that exposes proxyPreviewRequest / // resolvePreviewUpstreamUrl; cast is safe after the kind check. - return runner as unknown as import("@decocms/sandbox/runner/agent-sandbox").AgentSandboxRunner; + return runner as unknown as import("@decocms/sandbox/provider/agent-sandbox").AgentSandboxProvider; }, }; -// Boot/dev wiring for local runners (docker + host). The boot sweep is -// Docker-only — host runner's rehydrate() probes /health and discards dead -// state on its own. The local ingress is shared by both runners. -const { resolveRunnerKindFromEnv } = await import("@decocms/sandbox/runner"); -const sandboxRunnerKind = resolveRunnerKindFromEnv(); -const ingressEligible = - sandboxRunnerKind === "docker" || sandboxRunnerKind === "host"; +// Boot/dev wiring for the Docker runner. 
The boot sweep + local ingress +// are Docker-only — other runners (freestyle, agent-sandbox, remote-user) +// either don't run on this machine or expose previews via their own +// publicly-reachable URLs. +const { resolveSandboxProviderKindFromEnv } = await import( + "@decocms/sandbox/provider" +); +const sandboxProviderKind = resolveSandboxProviderKindFromEnv(); +const ingressEligible = sandboxProviderKind === "docker"; if (ingressEligible) { - const { startLocalSandboxIngress } = await import("@decocms/sandbox/runner"); - const { getSharedRunnerIfInit, getOrInitSharedRunner } = await import( - "./sandbox/lifecycle" + const { startLocalSandboxIngress } = await import( + "@decocms/sandbox/provider" ); + const { getSharedSandboxProviderIfInit, getOrInitSharedRunner } = + await import("./sandbox/lifecycle"); // Boot sweep (best-effort). Shutdown cleanup can't cover crashes — // SIGTERM races with the parent killing postgres — so the boot sweep is // what actually keeps `docker ps` empty between sessions. - // Host runner's rehydrate() probes /health and discards dead state on its own. - if (sandboxRunnerKind === "docker") { - const { sweepDockerOrphansOnBoot } = await import( - "@decocms/sandbox/runner" - ); - await sweepDockerOrphansOnBoot(); - } + const { sweepDockerOrphansOnBoot } = await import( + "@decocms/sandbox/provider" + ); + await sweepDockerOrphansOnBoot(); // Port 7070 default: macOS AirPlay Receiver owns `*:7000` on v4+v6, so a // Chrome Happy-Eyeballs race would hit Apple. The ingress is part of the - // host/docker runner contract — those runners only expose user dev servers - // through `.localhost:7070`, so the gate is the runner kind, not + // Docker runner contract — Docker exposes user dev servers through + // `.localhost:7070`, so the gate is the runner kind, not // NODE_ENV. Set `SANDBOX_INGRESS_PORT=0` to skip binding entirely. const ingressPort = Number(process.env.SANDBOX_INGRESS_PORT ?? 
7070); if (ingressPort > 0) { ingressServers = startLocalSandboxIngress(() => { - const r = getSharedRunnerIfInit(); + const r = getSharedSandboxProviderIfInit(); if (!r) return null; - if (r.kind !== "docker" && r.kind !== "host") return null; - // Both DockerSandboxRunner and HostSandboxRunner expose - // resolveDaemonPort; the structural cast is safe after the kind check. + if (r.kind !== "docker") return null; + // DockerSandboxProvider exposes resolveDaemonPort; the structural + // cast is safe after the kind check. return r as unknown as { resolveDaemonPort(handle: string): Promise<number>; }; @@ -254,6 +254,35 @@ if (settings.localMode) { try { const seeded = await seedLocalMode(); void seeded; + // When the cluster is in dev mode (MESH_ALLOW_LOCALHOST_LINKS=1 + // set by `bun run dev`), bootstrap an API-key-backed session for + // the laptop-side link binary that `bun run dev` auto-spawns. + // The link reads it from `<dataDir>/dev-link/session.json` and + // presents the API key as a Bearer token to POST /api/links. + if (process.env.MESH_ALLOW_LOCALHOST_LINKS === "1") { + try { + const { bootstrapDevLinkSession } = await import( + "./auth/dev-link-session" + ); + const clusterBaseUrl = + settings.baseUrl ?? `http://localhost:${settings.port}`; + const result = await bootstrapDevLinkSession( + settings.dataDir, + clusterBaseUrl, + ); + if (result) { + console.log( + `[dev-link] session ready at ${result.path} (userSub=${result.userSub})`, + ); + } else { + console.warn( + "[dev-link] no admin user yet — skipping session bootstrap.
The auto-spawned link will refuse to start until an admin exists.", + ); + } + } catch (err) { + console.error("[dev-link] bootstrap failed:", err); + } + } } catch (error) { console.error("Failed to seed local mode:", error); } finally { diff --git a/apps/mesh/src/link-daemon/capabilities.test.ts b/apps/mesh/src/link-daemon/capabilities.test.ts new file mode 100644 index 0000000000..e85e095c6e --- /dev/null +++ b/apps/mesh/src/link-daemon/capabilities.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from "bun:test"; +import { detectCapabilities } from "./capabilities"; + +describe("detectCapabilities", () => { + it("always includes decopilot-sandbox", async () => { + const caps = await detectCapabilities({ + detectClaudeCode: async () => false, + detectCodex: async () => false, + }); + expect(caps).toEqual(["decopilot-sandbox"]); + }); + + it("includes claude-code when probe succeeds", async () => { + const caps = await detectCapabilities({ + detectClaudeCode: async () => true, + detectCodex: async () => false, + }); + expect(caps).toEqual(["decopilot-sandbox", "claude-code"]); + }); + + it("includes codex when probe succeeds", async () => { + const caps = await detectCapabilities({ + detectClaudeCode: async () => false, + detectCodex: async () => true, + }); + expect(caps).toEqual(["decopilot-sandbox", "codex"]); + }); + + it("includes both when both probes succeed", async () => { + const caps = await detectCapabilities({ + detectClaudeCode: async () => true, + detectCodex: async () => true, + }); + expect(caps).toEqual(["decopilot-sandbox", "claude-code", "codex"]); + }); + + it("treats throwing probes as false", async () => { + const caps = await detectCapabilities({ + detectClaudeCode: async () => { + throw new Error("not found"); + }, + detectCodex: async () => { + throw new Error("oops"); + }, + }); + expect(caps).toEqual(["decopilot-sandbox"]); + }); +}); diff --git a/apps/mesh/src/link-daemon/capabilities.ts 
b/apps/mesh/src/link-daemon/capabilities.ts new file mode 100644 index 0000000000..1d54d9a3b9 --- /dev/null +++ b/apps/mesh/src/link-daemon/capabilities.ts @@ -0,0 +1,72 @@ +import type { Capability } from "@/links/protocol"; + +/** + * Detected once at daemon startup. The result rides the existing + * `capabilities: Capability[]` field on the registration payload, so the + * cluster sees an accurate view of what this laptop can actually run. + * + * `decopilot-sandbox` is unconditional — the daemon process IS the sandbox + * host, so it can always serve that capability. + */ +export interface CapabilityProbes { + detectClaudeCode: () => Promise<boolean>; + detectCodex: () => Promise<boolean>; +} + +/** + * Runs both probes concurrently and returns the capability list. A probe + * that rejects counts as "not available". + */ +export async function detectCapabilities( + probes: CapabilityProbes = defaultProbes, +): Promise<Capability[]> { + const caps: Capability[] = ["decopilot-sandbox"]; + const [hasClaudeCode, hasCodex] = await Promise.all([ + probes.detectClaudeCode().catch(() => false), + probes.detectCodex().catch(() => false), + ]); + if (hasClaudeCode) caps.push("claude-code"); + if (hasCodex) caps.push("codex"); + return caps; +} + +/** True when the Claude agent SDK loads and reports an account email. */ +async function detectClaudeCode(): Promise<boolean> { + try { + const { query } = await import("@anthropic-ai/claude-agent-sdk"); + const q = query({ prompt: "", options: { maxTurns: 1 } }); + try { + const info = await q.accountInfo(); + return Boolean(info.email); + } finally { + // Always end the query generator, even when accountInfo() rejects. + void q.return(undefined).catch(() => {}); + } + } catch { + return false; + } +} + +/** True when `codex --version` exits 0; the probe is killed after 10 s. */ +async function detectCodex(): Promise<boolean> { + try { + const proc = Bun.spawn(["codex", "--version"], { + stdout: "ignore", + stderr: "ignore", + }); + const timeout = setTimeout(() => proc.kill(), 10_000); + try { + return (await proc.exited) === 0; + } finally { + // Clear the kill timer on every exit path. + clearTimeout(timeout); + } + } catch { + return false; + } +} + +const defaultProbes: CapabilityProbes = { + detectClaudeCode, + detectCodex, +}; diff --git a/apps/mesh/src/link-daemon/control-plane.test.ts b/apps/mesh/src/link-daemon/control-plane.test.ts new file mode 100644 index 0000000000..bc043b524f --- /dev/null +++
b/apps/mesh/src/link-daemon/control-plane.test.ts @@ -0,0 +1,107 @@ +import { describe, expect, it } from "bun:test"; +import { signRequest } from "@/links/protocol"; +import { makeControlPlaneHandler } from "./control-plane"; +import { createLaptopSandboxProvider } from "./sandbox-provider"; + +const SECRET = "test-link-secret"; + +function buildHandler() { + let portCounter = 30000; + const provider = createLaptopSandboxProvider({ + dataDir: "/tmp/link-cp-test", + spawnDaemon: () => ({ port: 0, kill: () => {} }), + postConfig: async () => {}, + waitForHealth: async () => {}, + openDaemonTunnel: async ({ subDomain }) => ({ + publicUrl: `https://${subDomain}`, + closed: new Promise(() => {}), + close: () => {}, + }), + pickPort: () => portCounter++, + }); + const nonces = new Set(); + const handler = makeControlPlaneHandler({ + provider, + linkSecret: SECRET, + seenNonce: (n) => nonces.has(n), + recordNonce: (n) => nonces.add(n), + }); + return { handler, provider }; +} + +describe("control plane handler", () => { + it("rejects unsigned requests with 401", async () => { + const { handler } = buildHandler(); + const res = await handler( + new Request("http://localhost:5174/api/sandboxes", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ handle: "abc" }), + }), + ); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body).toMatchObject({ error: "unauthorized" }); + }); + + it("accepts signed POST /api/sandboxes and returns sandboxUrl", async () => { + const { handler } = buildHandler(); + const body = JSON.stringify({ handle: "abc" }); + const sig = signRequest({ + secret: SECRET, + method: "POST", + path: "/api/sandboxes", + body, + }); + const res = await handler( + new Request("http://localhost:5174/api/sandboxes", { + method: "POST", + headers: { "content-type": "application/json", ...sig }, + body, + }), + ); + expect(res.status).toBe(200); + const out = (await res.json()) as { sandboxUrl: 
string }; + expect(out.sandboxUrl).toBe("https://abc.deco.host"); + }); + + it("rejects nonce replay", async () => { + const { handler } = buildHandler(); + const body = JSON.stringify({ handle: "abc" }); + const sig = signRequest({ + secret: SECRET, + method: "POST", + path: "/api/sandboxes", + body, + }); + const make = () => + new Request("http://localhost:5174/api/sandboxes", { + method: "POST", + headers: { "content-type": "application/json", ...sig }, + body, + }); + const first = await handler(make()); + expect(first.status).toBe(200); + const second = await handler(make()); + expect(second.status).toBe(401); + const reason = (await second.json()) as { reason: string }; + expect(reason.reason).toBe("nonce_replay"); + }); + + it("returns 404 for unrecognised paths after auth passes", async () => { + const { handler } = buildHandler(); + const sig = signRequest({ + secret: SECRET, + method: "GET", + path: "/something-else", + body: "", + }); + const res = await handler( + new Request("http://localhost:5174/something-else", { + method: "GET", + headers: { ...sig }, + }), + ); + expect(res.status).toBe(404); + }); +}); diff --git a/apps/mesh/src/link-daemon/control-plane.ts b/apps/mesh/src/link-daemon/control-plane.ts new file mode 100644 index 0000000000..c44229f8b8 --- /dev/null +++ b/apps/mesh/src/link-daemon/control-plane.ts @@ -0,0 +1,104 @@ +/** + * The link daemon's public HTTP surface. Every request is HMAC-verified + * against the link's `linkSecret`. Routes: + * + * POST /api/sandboxes — ensure a sandbox for a handle + * DELETE /api/sandboxes/<handle> — tear it down + * + * Browser preview + cluster→daemon RPCs are NOT served here anymore — + * each daemon has its own warp tunnel (<handle>.deco.host) opened by + * the laptop provider at ensureSandbox time. The cluster reads the + * URL from the link's response, persists it in sandbox_runner_state, + * and talks to the daemon directly. + * + * 401 = bad/missing signature; 404 = unknown path / unknown handle.
+ * + * Nonce-replay protection is delegated to the caller via `seenNonce` / + * `recordNonce` so production can wire up a TTL'd Map without the + * verifier needing direct mutable state. + */ + +import { verifyRequest } from "@/links/protocol"; +import type { LaptopSandboxProvider, RepoRef } from "./sandbox-provider"; + +export interface ControlPlaneDeps { + provider: LaptopSandboxProvider; + linkSecret: string; + seenNonce: (nonce: string) => boolean; + recordNonce: (nonce: string) => void; +} + +interface EnsureSandboxBody { + handle: string; + repo?: RepoRef; +} + +export function makeControlPlaneHandler( + deps: ControlPlaneDeps, +): (req: Request) => Promise<Response> { + return async (req: Request): Promise<Response> => { + const url = new URL(req.url); + const method = req.method.toUpperCase(); + + const requestBody = + method === "GET" || method === "HEAD" ? "" : await req.text(); + + const verification = verifyRequest({ + secret: deps.linkSecret, + method, + path: url.pathname, + body: requestBody, + headers: Object.fromEntries(req.headers), + seenNonce: deps.seenNonce, + }); + if (!verification.valid) { + return new Response( + JSON.stringify({ error: "unauthorized", reason: verification.reason }), + { + status: 401, + headers: { "content-type": "application/json" }, + }, + ); + } + // Header names are case-insensitive, so a single lookup is enough.
+ const nonce = req.headers.get("x-mesh-nonce"); + if (nonce) deps.recordNonce(nonce); + + if (url.pathname === "/api/sandboxes" && method === "POST") { + let parsed: EnsureSandboxBody | null; + try { + parsed = JSON.parse(requestBody) as EnsureSandboxBody | null; + } catch { + return new Response(JSON.stringify({ error: "invalid_json" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } + if (typeof parsed?.handle !== "string" || parsed.handle.length === 0) { + return new Response(JSON.stringify({ error: "missing_handle" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } + const { sandboxUrl } = await deps.provider.ensureSandbox(parsed); + return new Response(JSON.stringify({ sandboxUrl }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + } + + if (url.pathname.startsWith("/api/sandboxes/") && method === "DELETE") { + const handle = url.pathname.slice("/api/sandboxes/".length); + if (!handle) { + return new Response(JSON.stringify({ error: "missing_handle" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } + await deps.provider.deleteSandbox(handle); + return new Response(null, { status: 204 }); + } + + return new Response("not found", { status: 404 }); + }; +} diff --git a/apps/mesh/src/link-daemon/index.ts b/apps/mesh/src/link-daemon/index.ts new file mode 100644 index 0000000000..cf0c1882a3 --- /dev/null +++ b/apps/mesh/src/link-daemon/index.ts @@ -0,0 +1,283 @@ +/** + * Laptop-side link daemon. + * + * Boots a local Bun.serve, opens a Warp tunnel (unless `noTunnel`), + * registers with the cluster's `/api/links` to receive a `linkSecret`, + * then serves the control-plane HMAC handler (sandbox lifecycle only). + * On stop (SIGINT/SIGTERM or `handle.stop()`) it + * gracefully deregisters, kills any sandboxes, and closes the tunnel. + * + * The OAuth session is read from `<dataDir>/.deco/session.json` (the + * format `deco auth login` writes).
/** Options accepted by {@link startLinkDaemon}. */
export interface StartLinkDaemonOptions {
  /** Local port the control-plane server binds and the tunnel forwards to. */
  port: number;
  /** Skip the tunnel and advertise `http://localhost:<port>` to the cluster. */
  noTunnel: boolean;
  /** Cluster origin; `/api/links/...` paths are appended to it verbatim. */
  clusterBaseUrl: string;
  /** Directory handed to the session reader, machine-id store, daemon
   * spawner and sandbox provider. */
  dataDir: string;
}

export interface LinkDaemonHandle {
  /** Resolves with the daemon's exit code when it shuts down. */
  stopped: Promise<number>;
  /** Trigger graceful shutdown (equivalent to receiving SIGTERM). */
  stop: () => Promise<void>;
}

/**
 * Boot the link daemon: open the tunnel (unless disabled), register with
 * the cluster, wire the laptop sandbox provider, serve the control plane
 * and start heartbeating.
 *
 * @param opts - Port, tunnel mode, cluster origin and data directory.
 * @returns A handle exposing `stopped` (settles after shutdown) and `stop`.
 * @throws Error when no session file is found, or when tunnel setup,
 *   registration or the port bind fails.
 */
export async function startLinkDaemon(
  opts: StartLinkDaemonOptions,
): Promise<LinkDaemonHandle> {
  // Upper bound on how long shutdown waits for the cluster to acknowledge
  // the deregistration. Without it an unreachable cluster stalls Ctrl-C.
  const deregisterTimeoutMs = 5_000;

  const session = await readSession(opts.dataDir);
  if (!session) {
    throw new Error(
      "No session found. Run `deco auth login` first, then re-run `deco link`.",
    );
  }

  const machineId = await loadOrCreateMachineId(opts.dataDir);
  const cliVersion = process.env.npm_package_version ?? "0.0.0";

  let tunnel: {
    publicUrl: string;
    close: () => void;
    closed?: Promise<void>;
  } | null = null;
  let tunnelBaseUrl: string;
  if (opts.noTunnel) {
    tunnelBaseUrl = `http://localhost:${opts.port}`;
    console.log(`Tunnel disabled. Advertising ${tunnelBaseUrl} to cluster.`);
  } else {
    const subDomain = computeLinkSubDomain(session.user.sub);
    tunnel = await openTunnel({
      subDomain,
      localAddr: `http://127.0.0.1:${opts.port}`,
    });
    tunnelBaseUrl = tunnel.publicUrl;
    console.log(`Tunnel open: ${tunnelBaseUrl}`);
  }

  // Register first — the cluster mints the linkSecret we then plumb
  // through to every spawned sandbox daemon (DAEMON_LINK_SECRET env),
  // so the cluster's HMAC-signed dispatch requests verify on arrival.
  const { linkSecret } = await registerWithCluster({
    clusterBaseUrl: opts.clusterBaseUrl,
    sessionToken: session.accessToken,
    machineId,
    cliVersion,
    capabilities: await detectCapabilities(),
    tunnelUrl: opts.noTunnel ? tunnelBaseUrl : undefined,
  });
  console.log(
    `Linked. Protocol v${LINK_PROTOCOL_VERSION}. machineId=${machineId}`,
  );

  // Per-port DAEMON_TOKEN registry so postSandboxConfig can authenticate
  // against the spawned daemon's non-dispatch routes.
  const perPortTokens = new Map<number, string>();

  const innerSpawn = createDefaultDaemonSpawn(opts.dataDir);
  const spawnSandboxDaemon = async ({
    workdir,
    port,
  }: {
    workdir: string;
    handle: string;
    port: number;
  }): Promise<SpawnResult> => {
    const token = randomBytes(24).toString("hex");
    const bootId = randomUUID();
    const env: Record<string, string> = {
      DAEMON_TOKEN: token,
      DAEMON_BOOT_ID: bootId,
      APP_ROOT: workdir,
      PROXY_PORT: String(port),
      DAEMON_LINK_SECRET: linkSecret,
    };
    perPortTokens.set(port, token);
    // Drop the token once this daemon is gone. The identity check keeps a
    // late exit from erasing the token of a newer daemon that was handed
    // the same port in the meantime.
    const forgetToken = (): void => {
      if (perPortTokens.get(port) === token) perPortTokens.delete(port);
    };
    try {
      const proc = await innerSpawn({ workdir, env, daemonPort: port });
      return {
        port,
        kill: (sig) => proc.kill(sig),
        exited: proc.exited.finally(forgetToken).then(() => undefined),
      };
    } catch (err) {
      forgetToken();
      throw err;
    }
  };

  const postSandboxConfig = async (
    port: number,
    devPort: number,
    config: {
      repo?: {
        cloneUrl: string;
        branch?: string;
        userName?: string;
        userEmail?: string;
      };
    },
  ): Promise<void> => {
    const token = perPortTokens.get(port);
    if (!token) {
      throw new Error(`no daemon token for port ${port}`);
    }
    const daemonUrl = `http://127.0.0.1:${port}`;
    // The daemon's TenantConfig wire shape is `{ git, application }`.
    // packageManager + runtime are auto-detected by the orchestrator from
    // the lockfile post-clone, so we leave them out. `application.port`
    // is the port the dev script binds to — without it, frameworks
    // default to 3000 and collide with the cluster's own dev server.
    // Allocating a fresh ephemeral per sandbox keeps multiple parallel
    // sandboxes from also colliding with each other.
    const payload: Record<string, unknown> = {
      application: { port: devPort },
    };
    if (config.repo) {
      payload.git = {
        repository: {
          cloneUrl: config.repo.cloneUrl,
          branch: config.repo.branch,
        },
        // Identity is all-or-nothing: only sent when both parts are known.
        ...(config.repo.userName && config.repo.userEmail
          ? {
              identity: {
                userName: config.repo.userName,
                userEmail: config.repo.userEmail,
              },
            }
          : {}),
      };
    }
    await daemonPostConfig(daemonUrl, token, payload);
  };

  const waitForSandboxHealth = async (port: number): Promise<void> => {
    await waitForDaemonReady(`http://127.0.0.1:${port}`);
  };

  const provider = createLaptopSandboxProvider({
    dataDir: opts.dataDir,
    spawnDaemon: spawnSandboxDaemon,
    postConfig: postSandboxConfig,
    waitForHealth: waitForSandboxHealth,
    openDaemonTunnel: async ({ subDomain, localAddr }) => {
      if (opts.noTunnel) return null;
      return openTunnel({ subDomain, localAddr });
    },
    maxSandboxes: 20,
  });

  // TTL'd nonce cache for HMAC replay protection.
  const nonceTtlMs = 60_000;
  const nonces = new Map<string, number>();
  const sweepNonces = (): void => {
    const now = Date.now();
    for (const [n, exp] of nonces) if (exp <= now) nonces.delete(n);
  };
  const sweepInterval = setInterval(sweepNonces, nonceTtlMs);
  sweepInterval.unref?.();

  const handler = makeControlPlaneHandler({
    provider,
    linkSecret,
    seenNonce: (n) => {
      const exp = nonces.get(n);
      return exp != null && exp > Date.now();
    },
    recordNonce: (n) => {
      nonces.set(n, Date.now() + nonceTtlMs);
    },
  });

  // Bind the port BEFORE starting heartbeats. If the bind throws (e.g. the
  // port is taken) no heartbeat timer is left running to keep a daemon that
  // never listened looking online to the cluster.
  const server = Bun.serve({
    port: opts.port,
    fetch: handler,
  });
  console.log(`Listening on http://127.0.0.1:${server.port}`);

  const stopHeartbeat = startHeartbeatLoop({
    clusterBaseUrl: opts.clusterBaseUrl,
    linkSecret,
    userSub: session.user.sub,
  });

  // ── Graceful shutdown ─────────────────────────────────────────────
  let resolveStopped!: (code: number) => void;
  const stopped = new Promise<number>((resolve) => {
    resolveStopped = resolve;
  });

  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return;
    shuttingDown = true;
    console.log("\nShutting down…");
    stopHeartbeat();
    clearInterval(sweepInterval);
    try {
      await fetch(`${opts.clusterBaseUrl}/api/links/me`, {
        method: "DELETE",
        headers: {
          "x-link-secret": linkSecret,
          "x-mesh-user-sub": session.user.sub,
        },
        signal: AbortSignal.timeout(deregisterTimeoutMs),
      });
    } catch {
      // best-effort: the registration expires on its own once heartbeats stop
    }
    try {
      await provider.shutdown();
    } catch {
      // ignore
    }
    try {
      tunnel?.close();
    } catch {
      // ignore
    }
    try {
      server.stop(true);
    } catch {
      // ignore
    }
    resolveStopped(0);
  };

  const onSigInt = () => void shutdown();
  const onSigTerm = () => void shutdown();
  process.on("SIGINT", onSigInt);
  process.on("SIGTERM", onSigTerm);
  void stopped.then(() => {
    process.off("SIGINT", onSigInt);
    process.off("SIGTERM", onSigTerm);
  });

  // `closed` is optional on the local tunnel type. Only watch it when it is
  // actually present — wrapping `undefined` in Promise.resolve would settle
  // immediately and shut the daemon down right after boot.
  const tunnelClosed = tunnel?.closed;
  if (tunnelClosed) {
    // If the Warp tunnel drops, the link is effectively offline — exit
    // so the process supervisor (or the user's `&&` chain) can decide
    // what to do next. The cluster will mark us offline within ~30s
    // when heartbeats stop arriving.
    const onTunnelGone = (): void => {
      if (shuttingDown) return;
      console.error("Tunnel closed unexpectedly; exiting.");
      void shutdown();
    };
    // A rejected `closed` also means the tunnel is gone; handling it here
    // avoids an unhandled rejection.
    tunnelClosed.then(onTunnelGone, onTunnelGone);
  }

  return { stopped, stop: shutdown };
}
/** Location of the persisted machine id underneath `dataDir`. */
function machineIdPath(dataDir: string): string {
  return join(dataDir, ".deco", "link", "machine-id");
}

/**
 * Return the machine id stored under `dataDir`, creating one when the file
 * is missing, unreadable, or blank.
 *
 * A new id is 16 random bytes rendered as 32 hex characters; it is written
 * to disk (parent directories are created as needed) before being returned,
 * so later calls yield the same value.
 *
 * @param dataDir - Directory that holds the `.deco/link/machine-id` file.
 * @returns The stored id with surrounding whitespace removed, or the newly
 *   generated one.
 */
export async function loadOrCreateMachineId(dataDir: string): Promise<string> {
  const idFile = machineIdPath(dataDir);

  let stored = "";
  try {
    stored = (await readFile(idFile, "utf8")).trim();
  } catch {
    // Any read failure is treated the same as "no id yet".
  }
  if (stored !== "") return stored;

  const freshId = randomBytes(16).toString("hex");
  await mkdir(dirname(idFile), { recursive: true });
  await writeFile(idFile, freshId);
  return freshId;
}
+ */ + +import { + LINK_PROTOCOL_VERSION, + type Capability, + type RegistrationPayload, + type RegistrationResponse, +} from "@/links/protocol"; + +export interface RegistrationInput { + clusterBaseUrl: string; + /** OAuth access token from the user's CLI session. */ + sessionToken: string; + machineId: string; + cliVersion: string; + capabilities: Capability[]; + /** + * Only honored when the cluster has `MESH_ALLOW_LOCALHOST_LINKS=1`. + * In production the cluster derives the expected Warp domain from + * the user's sub and ignores any value sent here. + */ + tunnelUrl?: string; +} + +export async function registerWithCluster( + input: RegistrationInput, +): Promise { + const body: RegistrationPayload = { + machineId: input.machineId, + cliVersion: input.cliVersion, + protocolVersion: LINK_PROTOCOL_VERSION, + capabilities: input.capabilities, + ...(input.tunnelUrl ? { tunnelUrl: input.tunnelUrl } : {}), + }; + const res = await fetch(`${input.clusterBaseUrl}/api/links`, { + method: "POST", + body: JSON.stringify(body), + headers: { + "content-type": "application/json", + authorization: `Bearer ${input.sessionToken}`, + }, + }); + if (res.status === 409) { + const data = (await res.json().catch(() => ({}))) as { + activeMachineId?: string; + }; + throw new Error( + `Another machine (${data.activeMachineId ?? "unknown"}) is already linked for this account. ` + + `Stop that one first, or wait ~30s for its registration to expire.`, + ); + } + if (res.status === 426) { + const data = (await res.json().catch(() => ({}))) as { + installHint?: string; + }; + throw new Error( + `Protocol too old (link v${LINK_PROTOCOL_VERSION}). ${data.installHint ?? "Upgrade the decocms CLI."}`, + ); + } + if (res.status === 401) { + throw new Error( + "Cluster rejected the session token (401). Re-run `decocms auth login` and try again.", + ); + } + if (!res.ok) { + const text = await res.text().catch(() => ""); + throw new Error( + `Registration failed: HTTP ${res.status}${text ? 
/** Inputs for {@link startHeartbeatLoop}. */
export interface HeartbeatInput {
  /** Cluster origin; `/api/links/heartbeat` is appended to it. */
  clusterBaseUrl: string;
  /** Raw link secret returned at registration. Sent verbatim in the
   * `x-link-secret` header of every heartbeat. */
  linkSecret: string;
  /** User sub sent in the `x-mesh-user-sub` header of every heartbeat. */
  userSub: string;
  /** Delay between heartbeats in milliseconds. Defaults to 10 000. */
  intervalMs?: number;
  /** Test seam — defaults to global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Test seam — defaults to setTimeout. */
  schedule?: (cb: () => void, ms: number) => unknown;
  /** Logger override (silences warnings in tests). */
  onError?: (err: unknown) => void;
}

/**
 * Start a self-rescheduling heartbeat against the cluster.
 *
 * The first beat fires one interval after the call. Each beat POSTs to
 * `/api/links/heartbeat`, reports a non-OK response or a thrown error via
 * `onError`, and then queues the next beat unless the loop was stopped.
 *
 * @param input - Endpoint, credentials and optional test seams.
 * @returns A function that stops the loop. A beat that is already queued
 *   still fires, but returns without sending or rescheduling.
 */
export function startHeartbeatLoop(input: HeartbeatInput): () => void {
  const periodMs = input.intervalMs ?? 10_000;
  const send = input.fetchImpl ?? fetch;
  const defer = input.schedule ?? setTimeout;
  const report =
    input.onError ??
    ((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`heartbeat failed: ${reason}`);
    });

  let halted = false;

  function queueNextBeat(): void {
    defer(() => void beat(), periodMs);
  }

  async function beat(): Promise<void> {
    if (halted) return;
    try {
      const response = await send(
        `${input.clusterBaseUrl}/api/links/heartbeat`,
        {
          method: "POST",
          headers: {
            "x-link-secret": input.linkSecret,
            "x-mesh-user-sub": input.userSub,
          },
        },
      );
      const rejected = !response.ok && response.status !== 204;
      if (rejected) {
        report(new Error(`heartbeat HTTP ${response.status}`));
      }
    } catch (err) {
      report(err);
    }
    // Re-check: stop() may have been called while the request was in flight.
    if (halted) return;
    queueNextBeat();
  }

  queueNextBeat();
  return () => {
    halted = true;
  };
}
finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }); + + it("re-attaches to an existing sandbox on idempotent create", async () => { + const dataDir = tmpDataDir(); + try { + let spawnCount = 0; + let portCounter = 30000; + const provider = createLaptopSandboxProvider({ + dataDir, + spawnDaemon: () => { + spawnCount++; + return fakeDaemonSpawner(); + }, + postConfig: async () => {}, + waitForHealth: async () => {}, + openDaemonTunnel: nullTunnelOpener(), + pickPort: () => portCounter++, + }); + await provider.ensureSandbox({ handle: "abc", repo: undefined }); + await provider.ensureSandbox({ handle: "abc", repo: undefined }); + expect(spawnCount).toBe(1); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }); + + it("LRU-evicts when the cap is exceeded", async () => { + const dataDir = tmpDataDir(); + try { + let spawnCount = 0; + let portCounter = 30000; + const provider = createLaptopSandboxProvider({ + dataDir, + spawnDaemon: () => { + spawnCount++; + return fakeDaemonSpawner(); + }, + postConfig: async () => {}, + waitForHealth: async () => {}, + openDaemonTunnel: nullTunnelOpener(), + pickPort: () => portCounter++, + maxSandboxes: 2, + }); + await provider.ensureSandbox({ handle: "a", repo: undefined }); + // Slight delay so "b" has a strictly later lastUsedAt than "a". + await new Promise((r) => setTimeout(r, 2)); + await provider.ensureSandbox({ handle: "b", repo: undefined }); + await new Promise((r) => setTimeout(r, 2)); + await provider.ensureSandbox({ handle: "c", repo: undefined }); + // "a" should have been evicted (it was the LRU when "c" arrived). 
// Each case gets its own throw-away data dir (and removes it afterwards):
// the provider really creates `<dataDir>/.deco/link/sandboxes/<handle>` on
// disk, so a shared hard-coded path would leak directories between runs.
describe("LaptopSandboxProvider tunnel lifecycle", () => {
  it("opens a tunnel after postConfig and returns the public URL", async () => {
    const dataDir = tmpDataDir();
    try {
      const tunnelHandles: Array<{ subDomain: string }> = [];
      // Records call order so the "after postConfig" claim is actually checked.
      const events: string[] = [];

      const provider = createLaptopSandboxProvider({
        dataDir,
        spawnDaemon: async ({ port }) => ({ port, kill: () => {} }),
        postConfig: async () => {
          events.push("postConfig");
        },
        waitForHealth: async () => {},
        openDaemonTunnel: async ({ subDomain }) => {
          events.push("openDaemonTunnel");
          tunnelHandles.push({ subDomain });
          return {
            publicUrl: `https://${subDomain}`,
            closed: new Promise<void>(() => {}),
            close: () => {},
          };
        },
        pickPort: (() => {
          let n = 50_000;
          return () => n++;
        })(),
      });

      const { sandboxUrl, port } = await provider.ensureSandbox({
        handle: "test-handle-abc",
      });
      expect(sandboxUrl).toBe("https://test-handle-abc.deco.host");
      expect(port).toBe(50_000);
      expect(tunnelHandles).toHaveLength(1);
      expect(tunnelHandles[0]?.subDomain).toBe("test-handle-abc.deco.host");
      expect(events).toEqual(["postConfig", "openDaemonTunnel"]);
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
  });

  it("falls back to 127.0.0.1 URL when openDaemonTunnel returns null", async () => {
    const dataDir = tmpDataDir();
    try {
      const provider = createLaptopSandboxProvider({
        dataDir,
        spawnDaemon: async ({ port }) => ({ port, kill: () => {} }),
        postConfig: async () => {},
        waitForHealth: async () => {},
        openDaemonTunnel: async () => null,
        pickPort: (() => {
          let n = 50_100;
          return () => n++;
        })(),
      });

      const { sandboxUrl } = await provider.ensureSandbox({
        handle: "test-handle-xyz",
      });
      expect(sandboxUrl).toBe("http://127.0.0.1:50100");
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
  });

  it("closes the tunnel on deleteSandbox", async () => {
    const dataDir = tmpDataDir();
    try {
      const closes: string[] = [];
      const provider = createLaptopSandboxProvider({
        dataDir,
        spawnDaemon: async ({ port }) => ({ port, kill: () => {} }),
        postConfig: async () => {},
        waitForHealth: async () => {},
        openDaemonTunnel: async ({ subDomain }) => ({
          publicUrl: `https://${subDomain}`,
          closed: new Promise<void>(() => {}),
          close: () => closes.push(subDomain),
        }),
        pickPort: (() => {
          let n = 50_200;
          return () => n++;
        })(),
      });

      await provider.ensureSandbox({ handle: "close-me" });
      await provider.deleteSandbox("close-me");
      expect(closes).toEqual(["close-me.deco.host"]);
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
  });

  it("clears the map entry when the daemon process exits unexpectedly", async () => {
    const dataDir = tmpDataDir();
    try {
      let exitResolver: (() => void) | null = null;
      const exitPromise = new Promise<void>((r) => {
        exitResolver = r;
      });

      const provider = createLaptopSandboxProvider({
        dataDir,
        spawnDaemon: async ({ port }) => ({
          port,
          kill: () => {},
          exited: exitPromise,
        }),
        postConfig: async () => {},
        waitForHealth: async () => {},
        openDaemonTunnel: async ({ subDomain }) => ({
          publicUrl: `https://${subDomain}`,
          closed: new Promise<void>(() => {}),
          close: () => {},
        }),
        pickPort: (() => {
          let n = 50_300;
          return () => n++;
        })(),
      });

      await provider.ensureSandbox({ handle: "crash-me" });
      expect(provider.listSandboxes()).toHaveLength(1);

      exitResolver!();
      // Give the provider's exit watchdog a turn of the event loop to run.
      await new Promise((r) => setTimeout(r, 0));
      expect(provider.listSandboxes()).toHaveLength(0);
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
  });
});
Sandboxes with active dispatches are pinned — eviction + * skips them and we tolerate going temporarily over the cap. The + * cluster's `remoteDispatch` calls `provider.recordHit` indirectly via + * the reverse-proxy hits, keeping warm sandboxes warm. + * + * The actual process spawn / config-post / health-probe are passed in + * as deps so the production wiring (apps/mesh/src/link-daemon/index.ts) + * can plug in `Bun.spawn` against the daemon bundle and the tests can + * stay lightweight. + */ + +import { mkdir } from "node:fs/promises"; +import { createServer } from "node:net"; +import { join } from "node:path"; +import type { TunnelHandle } from "./tunnel"; + +export interface SpawnResult { + port: number; + kill: (signal?: NodeJS.Signals) => void; + /** Resolves when the daemon process exits (cleanly or otherwise). Optional + * for backwards-compat with test fakes; production spawn always sets it. */ + exited?: Promise; +} + +export interface RepoRef { + cloneUrl: string; + branch?: string; + /** Git author identity for commits inside the sandbox. The cluster sources + * these from the linked GitHub user; the daemon uses them when running + * `git commit`. Optional — clone works without them. */ + userName?: string; + userEmail?: string; +} + +export interface EnsureSandboxInput { + handle: string; + repo?: RepoRef; +} + +export interface SandboxState { + handle: string; + port: number; + process: SpawnResult; + /** Warp tunnel handle, or null in --no-tunnel mode. */ + tunnel: TunnelHandle | null; + /** Public URL the cluster + browser use to reach this daemon. 
/** Public surface of the per-handle sandbox manager. */
export interface LaptopSandboxProvider {
  /** Return the existing sandbox for `input.handle`, or spawn one. */
  ensureSandbox(
    input: EnsureSandboxInput,
  ): Promise<{ sandboxUrl: string; port: number }>;
  /** Daemon port for `handle` (also refreshes its LRU stamp), or null. */
  proxyPort(handle: string): number | null;
  /** Refresh the LRU stamp for `handle`; no-op when unknown. */
  recordHit(handle: string): void;
  /** Pin `handle` against eviction; the returned release is idempotent. */
  acquireDispatch(handle: string): () => void;
  /** Snapshot of the sandboxes currently tracked. */
  listSandboxes(): SandboxState[];
  /** Stop and forget one sandbox; no-op when unknown. */
  deleteSandbox(handle: string): Promise<void>;
  /** Stop and forget every tracked sandbox. */
  shutdown(): Promise<void>;
}

export interface OpenTunnelDeps {
  /** Public hostname (subdomain) for the daemon's tunnel.
   * e.g. `mellow-slate-bb5b7.deco.host` */
  subDomain: string;
  /** Local target the tunnel forwards to. e.g. `http://127.0.0.1:63266` */
  localAddr: string;
}

export interface LaptopSandboxProviderDeps {
  /** Root under which `.deco/link/sandboxes/<handle>` workdirs are created. */
  dataDir: string;
  /** Start a daemon process for one sandbox. */
  spawnDaemon: (args: {
    workdir: string;
    handle: string;
    port: number;
  }) => SpawnResult | Promise<SpawnResult>;
  /** Push the initial config to the daemon listening on `port`. */
  postConfig: (
    port: number,
    devPort: number,
    config: { repo?: RepoRef },
  ) => Promise<void>;
  /** Resolve once the daemon on `port` is ready to accept config. */
  waitForHealth: (port: number) => Promise<void>;
  /**
   * Open a warp tunnel for a daemon. Called per-spawn; the returned
   * TunnelHandle's `publicUrl` is what the cluster + browser see.
   * Return `null` to skip tunnel opening (e.g. --no-tunnel dev mode);
   * the provider will fall back to `http://127.0.0.1:<port>` as the URL.
   */
  openDaemonTunnel: (input: OpenTunnelDeps) => Promise<TunnelHandle | null>;
  /** Override port allocation (tests provide a deterministic value). */
  pickPort?: () => Promise<number> | number;
  /** Population cap that triggers LRU eviction. Defaults to 20. */
  maxSandboxes?: number;
}

/**
 * Build the sandbox provider.
 *
 * `ensureSandbox` is idempotent per handle and de-duplicates concurrent
 * callers. Before each new spawn, when the population has reached
 * `maxSandboxes`, the least-recently-used sandbox without an active
 * dispatch is stopped; if every sandbox is pinned the cap is exceeded
 * rather than evicting a busy one.
 *
 * @param deps - Process/tunnel/config seams plus the data directory.
 * @returns The provider object; all state lives in this closure.
 */
export function createLaptopSandboxProvider(
  deps: LaptopSandboxProviderDeps,
): LaptopSandboxProvider {
  const cap = deps.maxSandboxes ?? 20;
  const sandboxes = new Map<string, SandboxState>();
  const pickPort = deps.pickPort ?? allocateEphemeralPort;

  // In-flight ensureSandbox promises, keyed by handle. Several concurrent
  // callers can arrive before the first one has populated `sandboxes`;
  // memoizing the promise collapses them onto the first caller's work
  // instead of spawning one daemon each. Cleared on settle so a later
  // ensure starts fresh.
  const inflight = new Map<
    string,
    Promise<{ sandboxUrl: string; port: number }>
  >();

  // Bumped by shutdown(). A build that started under an older epoch must
  // not register itself once it finishes — nothing would ever stop it.
  let shutdownEpoch = 0;

  /** Best-effort stop of a daemon and its tunnel; never throws. */
  function stopQuietly(
    proc: SpawnResult,
    signal: NodeJS.Signals,
    tunnel: TunnelHandle | null,
  ): void {
    try {
      proc.kill(signal);
    } catch {
      // already gone
    }
    try {
      tunnel?.close();
    } catch {
      // warp-node Connected has no real close yet; ignore
    }
  }

  /** SIGTERM a tracked sandbox, close its tunnel and drop its map entry. */
  function retire(state: SandboxState): void {
    stopQuietly(state.process, "SIGTERM", state.tunnel);
    if (sandboxes.get(state.handle) === state) sandboxes.delete(state.handle);
  }

  /** Evict the single least-recently-used unpinned sandbox when at the cap. */
  function evictIfNeeded(): void {
    if (sandboxes.size < cap) return;
    let victim: SandboxState | undefined;
    for (const candidate of sandboxes.values()) {
      if (candidate.activeDispatchCount !== 0) continue; // pinned
      // Strict `<` keeps the earliest-inserted entry on ties.
      if (!victim || candidate.lastUsedAt < victim.lastUsedAt) {
        victim = candidate;
      }
    }
    if (!victim) return; // every sandbox is pinned
    retire(victim);
  }

  const buildEntry = async (
    input: EnsureSandboxInput,
  ): Promise<{ sandboxUrl: string; port: number }> => {
    const epochAtStart = shutdownEpoch;
    evictIfNeeded();
    const workdir = join(
      deps.dataDir,
      ".deco",
      "link",
      "sandboxes",
      input.handle,
    );
    await mkdir(workdir, { recursive: true });
    // Two ephemeral ports per sandbox: one for the daemon's HTTP/proxy
    // (port) and one for the dev script the orchestrator will spawn
    // (devPort). Without a dedicated devPort, every framework's
    // default 3000 collides with the cluster (and with other sandboxes).
    const [port, devPort] = await Promise.all([pickPort(), pickPort()]);
    const spawned = await deps.spawnDaemon({
      workdir,
      handle: input.handle,
      port,
    });
    let tunnel: TunnelHandle | null = null;
    try {
      await deps.waitForHealth(port);
      await deps.postConfig(port, devPort, { repo: input.repo });
      tunnel = await deps.openDaemonTunnel({
        subDomain: `${input.handle}.deco.host`,
        localAddr: `http://127.0.0.1:${port}`,
      });
      if (epochAtStart !== shutdownEpoch) {
        throw new Error(
          `sandbox provider shut down while "${input.handle}" was starting`,
        );
      }
    } catch (err) {
      // Setup failed or was overtaken by shutdown: do not leave the daemon
      // (or a tunnel that did open) behind.
      stopQuietly(spawned, "SIGKILL", tunnel);
      throw err;
    }
    const sandboxUrl = tunnel?.publicUrl ?? `http://127.0.0.1:${port}`;
    const state: SandboxState = {
      handle: input.handle,
      port,
      process: spawned,
      tunnel,
      sandboxUrl,
      lastUsedAt: Date.now(),
      activeDispatchCount: 0,
    };
    sandboxes.set(input.handle, state);

    // Watchdog: clear the map entry if the daemon process exits on its own.
    // Without this, ensureSandbox would keep returning the URL and port of
    // a daemon that no longer exists.
    if (spawned.exited) {
      const onExit = (): void => {
        // Skip when the entry was already replaced or removed.
        if (sandboxes.get(input.handle) !== state) return;
        sandboxes.delete(input.handle);
        try {
          state.tunnel?.close();
        } catch {
          // no-op
        }
      };
      // A rejected `exited` is treated like an exit, which also keeps the
      // rejection from going unhandled.
      spawned.exited.then(onExit, onExit);
    }

    return { sandboxUrl, port };
  };

  return {
    async ensureSandbox(input) {
      const existing = sandboxes.get(input.handle);
      if (existing) {
        existing.lastUsedAt = Date.now();
        return { sandboxUrl: existing.sandboxUrl, port: existing.port };
      }
      const pending = inflight.get(input.handle);
      if (pending) return pending;
      const promise = buildEntry(input).finally(() => {
        inflight.delete(input.handle);
      });
      inflight.set(input.handle, promise);
      return promise;
    },
    proxyPort(handle) {
      const s = sandboxes.get(handle);
      if (s) s.lastUsedAt = Date.now();
      return s?.port ?? null;
    },
    recordHit(handle) {
      const s = sandboxes.get(handle);
      if (s) s.lastUsedAt = Date.now();
    },
    acquireDispatch(handle) {
      const s = sandboxes.get(handle);
      if (!s) return () => {};
      s.activeDispatchCount += 1;
      let released = false;
      return () => {
        if (released) return;
        released = true;
        // Look the entry up again: the sandbox may have been replaced.
        const cur = sandboxes.get(handle);
        if (cur)
          cur.activeDispatchCount = Math.max(0, cur.activeDispatchCount - 1);
      };
    },
    listSandboxes() {
      return [...sandboxes.values()];
    },
    async deleteSandbox(handle) {
      const s = sandboxes.get(handle);
      if (!s) return;
      retire(s);
    },
    async shutdown() {
      shutdownEpoch += 1;
      for (const s of sandboxes.values()) {
        stopQuietly(s.process, "SIGTERM", s.tunnel);
      }
      sandboxes.clear();
    },
  };
}

/**
 * Bind to a kernel-chosen ephemeral port and return it after closing.
 * Race window between close() and the daemon's bind() is non-zero — in
 * practice the daemon's bind happens within milliseconds and we accept
 * the rare conflict (the caller surfaces the spawn failure).
 */
function allocateEphemeralPort(): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const srv = createServer();
    srv.unref();
    srv.on("error", reject);
    srv.listen(0, "127.0.0.1", () => {
      const addr = srv.address();
      if (addr && typeof addr === "object") {
        const port = addr.port;
        // Resolve only after close so the port is free for the daemon.
        srv.close(() => resolve(port));
      } else {
        srv.close(() => reject(new Error("could not allocate port")));
      }
    });
  });
}
/** Path of the session file: `session.json` directly inside `dataDir`. */
function sessionPath(dataDir: string): string {
  return join(dataDir, "session.json");
}

/**
 * Load the stored session from `dataDir`.
 *
 * @param dataDir - Directory containing `session.json`.
 * @returns The parsed session, or `null` when the file cannot be read, is
 *   not valid JSON, or lacks any of the required fields.
 */
export async function readSession(dataDir: string): Promise<Session | null> {
  try {
    const text = await readFile(sessionPath(dataDir), "utf8");
    const candidate: unknown = JSON.parse(text);
    return isSession(candidate) ? candidate : null;
  } catch {
    return null;
  }
}

/**
 * Structural check for a session object: `target`, `clientId`,
 * `accessToken` and `createdAt` must be strings, and `user` must be an
 * object whose `sub` is a string. Optional fields are not inspected.
 */
function isSession(value: unknown): value is Session {
  if (typeof value !== "object" || value === null) return false;
  const record = value as Record<string, unknown>;

  const requiredStrings = ["target", "clientId", "accessToken", "createdAt"];
  const allPresent = requiredStrings.every(
    (key) => typeof record[key] === "string",
  );
  if (!allPresent) return false;

  const user = record.user;
  if (typeof user !== "object" || user === null) return false;
  return typeof (user as Record<string, unknown>).sub === "string";
}
/** Warp tunnel server pre-OAuth shared key — same value `apps/mesh` used. */
// NOTE(review): this is a credential committed to source. It is only the
// last-resort fallback below; confirm it is acceptable to ship.
const LEGACY_TUNNEL_TOKEN = "c309424a-2dc4-46fe-bfc7-a7c10df59477";

/** If `tunnel.registered` doesn't resolve, treat as silent auth rejection. */
const REGISTRATION_TIMEOUT_MS = 15_000;

/**
 * Connect a Warp tunnel and wait until the server confirms registration.
 *
 * @param input - Subdomain, local forward target, and optional server /
 *   key overrides. The key falls back to `DECO_TUNNEL_SERVER_TOKEN`, then to
 *   the legacy shared key.
 * @returns A handle whose `publicUrl` is `https://<subDomain>` and whose
 *   `closed` promise settles when the tunnel disconnects.
 * @throws Error when registration is not confirmed within
 *   `REGISTRATION_TIMEOUT_MS`, or when connecting fails.
 */
export async function openTunnel(
  input: OpenTunnelInput,
): Promise<TunnelHandle> {
  // Loaded lazily so the dependency is only pulled in when a tunnel is used.
  const { connect } = await import("@deco-cx/warp-node");
  const tunnel = await connect({
    domain: input.subDomain,
    localAddr: input.localAddr,
    server: input.server ?? `wss://${input.subDomain}`,
    apiKey:
      input.apiKey ??
      process.env.DECO_TUNNEL_SERVER_TOKEN ??
      LEGACY_TUNNEL_TOKEN,
  });

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(
        new Error(
          `Tunnel registration timed out after ${
            REGISTRATION_TIMEOUT_MS / 1000
          }s — Warp server may have rejected the auth. Try upgrading the CLI.`,
        ),
      );
    }, REGISTRATION_TIMEOUT_MS);
  });
  try {
    await Promise.race([tunnel.registered, timedOut]);
  } finally {
    // Always cancel the timer. Left armed, it would hold the event loop
    // open for the full timeout after every successful registration.
    clearTimeout(timer);
  }

  return {
    publicUrl: `https://${input.subDomain}`,
    closed: tunnel.closed.then(() => undefined),
    close: () => {
      // @deco-cx/warp-node Connected has no close() method; the
      // connection closes on its own when the server drops it.
    },
  };
}
/**
 * Hostname the link daemon's own tunnel registers under for a given user.
 *
 * The surrounding file notes that the cluster derives the same host from
 * the authenticated userSub on its side, so the format must not drift.
 *
 * @param userSub - The user's sub, inserted verbatim (no escaping).
 * @returns `link-<userSub>.deco.host`.
 */
export function computeLinkSubDomain(userSub: string): string {
  const prefix = "link-";
  const suffix = ".deco.host";
  return prefix + userSub + suffix;
}
*/ +function makeSlowHarness(count = 100, delayMs = 10): SlowHarnessHandle { + let emitted = 0; + let finished = false; + const harness = { + async *stream() { + try { + for (let i = 0; i < count; i++) { + await new Promise((r) => setTimeout(r, delayMs)); + emitted++; + yield { type: "text-delta", id: "m1", delta: String(i) }; + } + } finally { + finished = true; + } + }, + }; + return { + harness, + chunksEmitted: () => emitted, + done: () => finished, + }; +} + +function makeDeps( + overrides: Partial[1]> = {}, +) { + return { + bearerSecret: SECRET, + lookupHarness: () => makeSlowHarness().harness, + seenNonce: () => false, + ...overrides, + }; +} + +function signedDispatch(body: string): Request { + const sig = signRequest({ + secret: SECRET, + method: "POST", + path: "/_decopilot_vm/dispatch", + body, + }); + return new Request("http://localhost/_decopilot_vm/dispatch", { + method: "POST", + body, + headers: { ...sig, "Content-Type": "application/json" }, + }); +} + +function signedCancel(runId: string): Request { + const path = `/_decopilot_vm/runs/${runId}`; + const sig = signRequest({ secret: SECRET, method: "DELETE", path, body: "" }); + return new Request(`http://localhost${path}`, { + method: "DELETE", + headers: { ...sig }, + }); +} + +describe("cancellation matrix", () => { + // The dispatch route uses module-scoped maps for activeRuns + tombstones; + // reset between tests so a stale entry from a previous case can't bleed + // into the next. Cross-test pollution here is subtle (tombstones live 60s). 
+ afterEach(() => { + resetDispatchStateForTests(); + }); + + it("DELETE before dispatch tombstones the runId so the dispatch returns 410", async () => { + const runId = "run-cancel-before"; + const cancelRes = await handleCancelRequest(signedCancel(runId), { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(cancelRes.status).toBe(204); + + const body = JSON.stringify({ + harnessId: "fake", + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId }, + }); + const res = await handleDispatchRequest(signedDispatch(body), makeDeps()); + expect(res.status).toBe(410); + // Drain the response so any stream is closed before the next test runs. + await res.text().catch(() => ""); + }); + + it("DELETE during dispatch aborts the harness loop and closes the SSE stream", async () => { + const runId = "run-cancel-during"; + const handle = makeSlowHarness(100, 10); + + const body = JSON.stringify({ + harnessId: "fake", + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId }, + }); + + const dispatchRes = await handleDispatchRequest( + signedDispatch(body), + makeDeps({ lookupHarness: () => handle.harness }), + ); + expect(dispatchRes.status).toBe(200); + expect(dispatchRes.body).not.toBeNull(); + + // Start reading the stream in the background. We'll cancel mid-flight + // and then wait for the stream to drain. + const reader = dispatchRes.body!.getReader(); + const decoder = new TextDecoder(); + let collected = ""; + const drainPromise = (async () => { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + collected += decoder.decode(value, { stream: true }); + } + collected += decoder.decode(); + })(); + + // Let a few chunks land, then fire the cancel. 
+ await new Promise((r) => setTimeout(r, 50)); + const emittedBeforeCancel = handle.chunksEmitted(); + expect(emittedBeforeCancel).toBeGreaterThan(0); + expect(emittedBeforeCancel).toBeLessThan(100); + + const cancelRes = await handleCancelRequest(signedCancel(runId), { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(cancelRes.status).toBe(204); + + // The stream should close quickly. Race the drain against a safety + // timeout — a hang here means the abort never propagated. + const drained = await Promise.race([ + drainPromise.then(() => "drained"), + new Promise((r) => setTimeout(() => r("timeout"), 2000)), + ]); + expect(drained).toBe("drained"); + + // The stream must terminate cleanly with `done`, and the harness must + // have stopped well short of the 100 it would have emitted otherwise. + const events = collected + .split("\n\n") + .filter((s) => s.startsWith("data: ")) + .map((s) => s.slice("data: ".length)); + expect(events.at(-1)).toBe('{"type":"done"}'); + expect(handle.chunksEmitted()).toBeLessThan(100); + }); + + it("DELETE for an unknown runId returns 204 (idempotent)", async () => { + const res = await handleCancelRequest(signedCancel("run-never-existed"), { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(res.status).toBe(204); + }); + + it("DELETE with a bad HMAC signature returns 401", async () => { + const path = "/_decopilot_vm/runs/run-bad-sig"; + const sig = signRequest({ + secret: "wrong-secret-32-bytes-paddingpaddingpadding", + method: "DELETE", + path, + body: "", + }); + const req = new Request(`http://localhost${path}`, { + method: "DELETE", + headers: { ...sig }, + }); + const res = await handleCancelRequest(req, { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(res.status).toBe(401); + }); +}); diff --git a/apps/mesh/src/links/dispatch-loopback.test.ts b/apps/mesh/src/links/dispatch-loopback.test.ts new file mode 100644 index 0000000000..7c48f7b3d5 --- /dev/null +++ 
b/apps/mesh/src/links/dispatch-loopback.test.ts @@ -0,0 +1,266 @@ +/** + * Dispatch loopback test — exercises the FULL HMAC path end-to-end + * in-process, from cluster-side `remoteDispatch` through the daemon's + * `handleDispatchRequest` and back as SSE chunks. This catches any + * asymmetry between the key used to SIGN at the cluster and the key + * used to VERIFY at the daemon — the exact bug class that turned every + * remote dispatch into a 401 in production. + * + * Why this needs its own test file (vs. extending `loopback.test.ts`): + * - `loopback.test.ts` covers the registration handshake only. + * - The dispatch path uses a different secret usage (HMAC signing both + * control-plane AND per-sandbox routes), and the bug lived in the + * asymmetry between what was stored in the registry vs. what the + * link binary received. Verifying just the registration round-trip + * was insufficient. + * + * Strategy: + * 1. Build an in-process "cluster" Hono app with `registerLinksRoutes`. + * 2. Have the link `registerWithCluster` call it via a synthetic + * `fetch` — this returns the raw `linkSecret` exactly like prod. + * 3. Pull the LinkEntry back from the same registry — this is the + * object the cluster's dispatch path actually signs with. + * 4. Wire `remoteDispatch` to a fake daemon handler (`handleDispatchRequest` + * from `@decocms/sandbox/daemon/routes/dispatch`) configured with + * `bearerSecret = linkSecret` (raw, as the daemon holds it). + * 5. Drive a FakeHarness end-to-end and assert the cluster receives + * the harness's chunks back through the SSE parser. + * + * If the symmetric-signing invariant ever breaks again (e.g. someone + * re-introduces hash-at-rest on one side but not the other), this test + * dies on the dispatch fetch returning 401. 
+ */ + +import { describe, expect, it } from "bun:test"; +import { Hono } from "hono"; +import { handleDispatchRequest } from "@decocms/sandbox/daemon/routes/dispatch"; +import { fixtures } from "./protocol"; +import type { HarnessStreamInput } from "../harnesses"; +import { registerWithCluster } from "../link-daemon/registration"; +import { remoteDispatch } from "../harnesses/remote-dispatch"; +import { createInMemoryLinkRegistry } from "./link-registry"; +import { registerLinksRoutes } from "./routes"; + +const TEST_USER_SUB = "user-dispatch-loopback"; +const TEST_SESSION_TOKEN = "session-dispatch-loopback"; +// Must hit the cluster's `isLocalhostUrl` check (hostname === "localhost") +// for the cluster to honor our tunnelUrl override. Otherwise it derives +// `https://link-<userSub>.deco.host` and our test router stops matching. +const TUNNEL_BASE_URL = "http://localhost:65535"; + +function buildClusterApp() { + const registry = createInMemoryLinkRegistry({ nowSeconds: () => 0 }); + const app = new Hono(); + registerLinksRoutes(app, { + linkRegistry: registry, + getAuthenticatedUserSub: (c) => { + const auth = c.req.header("authorization"); + const session = c.req.header("x-deco-session"); + if (auth?.startsWith("Bearer ") || session) return TEST_USER_SUB; + return null; + }, + allowLocalhostLinks: true, + }); + return { app, registry }; +} + +/** + * A tiny harness that yields a deterministic series of `UIMessageChunk`s. + * The cluster-side `remoteDispatch` parses these back out of SSE and + * yields them to the caller; the test asserts the full set arrives. 
+ */ +function makeFakeHarness() { + return { + async *stream() { + yield { type: "start", id: "msg-loopback" } as const; + yield { + type: "text-delta", + id: "msg-loopback", + delta: "hello loopback", + } as const; + yield { type: "finish", finishReason: "stop" } as const; + }, + }; +} + +/** + * Build a `fetch`-compatible function that routes: + * - Requests to the cluster app (any host equal to "cluster.local") + * through Hono. + * - Requests to the link's tunnel URL through the daemon's + * `handleDispatchRequest` (we expose it as a bare Request handler). + * + * Anything else is rejected — the test should not be making other calls. + */ +function makeRouter( + clusterApp: Hono, + daemonHandler: (req: Request) => Promise, +): typeof fetch { + return (async (input: RequestInfo | URL, init?: RequestInit) => { + const req = + input instanceof Request ? input : new Request(String(input), init); + const url = new URL(req.url); + if (url.host === "cluster.local") { + return clusterApp.fetch(req); + } + if (req.url.startsWith(TUNNEL_BASE_URL)) { + return daemonHandler(req); + } + throw new Error(`unexpected fetch target in test: ${req.url}`); + }) as typeof fetch; +} + +describe("dispatch loopback (in-process)", () => { + it("registers, signs, dispatches, and round-trips SSE chunks", async () => { + const { app, registry } = buildClusterApp(); + + // The daemon-side state: we'll receive whatever secret the link does + // and use it to verify HMAC signatures on incoming requests. Setting + // this before `registerWithCluster` would require knowing the secret + // ahead of time — so we initialize daemonBearer after registration + // and capture it via a mutable holder. 
+ let daemonBearer: string | null = null; + const daemonHandler = async (req: Request): Promise => { + if (!daemonBearer) { + throw new Error("daemonBearer not set before dispatch"); + } + return handleDispatchRequest(req, { + bearerSecret: daemonBearer, + lookupHarness: () => makeFakeHarness(), + seenNonce: () => false, + }); + }; + + const originalFetch = globalThis.fetch; + globalThis.fetch = makeRouter(app, daemonHandler); + try { + // 1) Register — get the raw linkSecret the link binary would hold. + const { linkSecret } = await registerWithCluster({ + clusterBaseUrl: "http://cluster.local", + sessionToken: TEST_SESSION_TOKEN, + machineId: "machine-dispatch-loopback", + cliVersion: "0.0.0-test", + capabilities: ["claude-code"], + tunnelUrl: TUNNEL_BASE_URL, + }); + daemonBearer = linkSecret; + + // 2) Pull the stored entry back. THIS is the value the cluster + // dispatch path will sign HMACs with. The bug we're guarding + // against was `stored.linkSecret !== linkSecret` — different + // key material on the two ends. + const stored = await registry.get(TEST_USER_SUB); + if (!stored) throw new Error("registry missing the link we just put"); + + // Sanity: if these ever drift apart again, every dispatch fails + // with 401. Assert symmetry explicitly so a later refactor doesn't + // silently re-introduce the asymmetry without also tripping the + // SSE assertion below. + expect(stored.linkSecret).toBe(linkSecret); + + // 3) Drive remoteDispatch against the daemon via the routed fetch. + const chunks: unknown[] = []; + // FIXTURE_MINIMAL_INPUT types `messages` as `Record[]` + // (the wire shape is opaque at the link-protocol layer); the harness + // type narrows it to `ChatMessage[]`. The wire test doesn't drive any + // message-shape logic so the cast is safe. 
+ const harnessInput = { + ...fixtures.FIXTURE_MINIMAL_INPUT, + runId: "run-loopback-1", + signal: new AbortController().signal, + } as unknown as HarnessStreamInput; + // Per-daemon-tunnel migration: the cluster now talks to the + // daemon DIRECTLY (no link reverse-proxy hop), at a per-handle + // `sandboxUrl` returned by the link's `POST /api/sandboxes`. In + // this in-process loopback we point sandboxUrl at the same + // TUNNEL_BASE_URL so the routed fetch still resolves to the + // daemon handler — the URL path (`/_decopilot_vm/dispatch`) is + // what the daemon's HMAC verification matches against. + const iter = remoteDispatch( + "claude-code", + harnessInput, + { + tunnelUrl: stored.tunnelUrl, + linkSecret: stored.linkSecret, + }, + stored.tunnelUrl, + ); + for await (const chunk of iter) { + chunks.push(chunk); + } + + // 4) Assert the round-trip: the chunks emitted by the FakeHarness + // on the daemon side made it back through SSE parsing as the + // same UIMessageChunk shapes. + expect(chunks.length).toBeGreaterThanOrEqual(3); + // The harness emitted: start → text-delta → finish. + const types = chunks + .map((c) => (c as { type?: string }).type) + .filter((t): t is string => typeof t === "string"); + expect(types).toContain("start"); + expect(types).toContain("text-delta"); + expect(types).toContain("finish"); + } finally { + globalThis.fetch = originalFetch; + } + }); + + it("dispatch fails with 401 when cluster signs with a different secret than the daemon verifies with", async () => { + // Negative control: confirms the test's HMAC plumbing is sensitive + // to a key mismatch — i.e., if we DELIBERATELY desymmetrize the + // secret, the dispatch fetch surfaces the 401 the production bug + // would produce. Without this, the happy-path test could be passing + // for the wrong reason (e.g., HMAC verification accidentally + // bypassed). 
+ const { app, registry } = buildClusterApp(); + + const daemonHandler = (req: Request): Promise => + handleDispatchRequest(req, { + bearerSecret: "completely-different-secret-32-bytes-pad", + lookupHarness: () => makeFakeHarness(), + seenNonce: () => false, + }); + + const originalFetch = globalThis.fetch; + globalThis.fetch = makeRouter(app, daemonHandler); + try { + const { linkSecret } = await registerWithCluster({ + clusterBaseUrl: "http://cluster.local", + sessionToken: TEST_SESSION_TOKEN, + machineId: "machine-mismatch", + cliVersion: "0.0.0-test", + capabilities: ["claude-code"], + tunnelUrl: TUNNEL_BASE_URL, + }); + const stored = await registry.get(TEST_USER_SUB); + if (!stored) throw new Error("registry missing entry"); + expect(stored.linkSecret).toBe(linkSecret); + + const harnessInput = { + ...fixtures.FIXTURE_MINIMAL_INPUT, + runId: "run-mismatch-1", + signal: new AbortController().signal, + } as unknown as HarnessStreamInput; + const iter = remoteDispatch( + "claude-code", + harnessInput, + { + tunnelUrl: stored.tunnelUrl, + linkSecret: stored.linkSecret, + }, + stored.tunnelUrl, + ); + // Consuming the iterator triggers the dispatch fetch; the 401 + // surfaces as a thrown Error from `remoteDispatch`. + await expect( + (async () => { + for await (const _ of iter) { + // no-op + } + })(), + ).rejects.toThrow(/HTTP 401/); + } finally { + globalThis.fetch = originalFetch; + } + }); +}); diff --git a/apps/mesh/src/links/link-offline-error.ts b/apps/mesh/src/links/link-offline-error.ts new file mode 100644 index 0000000000..5fbc4ebe31 --- /dev/null +++ b/apps/mesh/src/links/link-offline-error.ts @@ -0,0 +1,20 @@ +/** + * Thrown / returned when a `runLocally=true` request cannot reach the + * user's link daemon. The cluster surfaces this as a 409 response on + * `POST /messages` (Phase 4) so the user can see a clear "your link is + * offline" or "your link is missing capability X" message without the + * request being silently queued. 
+ */ +export type LinkOfflineReason = "link_offline" | "capability_missing"; + +export class LinkOfflineError extends Error { + readonly code: LinkOfflineReason; + readonly activeCapabilities?: string[]; + + constructor(reason: LinkOfflineReason, activeCapabilities?: string[]) { + super(`link unavailable: ${reason}`); + this.name = "LinkOfflineError"; + this.code = reason; + this.activeCapabilities = activeCapabilities; + } +} diff --git a/apps/mesh/src/links/link-registry.test.ts b/apps/mesh/src/links/link-registry.test.ts new file mode 100644 index 0000000000..77b77ae938 --- /dev/null +++ b/apps/mesh/src/links/link-registry.test.ts @@ -0,0 +1,55 @@ +import { beforeEach, describe, expect, it } from "bun:test"; +import { + type InMemoryLinkRegistry, + createInMemoryLinkRegistry, +} from "./link-registry"; + +const sample = { + machineId: "mach-1", + tunnelUrl: "https://link-1.deco.host", + linkSecret: "hashed-secret", + cliVersion: "2.331.2", + protocolVersion: 1, + capabilities: ["claude-code" as const], + createdAt: "2026-05-19T12:00:00.000Z", +}; + +describe("LinkRegistry (in-memory test impl)", () => { + let registry: InMemoryLinkRegistry; + beforeEach(() => { + registry = createInMemoryLinkRegistry({ + ttlSeconds: 30, + nowSeconds: () => 1000, + }); + }); + + it("returns null when no entry exists", async () => { + expect(await registry.get("user-a")).toBeNull(); + }); + + it("returns the entry after put", async () => { + await registry.put("user-a", sample); + const got = await registry.get("user-a"); + expect(got).toMatchObject(sample); + }); + + it("expires the entry after TTL", async () => { + await registry.put("user-a", sample); + registry.advanceNow(31); + expect(await registry.get("user-a")).toBeNull(); + }); + + it("refreshes TTL on put", async () => { + await registry.put("user-a", sample); + registry.advanceNow(20); + await registry.put("user-a", sample); // refresh + registry.advanceNow(20); + expect(await registry.get("user-a")).not.toBeNull(); + 
}); + + it("delete removes the entry immediately", async () => { + await registry.put("user-a", sample); + await registry.delete("user-a"); + expect(await registry.get("user-a")).toBeNull(); + }); +}); diff --git a/apps/mesh/src/links/link-registry.ts b/apps/mesh/src/links/link-registry.ts new file mode 100644 index 0000000000..8877daab92 --- /dev/null +++ b/apps/mesh/src/links/link-registry.ts @@ -0,0 +1,137 @@ +/** + * Link Registry + * + * Stores per-user `LinkEntry` records for the remote-harness dispatch flow. + * The cluster looks up an entry by `userId` before deciding whether to run + * a harness in the cluster sandbox or to dispatch it to the user's link + * daemon over the registered tunnel URL. + * + * Two backends: + * - `NatsLinkRegistry` — production, backed by a NATS JetStream KV + * bucket (`LINKS`). Entries expire via the bucket's `ttl` option + * (nats@2.29.3 `KvLimits.ttl`, expressed in milliseconds). + * - `createInMemoryLinkRegistry` — test/dev only, with a controllable + * clock for TTL testing. + * + * Storage shape: the `linkSecret` field holds the RAW secret, not a + * hash. Dispatch requests are HMAC-signed, which requires the same key + * material on both ends, so the cluster must sign with exactly the value + * the link daemon verifies with. The raw secret is also returned to the + * link daemon exactly once at registration. Consequently anyone with + * read access on the `LINKS` bucket sees working credentials — restrict + * access to it accordingly. + */ +import { JSONCodec, StorageType, type JetStreamClient, type KV } from "nats"; +import type { LinkEntry } from "./protocol"; + +const LINKS_BUCKET = "LINKS"; +const DEFAULT_LINK_TTL_SECONDS = 30; + +export interface LinkRegistry { + get(userId: string): Promise; + put(userId: string, entry: LinkEntry): Promise; + delete(userId: string): Promise; +} + +/** Test-only: in-memory backend with a controllable clock. 
*/ +export interface InMemoryLinkRegistry extends LinkRegistry { + advanceNow(deltaSeconds: number): void; +} + +export function createInMemoryLinkRegistry(opts: { + ttlSeconds?: number; + nowSeconds: () => number; +}): InMemoryLinkRegistry { + const ttl = opts.ttlSeconds ?? DEFAULT_LINK_TTL_SECONDS; + let drift = 0; + const nowFn = () => opts.nowSeconds() + drift; + const store = new Map(); + + return { + async get(userId) { + const slot = store.get(userId); + if (!slot) return null; + if (slot.expiresAt <= nowFn()) { + store.delete(userId); + return null; + } + return slot.entry; + }, + async put(userId, entry) { + store.set(userId, { entry, expiresAt: nowFn() + ttl }); + }, + async delete(userId) { + store.delete(userId); + }, + advanceNow(delta) { + drift += delta; + }, + }; +} + +export interface NatsLinkRegistryOptions { + getJetStream: () => JetStreamClient | null; + /** Defaults to `DEFAULT_LINK_TTL_SECONDS`. */ + ttlSeconds?: number; +} + +/** + * NATS-JetStream-KV backed registry. Mirrors the lazy-init pattern used by + * the other NATS-backed caches in this codebase: `init()` acquires the KV + * handle if the connection is up; until then the registry no-ops and reads + * return null. Re-call `init()` from `natsProvider.onReady` so the registry + * recovers after a reconnect. + * + * Uses bucket-level `max_age` as the freshness ceiling — entries are GC'd + * by the stream once they age out. The read path includes a freshness check + * to harden against clusters with looser GC timing. Per-message TTL + * (JetStream 2.11+) is not required. + */ +export class NatsLinkRegistry implements LinkRegistry { + private kv: KV | null = null; + private readonly codec = JSONCodec(); + private readonly ttl: number; + + constructor(private readonly options: NatsLinkRegistryOptions) { + this.ttl = options.ttlSeconds ?? 
DEFAULT_LINK_TTL_SECONDS; + } + + async init(): Promise { + const js = this.options.getJetStream(); + if (!js) return; // NATS not ready — registry disabled until re-init + this.kv = await js.views.kv(LINKS_BUCKET, { + history: 1, + // KvLimits.ttl is documented as "millis the key should live" + ttl: this.ttl * 1000, + storage: StorageType.Memory, + }); + } + + async get(userId: string): Promise { + if (!this.kv) return null; + try { + const entry = await this.kv.get(userId); + if (!entry?.value) return null; + if (entry.operation === "DEL" || entry.operation === "PURGE") { + return null; + } + // Fallback freshness check: compare entry creation time to TTL. + const ageMs = Date.now() - entry.created.getTime(); + if (ageMs > this.ttl * 1000) return null; + return this.codec.decode(entry.value); + } catch { + return null; + } + } + + async put(userId: string, value: LinkEntry): Promise { + if (!this.kv) return; + await this.kv.put(userId, this.codec.encode(value)); + } + + async delete(userId: string): Promise { + if (!this.kv) return; + try { + await this.kv.delete(userId); + } catch { + // best-effort + } + } +} diff --git a/apps/mesh/src/links/loopback.test.ts b/apps/mesh/src/links/loopback.test.ts new file mode 100644 index 0000000000..649e9fe61f --- /dev/null +++ b/apps/mesh/src/links/loopback.test.ts @@ -0,0 +1,269 @@ +/** + * Link loopback integration test. + * + * Verifies the link daemon's `registerWithCluster` HTTP call against + * the real cluster `registerLinksRoutes` handler, end-to-end through + * Hono. The protocol fields (`tunnelUrl`, `machineId`, `linkSecret` + * lifecycle, `protocolVersion` check) all flow through. + * + * Scope decision: a full process-spawning E2E (boot `apps/mesh` dev + * server + spawn `deco link` --no-tunnel --port 5174 and poll + * /api/links/me until online) is fragile in CI — OAuth session + * shape, port conflicts, daemon-spawn nesting, embedded postgres boot + * time all conspire to make it flaky. 
The plan (Phase 11.2) explicitly + * allows degrading to a "best-effort smoke that asserts the registration + * HTTP call works against an in-test mesh API + in-test link config, + * without actually spawning the link binary." + * + * That's what this file ships: + * - `link loopback (in-process)` ALWAYS runs. It mounts the cluster + * route on a Hono app, wires the link's `registerWithCluster` to it + * via a synthetic `fetch`, and asserts: + * 1. `POST /api/links` returns a usable `linkSecret`. + * 2. The registry sees the entry with the right machineId/tunnelUrl. + * 3. `GET /api/links/me` reports `status: "online"`. + * 4. `DELETE /api/links/me` removes the entry; subsequent GET + * returns offline. + * - `link loopback (live subprocess)` is SKIPPED unless `MESH_LIVE_INTEGRATION=1`. + * When enabled, it spawns the real `deco link` subprocess. + */ + +import { afterAll, beforeAll, describe, expect, it } from "bun:test"; +import { Hono } from "hono"; +import { registerWithCluster } from "../link-daemon/registration"; +import { createInMemoryLinkRegistry } from "./link-registry"; +import { registerLinksRoutes } from "./routes"; + +// ────────────────────────────────────────────────────────────────────── +// In-process loopback (always runs). +// +// This wires the link daemon's `registerWithCluster` to the cluster's +// `registerLinksRoutes` via a fake `fetch` that dispatches to a Hono +// app. The same request/response objects flow through both sides, so +// schema drift on either end fails this test. +// ────────────────────────────────────────────────────────────────────── + +const TEST_USER_SUB = "user-loopback"; +const TEST_SESSION_TOKEN = "session-loopback-token"; + +function buildClusterApp() { + const registry = createInMemoryLinkRegistry({ nowSeconds: () => 0 }); + const app = new Hono(); + registerLinksRoutes(app, { + linkRegistry: registry, + // Treat any caller presenting `authorization: Bearer <token>` (or + // `x-deco-session: <token>`) as the test user. 
The link daemon's + // `registerWithCluster` and shutdown both use `authorization: Bearer`, + // so this single check is sufficient. + getAuthenticatedUserSub: (c) => { + const auth = c.req.header("authorization"); + const session = c.req.header("x-deco-session"); + if (auth?.startsWith("Bearer ") || session) return TEST_USER_SUB; + return null; + }, + allowLocalhostLinks: true, + }); + return { app, registry }; +} + +/** Adapt a Hono app to look like `fetch(url, init)` so the link's + * `registerWithCluster` can call it without going through a real + * network socket. The link only cares about `fetch(absoluteUrl, init)` + * shape — Hono accepts a Request directly. */ +function makeAppFetch(app: Hono): typeof fetch { + return (async (input: RequestInfo | URL, init?: RequestInit) => { + const req = + input instanceof Request ? input : new Request(String(input), init); + return app.fetch(req); + }) as typeof fetch; +} + +describe("link loopback (in-process)", () => { + it("registers, reports online, then deregisters cleanly", async () => { + const { app, registry } = buildClusterApp(); + const appFetch = makeAppFetch(app); + + // Swap the global fetch with our in-process one for the duration of + // this test — `registerWithCluster` calls bare `fetch(...)` so we + // need to intercept at that layer. + const originalFetch = globalThis.fetch; + globalThis.fetch = appFetch; + try { + // 1) Register. + const { linkSecret } = await registerWithCluster({ + clusterBaseUrl: "http://cluster.local", + sessionToken: TEST_SESSION_TOKEN, + machineId: "machine-loopback", + cliVersion: "0.0.0-test", + capabilities: ["claude-code", "codex"], + tunnelUrl: "http://localhost:5174", + }); + expect(typeof linkSecret).toBe("string"); + expect(linkSecret.length).toBeGreaterThan(20); + + // 2) The cluster registry now sees the entry. 
+ const stored = await registry.get(TEST_USER_SUB); + expect(stored).not.toBeNull(); + expect(stored?.machineId).toBe("machine-loopback"); + expect(stored?.tunnelUrl).toBe("http://localhost:5174"); + expect(stored?.capabilities).toEqual(["claude-code", "codex"]); + // HMAC signing requires symmetric key material — the cluster persists + // the RAW secret so it can sign dispatch requests with the same value + // the link verifies with (see schemas.ts JSDoc on linkSecret). + expect(stored?.linkSecret).toBe(linkSecret); + + // 3) GET /api/links/me reports online. + const meRes = await appFetch("http://cluster.local/api/links/me", { + headers: { authorization: `Bearer ${TEST_SESSION_TOKEN}` }, + }); + expect(meRes.status).toBe(200); + const meBody = (await meRes.json()) as { + status: string; + machineId?: string; + capabilities?: string[]; + }; + expect(meBody.status).toBe("online"); + expect(meBody.machineId).toBe("machine-loopback"); + expect(meBody.capabilities).toEqual(["claude-code", "codex"]); + + // 4) DELETE /api/links/me deregisters. Matches the link binary's + // shutdown path: linkSecret in X-Link-Secret + X-Mesh-User-Sub + // header (the link has no active OAuth session at this point). 
+ const delRes = await appFetch("http://cluster.local/api/links/me", { + method: "DELETE", + headers: { + "x-link-secret": linkSecret, + "x-mesh-user-sub": TEST_USER_SUB, + }, + }); + expect(delRes.status).toBe(204); + + const meAfterRes = await appFetch("http://cluster.local/api/links/me", { + headers: { authorization: `Bearer ${TEST_SESSION_TOKEN}` }, + }); + expect(meAfterRes.status).toBe(200); + const meAfterBody = (await meAfterRes.json()) as { status: string }; + expect(meAfterBody.status).toBe("offline"); + } finally { + globalThis.fetch = originalFetch; + } + }); + + it("surfaces 409 when another machineId is already active", async () => { + const { app } = buildClusterApp(); + const appFetch = makeAppFetch(app); + const originalFetch = globalThis.fetch; + globalThis.fetch = appFetch; + try { + // First registration wins. + await registerWithCluster({ + clusterBaseUrl: "http://cluster.local", + sessionToken: TEST_SESSION_TOKEN, + machineId: "machine-A", + cliVersion: "0.0.0-test", + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }); + // A second registration from a different machine for the same user + // must surface as the documented "another machine active" error — + // `registerWithCluster` translates the cluster's 409 into a + // user-readable Error. + await expect( + registerWithCluster({ + clusterBaseUrl: "http://cluster.local", + sessionToken: TEST_SESSION_TOKEN, + machineId: "machine-B", + cliVersion: "0.0.0-test", + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + ).rejects.toThrow(/Another machine/); + } finally { + globalThis.fetch = originalFetch; + } + }); +}); + +// ────────────────────────────────────────────────────────────────────── +// Live process loopback (skipped unless MESH_LIVE_INTEGRATION=1). +// +// Boots an in-test mesh server on an ephemeral port and spawns the real +// `deco link` subprocess against it. 
Requires:
+//   - DATA_DIR pointing at a writable temp dir
+//   - A pre-written `session.json` in that DATA_DIR (we synthesize one)
+//   - The cluster to honor MESH_ALLOW_LOCALHOST_LINKS for --no-tunnel
+//
+// This is intentionally minimal — it just confirms the link shows up as
+// "online" on GET /api/links/me within a timeout. Deeper assertions
+// belong in the in-process test above (which exercises the same code
+// paths without subprocess flake).
+// ──────────────────────────────────────────────────────────────────────
+
+const LIVE = process.env.MESH_LIVE_INTEGRATION === "1";
+
+describe.skipIf(!LIVE)("link loopback (live subprocess)", () => {
+  let linkProc: ReturnType<typeof Bun.spawn> | null = null;
+
+  beforeAll(() => {
+    // The plan acknowledges this is the hardest path. We document the
+    // intended invocation here but stop short of doing the dev-server
+    // boot inside the test — `bun run dev` is assumed to already be up
+    // on http://localhost:3000 by whoever set `MESH_LIVE_INTEGRATION=1`.
+    linkProc = Bun.spawn({
+      cmd: [
+        "bun",
+        "run",
+        "--cwd=apps/mesh",
+        "src/cli.ts",
+        "link",
+        "--no-tunnel",
+        "--port",
+        "5174",
+      ],
+      env: {
+        ...process.env,
+        MESH_CLUSTER_URL:
+          process.env.MESH_TEST_CLUSTER_URL ?? "http://localhost:3000",
+        MESH_ALLOW_LOCALHOST_LINKS: "1",
+      },
+      stdout: "inherit", // nothing here reads the child's output; inherit it so it is visible and cannot back up in an undrained pipe
+      stderr: "inherit",
+    });
+  });
+
+  afterAll(() => {
+    try {
+      linkProc?.kill();
+    } catch {
+      // ignore
+    }
+  });
+
+  it("reaches /api/links/me online state within 30s", async () => {
+    const clusterUrl =
+      process.env.MESH_TEST_CLUSTER_URL ?? "http://localhost:3000";
+    const sessionToken = process.env.MESH_TEST_SESSION_TOKEN ?? "";
+    const deadline = Date.now() + 30_000;
+    let onlineSeen = false;
+    while (Date.now() < deadline) {
+      try {
+        const res = await fetch(`${clusterUrl}/api/links/me`, {
+          headers: sessionToken
+            ? { authorization: `Bearer ${sessionToken}` }
+            : {},
+        });
+        if (res.ok) {
+          const body = (await res.json()) as { status?: string };
+          if (body.status === "online") {
+            onlineSeen = true;
+            break;
+          }
+        }
+      } catch {
+        // server not up yet, retry
+      }
+      await new Promise((r) => setTimeout(r, 500));
+    }
+    expect(onlineSeen).toBe(true);
+  });
+});
diff --git a/apps/mesh/src/links/protocol/fixtures.ts b/apps/mesh/src/links/protocol/fixtures.ts
new file mode 100644
index 0000000000..1d60007228
--- /dev/null
+++ b/apps/mesh/src/links/protocol/fixtures.ts
@@ -0,0 +1,54 @@
+import type { DispatchSSEEvent, HarnessStreamInputWire } from "./schemas";
+
+/**
+ * Shared by cluster `remoteDispatch` tests AND daemon dispatch tests.
+ * If one side drifts, CI breaks on the other. Add fixtures
+ * conservatively — every new one is wire-contract surface area.
+ */
+
+export const FIXTURE_MINIMAL_INPUT: HarnessStreamInputWire = {
+  threadId: "thr-fixture",
+  runId: "run-fixture",
+  taskId: "thr-fixture",
+  messages: [],
+  models: {
+    credentialId: "cred-fixture",
+    thinking: { id: "claude-code:opus", title: "Opus" },
+  },
+  mcp: {
+    url: "https://mesh.example.com/mcp/virtual-mcp/agent-fixture",
+    headers: { Authorization: "Bearer fixture-token" },
+    expiresAt: 9999999999000,
+  },
+  mode: "default",
+  temperature: 0.7,
+  toolApprovalLevel: "auto",
+  user: { id: "user-fixture", email: "fixture@example.com" },
+  organizationId: "org-fixture",
+  virtualMcp: { id: "agent-fixture" },
+  agent: { id: "agent-fixture" },
+};
+
+export const FIXTURE_SSE_HAPPY_PATH: readonly DispatchSSEEvent[] = [
+  { type: "ui-message-chunk", chunk: { type: "start", id: "msg-1" } },
+  {
+    type: "ui-message-chunk",
+    chunk: { type: "text-delta", id: "msg-1", delta: "Hello" },
+  },
+  {
+    type: "ui-message-chunk",
+    chunk: { type: "finish-step", usage: { inputTokens: 1, outputTokens: 1 } },
+  },
+  { type: "ui-message-chunk", chunk: { type: "finish", finishReason: "stop" } },
+  { type: "done" },
+] as const;
+
+export const FIXTURE_SSE_HARNESS_CRASH: readonly DispatchSSEEvent[] = [
+  { type: "ui-message-chunk", chunk: { type: "start", id: "msg-1" } },
+  {
+    type: "error",
+    code: "harness_crashed",
+    message: "claude exited with code 137",
+  },
+  { type: "done" },
+] as const;
diff --git a/apps/mesh/src/links/protocol/hmac.test.ts b/apps/mesh/src/links/protocol/hmac.test.ts
new file mode 100644
index 0000000000..79f880eaaf
--- /dev/null
+++ b/apps/mesh/src/links/protocol/hmac.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, it } from "bun:test";
+import { signRequest, verifyRequest } from "./hmac";
+
+const SECRET = "test-secret-32-bytes-or-more-padding-padding";
+
+function makeHeaders(input: {
+  secret?: string;
+  method?: string;
+  path?: string;
+  body?: string;
+  timestampOverride?: number;
+  nonceOverride?: string;
+}): Record<string, string> {
+  return {
+    ...signRequest({
+      secret: input.secret ?? SECRET,
+      method: input.method ?? "POST",
+      path: input.path ?? "/_decopilot_vm/dispatch",
+      body: input.body ?? "{}",
+      timestamp: input.timestampOverride,
+      nonce: input.nonceOverride,
+    }),
+  };
+}
+
+describe("HMAC request signing", () => {
+  it("verifies a freshly signed request", () => {
+    const headers = makeHeaders({});
+    const ok = verifyRequest({
+      secret: SECRET,
+      method: "POST",
+      path: "/_decopilot_vm/dispatch",
+      body: "{}",
+      headers,
+      seenNonce: () => false,
+    });
+    expect(ok).toEqual({ valid: true });
+  });
+
+  it("rejects mismatched body", () => {
+    const headers = makeHeaders({ body: "{}" });
+    const result = verifyRequest({
+      secret: SECRET,
+      method: "POST",
+      path: "/_decopilot_vm/dispatch",
+      body: '{"x":1}',
+      headers,
+      seenNonce: () => false,
+    });
+    expect(result.valid).toBe(false);
+  });
+
+  it("rejects mismatched secret", () => {
+    const headers = makeHeaders({});
+    const result = verifyRequest({
+      secret: "wrong-secret-32-bytes-padding-padding-padding",
+      method: "POST",
+      path: "/_decopilot_vm/dispatch",
+      body: "{}",
+      headers,
+      seenNonce: () => false,
+    });
+    expect(result.valid).toBe(false);
+  });
+
+  it("rejects timestamps drifting more than 30s", () => {
+    const headers = makeHeaders({
+      timestampOverride: Math.floor(Date.now() / 1000) - 60,
+    });
+    const result = verifyRequest({
+      secret: SECRET,
+      method: "POST",
+      path: "/_decopilot_vm/dispatch",
+      body: "{}",
+      headers,
+      seenNonce: () => false,
+    });
+    expect(result.valid).toBe(false);
+    if (!result.valid) expect(result.reason).toBe("timestamp_drift");
+  });
+
+  it("rejects replayed nonces", () => {
+    const headers = makeHeaders({ nonceOverride: "fixed-nonce" });
+    const result = verifyRequest({
+      secret: SECRET,
+      method: "POST",
+      path: "/_decopilot_vm/dispatch",
+      body: "{}",
+      headers,
+      seenNonce: (n) => n === "fixed-nonce",
+    });
+    expect(result.valid).toBe(false);
+    if (!result.valid) expect(result.reason).toBe("nonce_replay");
+  });
+});
diff --git a/apps/mesh/src/links/protocol/hmac.ts b/apps/mesh/src/links/protocol/hmac.ts
new file mode 100644
index 0000000000..aac9720a2b
--- /dev/null
+++ b/apps/mesh/src/links/protocol/hmac.ts
@@ -0,0 +1,135 @@
+import {
+  createHash,
+  createHmac,
+  randomBytes,
+  timingSafeEqual,
+} from "node:crypto";
+
+export const SIG_HEADER = "X-Mesh-Signature";
+export const TS_HEADER = "X-Mesh-Timestamp";
+export const NONCE_HEADER = "X-Mesh-Nonce";
+
+const MAX_TIMESTAMP_DRIFT_SECONDS = 30;
+
+export interface RequestSignatureHeaders {
+  [SIG_HEADER]: string;
+  [TS_HEADER]: string;
+  [NONCE_HEADER]: string;
+}
+
+export interface SignInput {
+  secret: string;
+  method: string;
+  path: string;
+  body: string;
+  /** Test-only override. */
+  timestamp?: number;
+  /** Test-only override. */
+  nonce?: string;
+}
+
+/**
+ * Build the signature headers for a request. The HMAC-SHA256 covers the
+ * uppercased method, path, timestamp, nonce and SHA-256 of the body, joined
+ * with "\n" — `verifyRequest` rebuilds the same string to check it.
+ */
+export function signRequest(input: SignInput): RequestSignatureHeaders {
+  const timestamp = input.timestamp ?? Math.floor(Date.now() / 1000);
+  const nonce = input.nonce ?? randomBytes(16).toString("hex");
+  const bodyHash = sha256Hex(input.body);
+  const stringToSign = [
+    input.method.toUpperCase(),
+    input.path,
+    String(timestamp),
+    nonce,
+    bodyHash,
+  ].join("\n");
+  const signature = createHmac("sha256", input.secret)
+    .update(stringToSign)
+    .digest("hex");
+  return {
+    [SIG_HEADER]: signature,
+    [TS_HEADER]: String(timestamp),
+    [NONCE_HEADER]: nonce,
+  };
+}
+
+export interface VerifyInput {
+  secret: string;
+  method: string;
+  path: string;
+  body: string;
+  headers: Record<string, string | undefined>;
+  /**
+   * Caller-provided nonce cache. Returning `true` means "this nonce has
+   * been seen recently; reject as replay." The verifier never mutates;
+   * the caller records the nonce on successful verification.
+   */
+  seenNonce: (nonce: string) => boolean;
+  /** Test-only override of "now" in seconds. */
+  now?: number;
+}
+
+export type VerifyResult =
+  | { valid: true }
+  | {
+      valid: false;
+      reason:
+        | "missing_headers"
+        | "timestamp_drift"
+        | "nonce_replay"
+        | "bad_signature";
+    };
+
+/**
+ * Verify a request signed by `signRequest`. Checks run in this order: all
+ * three headers present, timestamp within MAX_TIMESTAMP_DRIFT_SECONDS of
+ * `now`, nonce not reported by `seenNonce`, then the HMAC itself.
+ */
+export function verifyRequest(input: VerifyInput): VerifyResult {
+  const sig =
+    input.headers[SIG_HEADER] ?? input.headers[SIG_HEADER.toLowerCase()];
+  const tsRaw =
+    input.headers[TS_HEADER] ?? input.headers[TS_HEADER.toLowerCase()];
+  const nonce =
+    input.headers[NONCE_HEADER] ?? input.headers[NONCE_HEADER.toLowerCase()];
+  if (!sig || !tsRaw || !nonce) {
+    return { valid: false, reason: "missing_headers" };
+  }
+
+  const timestamp = Number(tsRaw);
+  if (!Number.isFinite(timestamp)) {
+    return { valid: false, reason: "missing_headers" };
+  }
+  const now = input.now ?? Math.floor(Date.now() / 1000);
+  if (Math.abs(now - timestamp) > MAX_TIMESTAMP_DRIFT_SECONDS) {
+    return { valid: false, reason: "timestamp_drift" };
+  }
+  if (input.seenNonce(nonce)) return { valid: false, reason: "nonce_replay" };
+
+  const bodyHash = sha256Hex(input.body);
+  const stringToSign = [
+    input.method.toUpperCase(),
+    input.path,
+    String(timestamp),
+    nonce,
+    bodyHash,
+  ].join("\n");
+  const expected = createHmac("sha256", input.secret)
+    .update(stringToSign)
+    .digest("hex");
+
+  // Compare the hex text rather than hex-decoding the header:
+  // Buffer.from(sig, "hex") stops at the first non-hex character, so a
+  // valid digest with trailing garbage would decode to the same bytes and
+  // pass. Lowercasing keeps uppercase-hex signatures accepted as before.
+  const a = Buffer.from(expected, "utf8");
+  const b = Buffer.from(sig.toLowerCase(), "utf8");
+  if (a.length !== b.length) return { valid: false, reason: "bad_signature" };
+  if (!timingSafeEqual(a, b)) return { valid: false, reason: "bad_signature" };
+  return { valid: true };
+}
+
+function sha256Hex(s: string): string {
+  return createHash("sha256").update(s).digest("hex");
+}
diff --git a/apps/mesh/src/links/protocol/index.ts b/apps/mesh/src/links/protocol/index.ts
new file mode 100644
index 0000000000..8b31861148
--- /dev/null
+++ b/apps/mesh/src/links/protocol/index.ts
@@ -0,0 +1,4 @@
+export * from "./hmac";
+export * from "./schemas";
+export * from "./version";
+export * as fixtures from "./fixtures";
diff --git a/apps/mesh/src/links/protocol/schemas.test.ts b/apps/mesh/src/links/protocol/schemas.test.ts
new file mode 100644
index 0000000000..0647f1a88a
--- /dev/null
+++ b/apps/mesh/src/links/protocol/schemas.test.ts
@@ -0,0 +1,145 @@
+import { describe, expect, it } from "bun:test";
+import {
+  capabilitySchema,
+
dispatchSSEEventSchema, + harnessStreamInputSchema, + type HarnessStreamInputWire, + linkEntrySchema, + registrationPayloadSchema, +} from "./schemas"; + +describe("registrationPayloadSchema", () => { + it("accepts a well-formed payload", () => { + const result = registrationPayloadSchema.safeParse({ + machineId: "machine-1", + cliVersion: "0.1.0", + protocolVersion: 1, + capabilities: ["claude-code"], + tunnelUrl: "https://example.com/tunnel", + }); + expect(result.success).toBe(true); + }); + + it("rejects unknown capability", () => { + const result = registrationPayloadSchema.safeParse({ + machineId: "machine-1", + cliVersion: "0.1.0", + protocolVersion: 1, + capabilities: ["definitely-not-a-harness"], + }); + expect(result.success).toBe(false); + }); + + it("rejects negative protocolVersion", () => { + const result = registrationPayloadSchema.safeParse({ + machineId: "machine-1", + cliVersion: "0.1.0", + protocolVersion: -1, + capabilities: ["claude-code"], + }); + expect(result.success).toBe(false); + }); +}); + +describe("linkEntrySchema", () => { + it("preserves linkSecret as opaque string", () => { + const payload = { + machineId: "m-1", + tunnelUrl: "https://tunnel.example.com", + linkSecret: "OPAQUE-SECRET-STRING", + cliVersion: "0.1.0", + protocolVersion: 1, + capabilities: ["claude-code" as const], + createdAt: "2026-05-19T00:00:00.000Z", + }; + const result = linkEntrySchema.safeParse(payload); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.linkSecret).toBe("OPAQUE-SECRET-STRING"); + } + }); +}); + +describe("dispatchSSEEventSchema", () => { + it("accepts ui-message-chunk", () => { + const result = dispatchSSEEventSchema.safeParse({ + type: "ui-message-chunk", + chunk: { hello: "world" }, + }); + expect(result.success).toBe(true); + }); + + it("accepts error", () => { + const result = dispatchSSEEventSchema.safeParse({ + type: "error", + code: "harness_crashed", + message: "boom", + }); + 
expect(result.success).toBe(true); + }); + + it("accepts done", () => { + const result = dispatchSSEEventSchema.safeParse({ type: "done" }); + expect(result.success).toBe(true); + }); +}); + +describe("harnessStreamInputSchema", () => { + const minimalInput: HarnessStreamInputWire = { + threadId: "thr-1", + runId: "run-1", + taskId: "thr-1", + messages: [], + models: { + credentialId: "cred-1", + thinking: { id: "claude-code:opus", title: "Opus" }, + }, + mcp: { + url: "https://mesh.example.com/mcp/virtual-mcp/agent-1", + headers: { Authorization: "Bearer fixture" }, + expiresAt: 9999999999000, + }, + mode: "default", + temperature: 0.7, + toolApprovalLevel: "auto", + user: { id: "user-1", email: "user@example.com" }, + organizationId: "org-1", + virtualMcp: { id: "agent-1" }, + agent: { id: "agent-1" }, + }; + + it("round-trips a minimal CLI harness input", () => { + const result = harnessStreamInputSchema.safeParse(minimalInput); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.threadId).toBe("thr-1"); + expect(result.data.models.thinking.title).toBe("Opus"); + } + }); + + it("strips signal and processLocal fields", () => { + const withExtras = { + ...minimalInput, + signal: { aborted: false }, + processLocal: true, + }; + const result = harnessStreamInputSchema.safeParse(withExtras); + expect(result.success).toBe(true); + if (result.success) { + expect("signal" in result.data).toBe(false); + expect("processLocal" in result.data).toBe(false); + } + }); +}); + +describe("capabilitySchema", () => { + it("accepts known harnesses", () => { + expect(capabilitySchema.safeParse("claude-code").success).toBe(true); + expect(capabilitySchema.safeParse("codex").success).toBe(true); + expect(capabilitySchema.safeParse("decopilot-sandbox").success).toBe(true); + }); + + it("rejects unknown harness", () => { + expect(capabilitySchema.safeParse("not-a-harness").success).toBe(false); + }); +}); diff --git a/apps/mesh/src/links/protocol/schemas.ts 
b/apps/mesh/src/links/protocol/schemas.ts
new file mode 100644
index 0000000000..fa071bf3ee
--- /dev/null
+++ b/apps/mesh/src/links/protocol/schemas.ts
@@ -0,0 +1,114 @@
+import { z } from "zod";
+
+export const capabilitySchema = z.enum([
+  "claude-code",
+  "codex",
+  "decopilot-sandbox",
+]);
+export type Capability = z.infer<typeof capabilitySchema>;
+
+export const registrationPayloadSchema = z.object({
+  machineId: z.string().min(1),
+  cliVersion: z.string().min(1),
+  protocolVersion: z.number().int().nonnegative(),
+  capabilities: z.array(capabilitySchema).min(1),
+  /**
+   * Honored only when the cluster sets MESH_ALLOW_LOCALHOST_LINKS=1.
+   * In production the cluster derives the expected Warp domain from
+   * the authenticated userSub and ignores any value here.
+   */
+  tunnelUrl: z.string().url().optional(),
+});
+export type RegistrationPayload = z.infer<typeof registrationPayloadSchema>;
+
+export const linkEntrySchema = z.object({
+  machineId: z.string(),
+  tunnelUrl: z.string().url(),
+  /**
+   * The raw bearer secret. Both the cluster and the link sign HMACs
+   * with this value — symmetric signing requires identical key material.
+   *
+   * Security posture: this is treated as a working credential at rest in
+   * NATS KV. NATS operators within the cluster's trust boundary can see
+   * it; mitigations are (a) 30s TTL bounds leak windows, (b) rotation =
+   * re-register. A v2 hardening will encrypt at rest with a cluster KMS
+   * key. See spec §"linkSecret at rest" — the Path C symmetric
+   * construction it describes is impractical without shipping the cluster
+   * signing key to the link, so v1 ships with raw-at-rest.
+   */
+  linkSecret: z.string(),
+  cliVersion: z.string(),
+  protocolVersion: z.number().int().nonnegative(),
+  capabilities: z.array(capabilitySchema),
+  createdAt: z.string().datetime(),
+});
+export type LinkEntry = z.infer<typeof linkEntrySchema>;
+
+export const registrationResponseSchema = z.object({
+  /**
+   * Raw bearer secret returned exactly once at registration. The link
+   * holds it in process memory; the cluster persists the same raw value
+   * (see linkEntrySchema.linkSecret). Lost on re-register.
+   */
+  linkSecret: z.string(),
+});
+export type RegistrationResponse = z.infer<typeof registrationResponseSchema>;
+
+export const dispatchSSEEventSchema = z.discriminatedUnion("type", [
+  z.object({
+    type: z.literal("ui-message-chunk"),
+    chunk: z.unknown(),
+  }),
+  z.object({
+    type: z.literal("error"),
+    code: z.string(),
+    message: z.string(),
+  }),
+  z.object({ type: z.literal("done") }),
+]);
+export type DispatchSSEEvent = z.infer<typeof dispatchSSEEventSchema>;
+
+const chatMessageSchema = z.record(z.string(), z.unknown()); // opaque to link-protocol
+
+const modelsConfigSchema = z.object({
+  credentialId: z.string(),
+  thinking: z.object({
+    id: z.string(),
+    title: z.string(),
+    provider: z.string().optional(),
+  }),
+  coding: z.object({ id: z.string(), title: z.string() }).optional(),
+  fast: z.object({ id: z.string(), title: z.string() }).optional(),
+  image: z.object({ id: z.string(), title: z.string() }).optional(),
+  deepResearch: z.object({ id: z.string(), title: z.string() }).optional(),
+});
+
+export const harnessStreamInputSchema = z
+  .object({
+    threadId: z.string(),
+    runId: z.string(),
+    taskId: z.string(),
+    resumeSessionRef: z.string().optional(),
+    messages: z.array(chatMessageSchema),
+    models: modelsConfigSchema,
+    mcp: z.object({
+      url: z.string().url(),
+      headers: z.record(z.string(), z.string()),
+      expiresAt: z.number().int().positive(),
+    }),
+    mode: z.string(),
+    temperature: z.number(),
+    toolApprovalLevel: z.string(),
+    user: z.object({ id: z.string(), email: z.string() }),
+    organizationId: z.string(),
+    organizationSlug: z.string().optional(),
+    virtualMcp: z.record(z.string(), z.unknown()),
+    agent: z.object({ id: z.string() }),
+    branch: z.string().nullable().optional(),
+    triggerId: z.string().optional(),
+    currentThreadTitle: z.string().optional(),
+    traceparent: z.string().optional(),
+  })
+  .strip();
+
+export type HarnessStreamInputWire = z.infer<typeof harnessStreamInputSchema>;
diff --git a/apps/mesh/src/links/protocol/version.test.ts b/apps/mesh/src/links/protocol/version.test.ts
new file mode 100644
index 0000000000..c7ff63c5ec
--- /dev/null
+++ b/apps/mesh/src/links/protocol/version.test.ts
@@ -0,0 +1,32 @@
+import { describe, expect, it } from "bun:test";
+import {
+  isVersionAcceptable,
+  LINK_PROTOCOL_VERSION,
+  MIN_SUPPORTED_LINK_PROTOCOL,
+} from "./version";
+
+describe("link protocol version", () => {
+  it("exposes numeric constants with LINK >= MIN", () => {
+    expect(typeof LINK_PROTOCOL_VERSION).toBe("number");
+    expect(typeof MIN_SUPPORTED_LINK_PROTOCOL).toBe("number");
+    expect(LINK_PROTOCOL_VERSION).toBeGreaterThanOrEqual(
+      MIN_SUPPORTED_LINK_PROTOCOL,
+    );
+  });
+
+  it("accepts MIN_SUPPORTED_LINK_PROTOCOL", () => {
+    expect(isVersionAcceptable(MIN_SUPPORTED_LINK_PROTOCOL)).toBe(true);
+  });
+
+  it("accepts LINK_PROTOCOL_VERSION", () => {
+    expect(isVersionAcceptable(LINK_PROTOCOL_VERSION)).toBe(true);
+  });
+
+  it("rejects MIN - 1", () => {
+    expect(isVersionAcceptable(MIN_SUPPORTED_LINK_PROTOCOL - 1)).toBe(false);
+  });
+
+  it("rejects 0", () => {
+    expect(isVersionAcceptable(0)).toBe(false);
+  });
+});
diff --git a/apps/mesh/src/links/protocol/version.ts b/apps/mesh/src/links/protocol/version.ts
new file mode 100644
index 0000000000..be9ee3683d
--- /dev/null
+++ b/apps/mesh/src/links/protocol/version.ts
@@ -0,0 +1,20 @@
+/**
+ * Bumped manually whenever a wire-breaking change ships:
+ *   - HarnessStreamInput field added/removed in a way old links can't ignore
+ *   - SSE dispatch event shape change
+ *   - Registration payload schema change
+ *   - Daemon route shape change
+ *   - AI SDK major version bump (cluster & link pin in lockstep)
+ */
+export const LINK_PROTOCOL_VERSION = 1;
+
+/**
+ * Cluster rejects link registrations below this with 426. Links MUST
+ * upgrade. Bumped when an older version becomes too costly to support
+ * (typically every 2-3 majors).
+ */ +export const MIN_SUPPORTED_LINK_PROTOCOL = 1; + +export function isVersionAcceptable(reported: number): boolean { + return reported >= MIN_SUPPORTED_LINK_PROTOCOL; +} diff --git a/apps/mesh/src/links/resolve-dispatch-target.test.ts b/apps/mesh/src/links/resolve-dispatch-target.test.ts new file mode 100644 index 0000000000..d69ff80ffc --- /dev/null +++ b/apps/mesh/src/links/resolve-dispatch-target.test.ts @@ -0,0 +1,86 @@ +import { describe, expect, test } from "bun:test"; +import { resolveDispatchTarget } from "./resolve-dispatch-target"; +import type { VmMapEntry } from "@decocms/mesh-sdk"; +import type { LinkRegistry } from "./link-registry"; +import type { LinkEntry } from "./protocol"; + +const linkOnline = ( + caps: string[] = ["claude-code", "codex", "decopilot-sandbox"], +): LinkEntry => + ({ + tunnelUrl: "https://t.example", + linkSecret: "s", + capabilities: caps, + }) as LinkEntry; + +const stubRegistry = (link: LinkEntry | null): LinkRegistry => + ({ get: async () => link }) as unknown as LinkRegistry; + +const cloudVm = ( + kind: "docker" | "freestyle" | "agent-sandbox" = "docker", +): VmMapEntry => + ({ vmId: "v", previewUrl: null, sandboxProviderKind: kind }) as VmMapEntry; + +const localVm = (): VmMapEntry => + ({ + vmId: "v", + previewUrl: null, + sandboxProviderKind: "remote-user", + }) as VmMapEntry; + +describe("resolveDispatchTarget", () => { + test("cloud VM + any harness → local/default", async () => { + const t = await resolveDispatchTarget( + { harnessId: "claude-code", vm: cloudVm(), userId: "u" }, + { linkRegistry: stubRegistry(null) }, + ); + expect(t.kind).toBe("local"); + if (t.kind === "local") expect(t.sandbox).toBe("default"); + }); + + test("remote-user VM + decopilot + link online → local/remote-user", async () => { + const t = await resolveDispatchTarget( + { harnessId: "decopilot", vm: localVm(), userId: "u" }, + { linkRegistry: stubRegistry(linkOnline()) }, + ); + expect(t.kind).toBe("local"); + if (t.kind === "local") 
expect(t.sandbox).toBe("remote-user"); + }); + + test("remote-user VM + claude-code + link online → remote-cli", async () => { + const t = await resolveDispatchTarget( + { harnessId: "claude-code", vm: localVm(), userId: "u" }, + { linkRegistry: stubRegistry(linkOnline()) }, + ); + expect(t.kind).toBe("remote-cli"); + }); + + test("remote-user VM + codex + link online → remote-cli", async () => { + const t = await resolveDispatchTarget( + { harnessId: "codex", vm: localVm(), userId: "u" }, + { linkRegistry: stubRegistry(linkOnline()) }, + ); + expect(t.kind).toBe("remote-cli"); + }); + + test("remote-user VM + link offline → error/link_offline", async () => { + const t = await resolveDispatchTarget( + { harnessId: "claude-code", vm: localVm(), userId: "u" }, + { linkRegistry: stubRegistry(null) }, + ); + expect(t.kind).toBe("error"); + if (t.kind === "error") expect(t.reason).toBe("link_offline"); + }); + + test("remote-user VM + link missing capability → error/capability_missing", async () => { + const t = await resolveDispatchTarget( + { harnessId: "claude-code", vm: localVm(), userId: "u" }, + { linkRegistry: stubRegistry(linkOnline(["decopilot-sandbox"])) }, + ); + expect(t.kind).toBe("error"); + if (t.kind === "error") { + expect(t.reason).toBe("capability_missing"); + expect(t.activeCapabilities).toEqual(["decopilot-sandbox"]); + } + }); +}); diff --git a/apps/mesh/src/links/resolve-dispatch-target.ts b/apps/mesh/src/links/resolve-dispatch-target.ts new file mode 100644 index 0000000000..823069406a --- /dev/null +++ b/apps/mesh/src/links/resolve-dispatch-target.ts @@ -0,0 +1,70 @@ +/** + * Resolve where a dispatch should execute, from the harness and the VM entry + * the user has selected for this (virtualMcpId, branch). 
+ *
+ * The VM entry's `sandboxProviderKind` is the single source of truth:
+ *   - cloud kind (docker/freestyle/agent-sandbox) → cluster default sandbox
+ *   - `remote-user` + decopilot → cluster decopilot, sandbox tools tunneled
+ *   - `remote-user` + claude-code/codex → whole stream dispatched to the laptop
+ *
+ * Link health is checked only for `remote-user` VMs. Offline/missing-capability
+ * paths return an `error` target which `POST /messages` surfaces as 409.
+ */
+import type { VmMapEntry } from "@decocms/mesh-sdk";
+import type { Capability, LinkEntry } from "./protocol";
+import type { LinkRegistry } from "./link-registry";
+import type { HarnessId } from "../harnesses";
+
+export type DispatchTarget =
+  | {
+      kind: "error";
+      reason: "link_offline" | "capability_missing";
+      activeCapabilities?: string[];
+    }
+  | { kind: "local"; sandbox: "default" | "remote-user"; link?: LinkEntry }
+  | { kind: "remote-cli"; link: LinkEntry };
+
+interface Input {
+  harnessId: HarnessId;
+  vm: VmMapEntry;
+  userId: string;
+}
+
+interface Deps {
+  linkRegistry: LinkRegistry;
+}
+
+function capabilityFor(harnessId: HarnessId): Capability | null {
+  if (harnessId === "claude-code") return "claude-code";
+  if (harnessId === "codex") return "codex";
+  if (harnessId === "decopilot") return "decopilot-sandbox";
+  return null;
+}
+
+export async function resolveDispatchTarget(
+  input: Input,
+  deps: Deps,
+): Promise<DispatchTarget> {
+  const kind = input.vm.sandboxProviderKind;
+
+  if (kind !== "remote-user") {
+    return { kind: "local", sandbox: "default" };
+  }
+
+  const link = await deps.linkRegistry.get(input.userId);
+  if (!link) return { kind: "error", reason: "link_offline" };
+
+  const requiredCap = capabilityFor(input.harnessId);
+  if (requiredCap && !link.capabilities.includes(requiredCap)) {
+    return {
+      kind: "error",
+      reason: "capability_missing",
+      activeCapabilities: link.capabilities,
+    };
+  }
+
+  if (input.harnessId === "decopilot") {
+    return { kind: "local", sandbox: "remote-user", link };
+  }
+  return { kind: "remote-cli", link };
+}
diff --git a/apps/mesh/src/links/routes.test.ts b/apps/mesh/src/links/routes.test.ts
new file mode 100644
index 0000000000..3e36f286de
--- /dev/null
+++ b/apps/mesh/src/links/routes.test.ts
@@ -0,0 +1,180 @@
+import { describe, expect, it } from "bun:test";
+import { Hono } from "hono";
+import { createInMemoryLinkRegistry } from "./link-registry";
+import { registerLinksRoutes } from "./routes";
+
+function buildApp(allowLocalhost = true) {
+  const registry = createInMemoryLinkRegistry({ nowSeconds: () => 0 });
+  const app = new Hono();
+  registerLinksRoutes(app, {
+    linkRegistry: registry,
+    getAuthenticatedUserSub: (c) => c.req.header("x-test-sub") ?? null,
+    allowLocalhostLinks: allowLocalhost,
+  });
+  return { app, registry };
+}
+
+describe("POST /api/links", () => {
+  it("rejects when no session", async () => {
+    const { app } = buildApp();
+    const res = await app.request("/api/links", {
+      method: "POST",
+      body: JSON.stringify({
+        machineId: "m1",
+        cliVersion: "1.0.0",
+        protocolVersion: 1,
+        capabilities: ["claude-code"],
+        tunnelUrl: "http://localhost:5174",
+      }),
+      headers: { "content-type": "application/json" },
+    });
+    expect(res.status).toBe(401);
+  });
+
+  it("registers and returns linkSecret in dev (localhost)", async () => {
+    const { app, registry } = buildApp(true);
+    const res = await app.request("/api/links", {
+      method: "POST",
+      body: JSON.stringify({
+        machineId: "m1",
+        cliVersion: "1.0.0",
+        protocolVersion: 1,
+        capabilities: ["claude-code"],
+        tunnelUrl: "http://localhost:5174",
+      }),
+      headers: {
+        "content-type": "application/json",
+        "x-test-sub": "user-a",
+      },
+    });
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { linkSecret: string };
+    expect(typeof body.linkSecret).toBe("string");
+    expect(body.linkSecret.length).toBeGreaterThan(20);
+    const stored = await registry.get("user-a");
+    expect(stored).not.toBeNull();
+    // HMAC signing requires
symmetric key material — the stored value is the + // RAW secret, identical to what the body returned (see schemas.ts JSDoc). + expect(stored?.linkSecret).toBe(body.linkSecret); + expect(stored?.tunnelUrl).toBe("http://localhost:5174"); + }); + + it("rejects below MIN_SUPPORTED_LINK_PROTOCOL", async () => { + const { app } = buildApp(); + const res = await app.request("/api/links", { + method: "POST", + body: JSON.stringify({ + machineId: "m1", + cliVersion: "1.0.0", + protocolVersion: 0, + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + headers: { + "content-type": "application/json", + "x-test-sub": "user-a", + }, + }); + expect(res.status).toBe(426); + }); + + it("rejects 409 when another machineId is active", async () => { + const { app } = buildApp(); + const first = await app.request("/api/links", { + method: "POST", + body: JSON.stringify({ + machineId: "m1", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + headers: { + "content-type": "application/json", + "x-test-sub": "user-a", + }, + }); + expect(first.status).toBe(200); + const res = await app.request("/api/links", { + method: "POST", + body: JSON.stringify({ + machineId: "m2", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + headers: { + "content-type": "application/json", + "x-test-sub": "user-a", + }, + }); + expect(res.status).toBe(409); + }); +}); + +describe("POST /api/links/heartbeat", () => { + it("returns 204 on valid linkSecret", async () => { + const { app } = buildApp(true); + const reg = await app.request("/api/links", { + method: "POST", + body: JSON.stringify({ + machineId: "m1", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + headers: { "content-type": "application/json", "x-test-sub": "user-a" }, + }); + expect(reg.status).toBe(200); + 
const { linkSecret } = (await reg.json()) as { linkSecret: string }; + + // Heartbeat carries userSub in X-Mesh-User-Sub and the secret in + // X-Link-Secret (not Authorization: Bearer) so it never enters Better + // Auth's API-key validator on the cluster. + const hb = await app.request("/api/links/heartbeat", { + method: "POST", + headers: { + "x-link-secret": linkSecret, + "x-mesh-user-sub": "user-a", + }, + }); + expect(hb.status).toBe(204); + }); + + it("returns 401 on wrong linkSecret", async () => { + const { app } = buildApp(true); + await app.request("/api/links", { + method: "POST", + body: JSON.stringify({ + machineId: "m1", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: ["claude-code"], + tunnelUrl: "http://localhost:5174", + }), + headers: { "content-type": "application/json", "x-test-sub": "user-a" }, + }); + + const hb = await app.request("/api/links/heartbeat", { + method: "POST", + headers: { + "x-link-secret": "not-the-real-secret", + "x-mesh-user-sub": "user-a", + }, + }); + expect(hb.status).toBe(401); + }); +}); + +describe("GET /api/links/me", () => { + it("returns offline when no link", async () => { + const { app } = buildApp(); + const res = await app.request("/api/links/me", { + headers: { "x-test-sub": "user-a" }, + }); + const body = (await res.json()) as { status: string }; + expect(body.status).toBe("offline"); + }); +}); diff --git a/apps/mesh/src/links/routes.ts b/apps/mesh/src/links/routes.ts new file mode 100644 index 0000000000..edc8581451 --- /dev/null +++ b/apps/mesh/src/links/routes.ts @@ -0,0 +1,211 @@ +/** + * `/api/links/*` HTTP routes + * + * Implements the four endpoints from the link spec: + * - `POST /api/links` (register) — session-authed; mints a fresh + * `linkSecret`, persists the RAW value in the registry, returns it to + * the caller. Returns 409 if another `machineId` already has an + * active entry for this user. 
+ * - `POST /api/links/heartbeat` — authed via the `X-Link-Secret` header + * against the stored `linkSecret`; refreshes the entry's TTL. + * - `DELETE /api/links/me` — same `X-Link-Secret` auth as heartbeat; + * graceful shutdown. + * + * The link daemon presents its `linkSecret` in `X-Link-Secret` rather than + * `Authorization: Bearer …` so it never enters Better Auth's API-key + * validator (which logs `INVALID_API_KEY` for every unknown bearer it + * sees — a per-heartbeat false positive when the secret isn't actually + * an API key). + * - `GET /api/links/me` — session-authed; status for the admin UI. + * + * The `linkSecret` field stored in `LinkRegistry` is the RAW bearer + * secret. HMAC signing requires symmetric key material on both sides; a + * hash-at-rest construction is impractical without shipping the cluster + * signing key to the link binary (which defeats the point of HMAC). v1 + * accepts that NATS operators within the cluster's trust boundary can + * see working bearer tokens; mitigations are (a) 30s TTL bounds the leak + * window, (b) rotation = re-register. v2 hardening will encrypt at rest + * with a cluster KMS key. + */ +import { randomBytes, timingSafeEqual } from "node:crypto"; +import { + LINK_PROTOCOL_VERSION, + MIN_SUPPORTED_LINK_PROTOCOL, + type LinkEntry, + isVersionAcceptable, + registrationPayloadSchema, +} from "./protocol"; +import type { Env, Hono } from "hono"; +import type { BlankSchema } from "hono/types"; +import type { Context } from "hono"; +import { HTTPException } from "hono/http-exception"; +import type { LinkRegistry } from "./link-registry"; + +export interface LinksRoutesDeps { + linkRegistry: LinkRegistry; + /** + * Pluggable so tests can inject a session. Returns userSub (the stable + * Better Auth user id) or null when unauthenticated. + */ + getAuthenticatedUserSub: (c: Context) => string | null; + /** When true, accept `tunnelUrl: http://localhost:*` from the body. 
/**
 * Builds the canonical production tunnel origin for a user.
 *
 * The value is derived only from `userSub`; the register handler falls back
 * to it whenever a body-supplied `tunnelUrl` is not honored.
 *
 * @param userSub - Stable user id of the authenticated caller.
 * @returns `https://link-<userSub>.deco.host`.
 */
function expectedTunnelDomain(userSub: string): string {
  return `https://link-${userSub}.deco.host`;
}

/**
 * Constant-time equality check for two secrets.
 *
 * `crypto.timingSafeEqual` throws a `RangeError` when its inputs differ in
 * byte length. The length guard therefore has to run on the encoded buffers,
 * not on the strings: two strings with the same number of UTF-16 code units
 * can encode to different numbers of UTF-8 bytes (e.g. `"abc"` vs `"abé"`).
 * Guarding on string length let such a pair reach `timingSafeEqual` and
 * surface as an unhandled exception instead of a plain mismatch.
 *
 * @param a - First secret (e.g. the stored `linkSecret`).
 * @param b - Second secret (e.g. the value presented by the caller).
 * @returns `true` only when both strings encode to identical bytes; never
 *   throws on mismatched lengths.
 */
function timingSafeEqualStrings(a: string, b: string): boolean {
  const left = Buffer.from(a, "utf8");
  const right = Buffer.from(b, "utf8");
  // Length is not secret here; bail out before timingSafeEqual can throw.
  if (left.length !== right.length) return false;
  return timingSafeEqual(left, right);
}

/**
 * Reports whether `raw` parses as a URL whose hostname is exactly
 * `localhost`.
 *
 * NOTE(review): only the hostname is inspected — scheme and port are not
 * validated, and loopback literals such as `127.0.0.1` are not treated as
 * localhost. Confirm that is the intended dev-mode policy.
 *
 * @param raw - Candidate URL string.
 * @returns `false` for unparseable input or any other hostname.
 */
function isLocalhostUrl(raw: string): boolean {
  try {
    return new URL(raw).hostname === "localhost";
  } catch {
    return false;
  }
}
+ const existing = await deps.linkRegistry.get(userSub); + if (existing && existing.machineId !== payload.machineId) { + return c.json( + { code: "another_machine_active", activeMachineId: existing.machineId }, + 409, + ); + } + + // Re-registering with the same machineId mints a fresh linkSecret and + // overwrites the entry. The previous secret is immediately invalidated — + // any in-flight heartbeat from an older link process will start 401-ing + // after this point. Documented behavior: "rotation = re-register" + // (link-protocol schemas.ts, linkSecret JSDoc). + const linkSecretRaw = randomBytes(32).toString("base64url"); + + const entry: LinkEntry = { + machineId: payload.machineId, + tunnelUrl, + linkSecret: linkSecretRaw, + cliVersion: payload.cliVersion, + protocolVersion: payload.protocolVersion, + capabilities: payload.capabilities, + createdAt: existing?.createdAt ?? new Date().toISOString(), + }; + + await deps.linkRegistry.put(userSub, entry); + return c.json({ linkSecret: linkSecretRaw }); + }); + + // POST /api/links/heartbeat — authenticated by X-Link-Secret + X-Mesh-User-Sub + // + // The link binary loses its OAuth session after registration (it doesn't + // hold an active session cookie or API key). Heartbeat identifies the user + // via the X-Mesh-User-Sub header and proves identity via the linkSecret + // presented in X-Link-Secret. The cluster verifies it matches the stored + // value for that userSub. 
+ app.post("/api/links/heartbeat", async (c) => { + const userSub = c.req.header("x-mesh-user-sub"); + if (!userSub) { + throw new HTTPException(400, { message: "missing X-Mesh-User-Sub" }); + } + const presented = c.req.header("x-link-secret"); + if (!presented) { + throw new HTTPException(401, { message: "missing X-Link-Secret" }); + } + + const existing = await deps.linkRegistry.get(userSub); + if (!existing) throw new HTTPException(401, { message: "no link" }); + if (!timingSafeEqualStrings(existing.linkSecret, presented)) { + throw new HTTPException(401, { message: "bad secret" }); + } + + // Re-put refreshes the TTL. + await deps.linkRegistry.put(userSub, existing); + return c.body(null, 204); + }); + + // DELETE /api/links/me — graceful shutdown, same auth model as heartbeat + app.delete("/api/links/me", async (c) => { + const userSub = c.req.header("x-mesh-user-sub"); + if (!userSub) { + throw new HTTPException(400, { message: "missing X-Mesh-User-Sub" }); + } + const presented = c.req.header("x-link-secret"); + if (!presented) { + throw new HTTPException(401, { message: "missing X-Link-Secret" }); + } + const existing = await deps.linkRegistry.get(userSub); + if (!existing) return c.body(null, 204); // idempotent — already gone + if (!timingSafeEqualStrings(existing.linkSecret, presented)) { + throw new HTTPException(401, { message: "bad secret" }); + } + await deps.linkRegistry.delete(userSub); + return c.body(null, 204); + }); + + // GET /api/links/me — UI status + app.get("/api/links/me", async (c) => { + const userSub = deps.getAuthenticatedUserSub(c); + if (!userSub) throw new HTTPException(401, { message: "no session" }); + const link = await deps.linkRegistry.get(userSub); + if (!link) return c.json({ status: "offline" }); + return c.json({ + status: "online", + capabilities: link.capabilities, + machineId: link.machineId, + cliVersion: link.cliVersion, + currentProtocolVersion: LINK_PROTOCOL_VERSION, + reportedProtocolVersion: link.protocolVersion, 
+ }); + }); +} diff --git a/apps/mesh/src/sandbox/claim-handle.ts b/apps/mesh/src/sandbox/claim-handle.ts index fb5ebc4ba7..426d248ce9 100644 --- a/apps/mesh/src/sandbox/claim-handle.ts +++ b/apps/mesh/src/sandbox/claim-handle.ts @@ -1,8 +1,8 @@ import { computeHandle, - resolveRunnerKindFromEnv, + resolveSandboxProviderKindFromEnv, type SandboxId, -} from "@decocms/sandbox/runner"; +} from "@decocms/sandbox/provider"; /** * Compute the claim handle for a sandbox using the correct hashLen for the @@ -14,10 +14,10 @@ import { * what a runner stored (vm-events, vm-exec, etc.). */ export function computeClaimHandle(id: SandboxId, branch: string): string { - const runnerKind = resolveRunnerKindFromEnv(); + const providerKind = resolveSandboxProviderKindFromEnv(); return computeHandle( id, branch, - runnerKind === "agent-sandbox" ? { hashLen: 16 } : {}, + providerKind === "agent-sandbox" ? { hashLen: 16 } : {}, ); } diff --git a/apps/mesh/src/sandbox/lifecycle.test.ts b/apps/mesh/src/sandbox/lifecycle.test.ts index a8aa0634ff..b0e5c17ff4 100644 --- a/apps/mesh/src/sandbox/lifecycle.test.ts +++ b/apps/mesh/src/sandbox/lifecycle.test.ts @@ -1,19 +1,19 @@ import { afterEach, beforeEach, describe, expect, it } from "bun:test"; import { - DockerSandboxRunner, + DockerSandboxProvider, type ClaimPhase, - type SandboxRunner, -} from "@decocms/sandbox/runner"; + type SandboxProvider, +} from "@decocms/sandbox/provider"; import type { MeshContext } from "@/core/mesh-context"; import { __resetSharedLifecyclesForTesting, asDockerRunner, - getRunnerByKind, + getSandboxProviderByKind, subscribeLifecycle, } from "./lifecycle"; // Minimal MeshContext stub — lifecycle only reads ctx.db, and only to hand -// it to the KyselySandboxRunnerStateStore constructor (no queries run until +// it to the KyselySandboxProviderStateStore constructor (no queries run until // an actual ensure/delete call). 
const stubCtx = { db: {} } as unknown as MeshContext; @@ -22,14 +22,14 @@ describe("asDockerRunner", () => { expect(asDockerRunner(null)).toBeNull(); }); - it("returns the instance unchanged for a DockerSandboxRunner", () => { - const runner = new DockerSandboxRunner(); + it("returns the instance unchanged for a DockerSandboxProvider", () => { + const runner = new DockerSandboxProvider(); expect(asDockerRunner(runner)).toBe(runner); }); it("returns null for a non-Docker runner", () => { - // Duck-typed non-Docker runner — satisfies the SandboxRunner shape but - // isn't a DockerSandboxRunner instance, so instanceof narrows to null. + // Duck-typed non-Docker runner — satisfies the SandboxProvider shape but + // isn't a DockerSandboxProvider instance, so instanceof narrows to null. const fake = { kind: "agent-sandbox" as const, ensure: async () => ({ handle: "h", workdir: "/app", previewUrl: null }), @@ -49,7 +49,7 @@ describe("asDockerRunner", () => { }); }); -describe("getRunnerByKind caching", () => { +describe("getSandboxProviderByKind caching", () => { // The `runners` cache lives at module scope, so a kind cached by one test // leaks into later tests. Isolate by claiming a kind once per suite and // asserting identity within the same test only. 
@@ -61,11 +61,11 @@ describe("getRunnerByKind caching", () => { afterEach(() => {}); - it("returns the same DockerSandboxRunner instance across calls", async () => { - const a = await getRunnerByKind(stubCtx, "docker"); - const b = await getRunnerByKind(stubCtx, "docker"); + it("returns the same DockerSandboxProvider instance across calls", async () => { + const a = await getSandboxProviderByKind(stubCtx, "docker"); + const b = await getSandboxProviderByKind(stubCtx, "docker"); expect(a).toBe(b); - expect(a).toBeInstanceOf(DockerSandboxRunner); + expect(a).toBeInstanceOf(DockerSandboxProvider); }); }); @@ -74,7 +74,7 @@ describe("getRunnerByKind caching", () => { // --------------------------------------------------------------------------- interface FakeWatchableHandle { - runner: SandboxRunner; + runner: SandboxProvider; /** How many times the source generator has been started. */ starts: () => number; /** Push a phase to the active source generator. */ @@ -84,7 +84,7 @@ interface FakeWatchableHandle { } /** - * Synthesize a `SandboxRunner` whose `watchClaimLifecycle` is an async + * Synthesize a `SandboxProvider` whose `watchClaimLifecycle` is an async * generator we can drive frame-by-frame from the test. The other interface * methods are no-ops; only the watcher is exercised here. Tracks how many * times the generator has been instantiated (so we can prove dedup). @@ -118,7 +118,7 @@ function makeFakeWatchable(): FakeWatchableHandle { } } - const runner: SandboxRunner = { + const runner: SandboxProvider = { kind: "agent-sandbox", ensure: async () => ({ handle: "h", workdir: "/app", previewUrl: null }), exec: async () => ({ diff --git a/apps/mesh/src/sandbox/lifecycle.ts b/apps/mesh/src/sandbox/lifecycle.ts index 10fb6f9d01..0ba8a8730c 100644 --- a/apps/mesh/src/sandbox/lifecycle.ts +++ b/apps/mesh/src/sandbox/lifecycle.ts @@ -1,6 +1,6 @@ /** * Runner singletons, one per kind. 
VM_DELETE dispatches on the entry's - * recorded runnerKind (not env), so a pod that flipped STUDIO_SANDBOX_RUNNER + * recorded sandboxProviderKind (not env), so a pod that flipped STUDIO_SANDBOX_RUNNER * between start and stop still tears down the right kind of VM. * Boot/shutdown sweeps are Docker-only — other runners' sandboxes outlive * mesh by design, so a generic sweep would nuke active user VMs. @@ -8,22 +8,22 @@ import type { MeshContext } from "@/core/mesh-context"; import { - DockerSandboxRunner, - resolveRunnerKindFromEnv, - type RunnerKind, - type SandboxRunner, -} from "@decocms/sandbox/runner"; -import type { ClaimPhase } from "@decocms/sandbox/runner/agent-sandbox"; + DockerSandboxProvider, + resolveSandboxProviderKindFromEnv, + type SandboxProviderKind, + type SandboxProvider, +} from "@decocms/sandbox/provider"; +import type { ClaimPhase } from "@decocms/sandbox/provider/agent-sandbox"; import { getDb } from "@/database"; import type { Kysely } from "kysely"; import { meter } from "@/observability"; import type { Database as DatabaseSchema } from "@/storage/types"; -import { KyselySandboxRunnerStateStore } from "@/storage/sandbox-runner-state"; +import { KyselySandboxProviderStateStore } from "@/storage/sandbox-runner-state"; // Stashed on globalThis so they survive Bun's `--hot` reload. The local // sandbox ingress is a long-lived `net.Server` registered at the top of // `apps/mesh/src/index.ts`; it isn't torn down when the entry point -// re-evaluates, and its closure captures `getSharedRunnerIfInit` from +// re-evaluates, and its closure captures `getSharedSandboxProviderIfInit` from // whichever instance of this module was active at boot. 
Without the // global anchor, post-reload requests to `.localhost:7070` would // look up runners in a stale module's empty map → 503 "Sandbox Runner @@ -31,26 +31,27 @@ import { KyselySandboxRunnerStateStore } from "@/storage/sandbox-runner-state"; const RUNNERS_KEY = Symbol.for("decocms.sandbox.lifecycle.runners"); const INFLIGHT_KEY = Symbol.for("decocms.sandbox.lifecycle.inflight"); type LifecycleGlobal = { - [RUNNERS_KEY]?: Partial>; - [INFLIGHT_KEY]?: Partial>>; + [RUNNERS_KEY]?: Partial>; + [INFLIGHT_KEY]?: Partial< + Record> + >; }; const lifecycleGlobal = globalThis as unknown as LifecycleGlobal; -const runners: Partial> = (lifecycleGlobal[ - RUNNERS_KEY -] ??= {}); +const runners: Partial> = + (lifecycleGlobal[RUNNERS_KEY] ??= {}); // In-flight instantiate() promises, memoized per kind. Two concurrent // callers on a cold mesh would otherwise both miss the resolved-runner // cache and both call instantiate(); memoizing the promise (and only // promoting to `runners` once it resolves) collapses them to a single // build. Cleared on failure so a retry can take a fresh swing. -const inflight: Partial>> = +const inflight: Partial>> = (lifecycleGlobal[INFLIGHT_KEY] ??= {}); function resolveOnce( - kind: RunnerKind, - build: () => Promise, -): Promise { + kind: SandboxProviderKind, + build: () => Promise, +): Promise { const cached = runners[kind]; if (cached) return Promise.resolve(cached); const pending = inflight[kind]; @@ -78,7 +79,7 @@ function readPreviewUrlPattern(): string | undefined { // Per-env SandboxTemplate name. The sandbox-env Helm chart suffixes the // template name with envName so multiple envs share `agent-sandbox-system` // without collisions; mesh in this env must point its claims at the -// matching suffixed name. Empty/unset → AgentSandboxRunner's built-in +// matching suffixed name. Empty/unset → AgentSandboxProvider's built-in // default ("studio-sandbox") so single-env installs that didn't suffix // keep working. 
function readSandboxTemplateName(): string | undefined { @@ -95,7 +96,7 @@ function readEnvName(): string | undefined { // sandbox-env helm chart's Secret. Set on the mesh side from the same // Secret so both ends agree on what the warm-pool sentinel is. // -// Presence flips AgentSandboxRunner into warm-pool mode (claims with +// Presence flips AgentSandboxProvider into warm-pool mode (claims with // `warmpool: "default"` + empty env; per-claim token rotated post-bind). // Empty/unset → legacy cold-start path with per-claim env injection. function readSandboxSentinelToken(): string | undefined { @@ -133,34 +134,25 @@ function readPreviewGateway(): { name: string; namespace: string } | undefined { } async function instantiate( - kind: RunnerKind, + kind: SandboxProviderKind, db: Kysely, -): Promise { - const stateStore = new KyselySandboxRunnerStateStore(db); +): Promise { + const stateStore = new KyselySandboxProviderStateStore(db); const previewUrlPattern = readPreviewUrlPattern(); switch (kind) { - case "host": { - const { HostSandboxRunner } = await import("@decocms/sandbox/runner"); - const { getSettings } = await import("@/settings"); - return new HostSandboxRunner({ - homeDir: getSettings().dataDir, - stateStore, - previewUrlPattern, - }); - } case "docker": - return new DockerSandboxRunner({ stateStore, previewUrlPattern }); + return new DockerSandboxProvider({ stateStore, previewUrlPattern }); case "agent-sandbox": { // Dynamic import — @kubernetes/client-node is heavy and only needed // when STUDIO_SANDBOX_RUNNER=agent-sandbox. Docker deploys never // load it. - const { AgentSandboxRunner } = await import( - "@decocms/sandbox/runner/agent-sandbox" + const { AgentSandboxProvider } = await import( + "@decocms/sandbox/provider/agent-sandbox" ); // `meter` is reassigned by initObservability() after sdk.start(); read // it at runner construction (post-init) so we get the real instruments // not the no-op evaluated at module load. 
- return new AgentSandboxRunner({ + return new AgentSandboxProvider({ stateStore, previewUrlPattern, sandboxTemplateName: readSandboxTemplateName(), @@ -170,6 +162,17 @@ async function instantiate( meter, }); } + case "remote-user": { + // remote-user is never the cluster-wide default — there is no + // ambient `LinkEntry` to bind to here. It is constructed per-run by + // `getSharedSandboxProvider` from `ctx.linkForCurrentRun`. Hitting + // this branch means VM_DELETE was called for a `remote-user` row + // without a live link context, which today should not happen (the + // remote-user provider doesn't write to `sandbox_runner_state`). + throw new Error( + "remote-user runner cannot be instantiated without a per-run LinkEntry — call getSharedSandboxProvider with ctx.linkForCurrentRun set.", + ); + } default: { const exhaustive: never = kind; throw new Error(`Unknown runner kind: ${String(exhaustive)}`); @@ -177,15 +180,71 @@ async function instantiate( } } -export function getSharedRunner(ctx: MeshContext): Promise { - return getRunnerByKind(ctx, resolveRunnerKindFromEnv()); +export async function getSharedSandboxProvider( + ctx: MeshContext, +): Promise { + // Per-run override: decopilot runs whose dispatch target is the user's + // laptop forward Code Sandbox tool calls to the link daemon instead of + // the cluster-managed runner. We build a fresh provider per call because + // its only state is an in-memory (handle → sandboxUrl) map and the link + // identity is per-run — caching across runs would mix sandboxes from + // different users. + if ( + ctx.sandboxPreference === "remote-user" && + ctx.linkForCurrentRun !== undefined + ) { + return buildRemoteUserProvider(ctx, ctx.linkForCurrentRun); + } + + const kind = resolveSandboxProviderKindFromEnv(); + // Auto-resolve for tools that don't go through `prepareRun` (VM_START, + // ensureVmForBranch, sandbox preview/event routes). 
When env says + // `remote-user`, there is no cluster-side singleton to fall back on — + // the provider is per-user. Resolve the acting user's link from the + // registry here so callers don't each have to do it. + if (kind === "remote-user") { + const userId = ctx.auth.user?.id; + if (!userId) { + throw new Error( + "remote-user sandbox provider requires an authenticated user — got an unauthenticated MeshContext.", + ); + } + if (!ctx.linkRegistry) { + throw new Error( + "remote-user sandbox provider requires ctx.linkRegistry to be wired (set on MeshContextConfig).", + ); + } + const link = await ctx.linkRegistry.get(userId); + if (!link) { + throw new Error( + `No link daemon registered for user "${userId}". Start one with \`deco link\` (or run \`bun run dev --local-sandbox-provider\` for dev).`, + ); + } + return buildRemoteUserProvider(ctx, link); + } + + return getSandboxProviderByKind(ctx, kind); +} + +async function buildRemoteUserProvider( + ctx: MeshContext, + link: NonNullable, +): Promise { + const { RemoteUserSandboxProvider } = await import( + "@decocms/sandbox/provider/remote-user" + ); + const stateStore = new KyselySandboxProviderStateStore(ctx.db); + return new RemoteUserSandboxProvider({ + link: { tunnelUrl: link.tunnelUrl, linkSecret: link.linkSecret }, + stateStore, + }); } -/** VM_DELETE uses this so teardown follows the entry's recorded runnerKind. */ -export function getRunnerByKind( +/** VM_DELETE uses this so teardown follows the entry's recorded sandboxProviderKind. */ +export function getSandboxProviderByKind( ctx: MeshContext, - kind: RunnerKind, -): Promise { + kind: SandboxProviderKind, +): Promise { return resolveOnce(kind, () => instantiate(kind, ctx.db)); } @@ -196,10 +255,10 @@ export function getRunnerByKind( * MeshContext (the state store only needs a Kysely instance). Returns null * when no runner kind is configured. 
*/ -export async function getOrInitSharedRunner(): Promise { - let kind: RunnerKind; +export async function getOrInitSharedRunner(): Promise { + let kind: SandboxProviderKind; try { - kind = resolveRunnerKindFromEnv(); + kind = resolveSandboxProviderKindFromEnv(); } catch (err) { console.warn( "[lifecycle] cannot resolve sandbox runner:", @@ -215,10 +274,10 @@ export async function getOrInitSharedRunner(): Promise { * MeshContext (and DB connection) before any request touches a sandbox. * Returns null if env is unresolved. */ -export function getSharedRunnerIfInit(): SandboxRunner | null { - let kind: RunnerKind; +export function getSharedSandboxProviderIfInit(): SandboxProvider | null { + let kind: SandboxProviderKind; try { - kind = resolveRunnerKindFromEnv(); + kind = resolveSandboxProviderKindFromEnv(); } catch { return null; } @@ -227,9 +286,9 @@ export function getSharedRunnerIfInit(): SandboxRunner | null { /** Narrow to Docker for Docker-only methods (resolveDevPort / resolveDaemonPort). */ export function asDockerRunner( - runner: SandboxRunner | null, -): DockerSandboxRunner | null { - return runner instanceof DockerSandboxRunner ? runner : null; + runner: SandboxProvider | null, +): DockerSandboxProvider | null { + return runner instanceof DockerSandboxProvider ? runner : null; } // --------------------------------------------------------------------------- @@ -293,7 +352,7 @@ export interface LifecycleHandle { * observed (whichever comes first). 
*/ export function subscribeLifecycle( - runner: SandboxRunner, + runner: SandboxProvider, claimName: string, onPhase: (phase: ClaimPhase) => void, ): LifecycleHandle { @@ -367,7 +426,7 @@ function makeUnsubscribeHandle( } async function pumpLifecycleSource( - runner: SandboxRunner, + runner: SandboxProvider, claimName: string, entry: SharedLifecycleEntry, ): Promise { diff --git a/apps/mesh/src/sandbox/preview-proxy.test.ts b/apps/mesh/src/sandbox/preview-proxy.test.ts index eacb9f9566..a35447b121 100644 --- a/apps/mesh/src/sandbox/preview-proxy.test.ts +++ b/apps/mesh/src/sandbox/preview-proxy.test.ts @@ -8,7 +8,7 @@ import { /** * Inline mirror of `applyPreviewPattern` from - * `packages/sandbox/server/runner/shared/preview-url.ts` — kept here as a + * `packages/sandbox/server/provider/shared/preview-url.ts` — kept here as a * fixture so the round-trip test below has no cross-package coupling. If the * real implementation drifts, the round-trip test will fail and force this * mirror to update too. diff --git a/apps/mesh/src/sandbox/preview-proxy.ts b/apps/mesh/src/sandbox/preview-proxy.ts index 430ead220b..e1842343d8 100644 --- a/apps/mesh/src/sandbox/preview-proxy.ts +++ b/apps/mesh/src/sandbox/preview-proxy.ts @@ -17,7 +17,7 @@ * the admin surface stays uncallable from preview hosts. */ -import type { AgentSandboxRunner } from "@decocms/sandbox/runner/agent-sandbox"; +import type { AgentSandboxProvider } from "@decocms/sandbox/provider/agent-sandbox"; /** * Cap on frames buffered between client upgrade and upstream WS open. Vite @@ -94,7 +94,7 @@ export interface PreviewProxyDeps { * the agent-sandbox runner — the caller treats null as "not a preview * deployment" and falls through. 
*/ - getRunner: () => Promise; + getRunner: () => Promise; baseDomain: string; } diff --git a/apps/mesh/src/sandbox/resolve-default-provider-kind.test.ts b/apps/mesh/src/sandbox/resolve-default-provider-kind.test.ts new file mode 100644 index 0000000000..e3506ad9e1 --- /dev/null +++ b/apps/mesh/src/sandbox/resolve-default-provider-kind.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, test } from "bun:test"; +import { resolveDefaultSandboxProviderKind } from "./resolve-default-provider-kind"; +import type { LinkRegistry } from "@/links/link-registry"; +import type { LinkEntry } from "../links/protocol"; + +const linkOnline = ( + caps: string[] = ["claude-code", "codex", "decopilot-sandbox"], +): LinkEntry => + ({ + tunnelUrl: "https://t.example", + linkSecret: "s", + capabilities: caps, + }) as LinkEntry; + +function stubRegistry(link: LinkEntry | null): LinkRegistry { + return { get: async () => link } as unknown as LinkRegistry; +} + +describe("resolveDefaultSandboxProviderKind", () => { + test("returns remote-user when the user's link is online", async () => { + const kind = await resolveDefaultSandboxProviderKind("u-1", { + linkRegistry: stubRegistry(linkOnline()), + resolveEnvKind: () => "docker", + }); + expect(kind).toBe("remote-user"); + }); + + test("falls back to env kind when no link is registered", async () => { + const kind = await resolveDefaultSandboxProviderKind("u-1", { + linkRegistry: stubRegistry(null), + resolveEnvKind: () => "docker", + }); + expect(kind).toBe("docker"); + }); +}); diff --git a/apps/mesh/src/sandbox/resolve-default-provider-kind.ts b/apps/mesh/src/sandbox/resolve-default-provider-kind.ts new file mode 100644 index 0000000000..0d3a32672b --- /dev/null +++ b/apps/mesh/src/sandbox/resolve-default-provider-kind.ts @@ -0,0 +1,27 @@ +/** + * Default sandbox provider kind for `(VM_START, ensureVmForBranch)` when the + * caller hasn't explicitly chosen one. 
/** Collaborators needed to pick a default sandbox provider kind. */
export interface ResolveDefaultDeps {
  /** Registry queried for the user's link entry. */
  linkRegistry: LinkRegistry;
  /** Supplies the environment-configured provider kind used as fallback. */
  resolveEnvKind: () => SandboxProviderKind;
}

/**
 * Chooses the sandbox provider kind to use when the caller has not picked
 * one explicitly.
 *
 * A registry entry for `userId` selects `"remote-user"`; when the registry
 * returns nothing, the result of `deps.resolveEnvKind()` is used instead.
 * The env resolver is only invoked on that fallback path.
 *
 * @param userId - User whose link entry is looked up.
 * @param deps - Registry and env-kind resolver.
 * @returns The provider kind to use for this user.
 */
export async function resolveDefaultSandboxProviderKind(
  userId: string,
  deps: ResolveDefaultDeps,
): Promise<SandboxProviderKind> {
  const registered = await deps.linkRegistry.get(userId);
  return registered ? "remote-user" : deps.resolveEnvKind();
}
KyselySandboxProviderStateStore(database.db); }); afterAll(async () => { @@ -50,7 +50,7 @@ describe("KyselySandboxRunnerStateStore", () => { expect(row!.updatedAt.getTime()).toBeLessThanOrEqual(Date.now() + 1000); }); - it("put UPSERTs on same (user_id, project_ref, runner_kind)", async () => { + it("put UPSERTs on same (user_id, project_ref, sandbox_provider_kind)", async () => { const id = mkId("upsert"); await store.put(id, "docker", { handle: "upsert-handle-1", @@ -69,7 +69,7 @@ describe("KyselySandboxRunnerStateStore", () => { // Verify only one row exists for this (user, project, kind). const { rows } = await database.pglite.query<{ count: string }>( `SELECT COUNT(*)::text AS count FROM sandbox_runner_state - WHERE user_id = $1 AND project_ref = $2 AND runner_kind = $3`, + WHERE user_id = $1 AND project_ref = $2 AND sandbox_provider_kind = $3`, [id.userId, id.projectRef, "docker"], ); expect(rows[0]!.count).toBe("1"); diff --git a/apps/mesh/src/storage/sandbox-runner-state.ts b/apps/mesh/src/storage/sandbox-runner-state.ts index 107ca2c869..f0773e95f2 100644 --- a/apps/mesh/src/storage/sandbox-runner-state.ts +++ b/apps/mesh/src/storage/sandbox-runner-state.ts @@ -1,7 +1,7 @@ /** * Kysely-backed RunnerStateStore. `state` jsonb is opaque — each runner * serialises its own fields. See - * packages/@decocms/sandbox/server/runner/. + * packages/@decocms/sandbox/server/provider/. * * Method implementations take an explicit executor (db or trx) so the scoped * store handed to `withLock` callbacks can reuse the lock's connection. 
If @@ -19,7 +19,7 @@ import type { RunnerStateStore, RunnerStateStoreOps, SandboxId, -} from "@decocms/sandbox/runner"; +} from "@decocms/sandbox/provider"; import type { Database } from "./types"; type Executor = Kysely; @@ -49,7 +49,7 @@ async function getRow( .select(["handle", "state", "updated_at"]) .where("user_id", "=", id.userId) .where("project_ref", "=", id.projectRef) - .where("runner_kind", "=", kind) + .where("sandbox_provider_kind", "=", kind) .executeTakeFirst(); if (!row) return null; return { @@ -67,7 +67,7 @@ async function getByHandleRow( const row = await exec .selectFrom("sandbox_runner_state") .select(["user_id", "project_ref", "handle", "state", "updated_at"]) - .where("runner_kind", "=", kind) + .where("sandbox_provider_kind", "=", kind) .where("handle", "=", handle) .executeTakeFirst(); if (!row) return null; @@ -92,17 +92,19 @@ async function putRow( .values({ user_id: id.userId, project_ref: id.projectRef, - runner_kind: kind, + sandbox_provider_kind: kind, handle: entry.handle, state: stateJson, updated_at: now, }) .onConflict((oc) => - oc.columns(["user_id", "project_ref", "runner_kind"]).doUpdateSet({ - handle: entry.handle, - state: stateJson, - updated_at: now, - }), + oc + .columns(["user_id", "project_ref", "sandbox_provider_kind"]) + .doUpdateSet({ + handle: entry.handle, + state: stateJson, + updated_at: now, + }), ) .execute(); } @@ -116,7 +118,7 @@ async function deleteRow( .deleteFrom("sandbox_runner_state") .where("user_id", "=", id.userId) .where("project_ref", "=", id.projectRef) - .where("runner_kind", "=", kind) + .where("sandbox_provider_kind", "=", kind) .execute(); } @@ -127,7 +129,7 @@ async function deleteByHandleRow( ): Promise { await exec .deleteFrom("sandbox_runner_state") - .where("runner_kind", "=", kind) + .where("sandbox_provider_kind", "=", kind) .where("handle", "=", handle) .execute(); } @@ -142,7 +144,7 @@ function scopedStore(exec: Executor): RunnerStateStoreOps { }; } -export class 
KyselySandboxRunnerStateStore implements RunnerStateStore { +export class KyselySandboxProviderStateStore implements RunnerStateStore { constructor(private db: Kysely) {} get(id: SandboxId, kind: string): Promise { diff --git a/apps/mesh/src/storage/threads.ts b/apps/mesh/src/storage/threads.ts index 48d9380a0b..087e496159 100644 --- a/apps/mesh/src/storage/threads.ts +++ b/apps/mesh/src/storage/threads.ts @@ -165,6 +165,8 @@ export class SqlThreadStorage implements ThreadStoragePort { trigger_id: data.trigger_id ?? null, virtual_mcp_id: data.virtual_mcp_id ?? "", branch: data.branch ?? null, + sandbox_provider_kind: null, + harness_id: null, created_at: now, updated_at: now, created_by: data.created_by, @@ -253,7 +255,12 @@ export class SqlThreadStorage implements ThreadStoragePort { if (data.branch !== undefined) { updateData.branch = data.branch; } - + if (data.sandbox_provider_kind !== undefined) { + updateData.sandbox_provider_kind = data.sandbox_provider_kind; + } + if (data.harness_id !== undefined) { + updateData.harness_id = data.harness_id; + } await this.db .updateTable("threads") .set(updateData) @@ -710,6 +717,8 @@ export class SqlThreadStorage implements ThreadStoragePort { run_started_at?: Date | string | null; virtual_mcp_id?: string | null; branch?: string | null; + sandbox_provider_kind?: string | null; + harness_id?: string | null; metadata?: ThreadMetadata | string | null; created_at: Date | string; updated_at: Date | string; @@ -749,6 +758,8 @@ export class SqlThreadStorage implements ThreadStoragePort { : null, virtual_mcp_id: row.virtual_mcp_id ?? "", branch: row.branch ?? null, + sandbox_provider_kind: row.sandbox_provider_kind ?? null, + harness_id: row.harness_id ?? 
null, metadata, created_at: toIsoString(row.created_at), updated_at: toIsoString(row.updated_at), diff --git a/apps/mesh/src/storage/types.ts b/apps/mesh/src/storage/types.ts index 8887be905d..169d300ca0 100644 --- a/apps/mesh/src/storage/types.ts +++ b/apps/mesh/src/storage/types.ts @@ -816,6 +816,10 @@ export interface ThreadTable { virtual_mcp_id: string; /** Git branch this thread is pinned to (GitHub-linked virtualmcps only) */ branch: string | null; + /** Sandbox provider kind pinned on first message (e.g. "docker", "freestyle") */ + sandbox_provider_kind: string | null; + /** Harness id pinned on first message (e.g. "claude-code", "codex", "decopilot") */ + harness_id: string | null; /** Per-task UI state (e.g., expanded_tools for right-panel tabs) */ metadata: ColumnType; created_at: ColumnType; @@ -856,6 +860,10 @@ export interface Thread { virtual_mcp_id: string; /** Git branch this thread is pinned to (GitHub-linked virtualmcps only) */ branch: string | null; + /** Sandbox provider kind pinned on first message (e.g. "docker", "freestyle") */ + sandbox_provider_kind: string | null; + /** Harness id pinned on first message (e.g. 
"claude-code", "codex", "decopilot") */ + harness_id: string | null; metadata: ThreadMetadata; } @@ -1157,10 +1165,10 @@ export interface KVTable { updated_at: ColumnType; } -export interface SandboxRunnerStateTable { +export interface SandboxProviderStateTable { user_id: string; project_ref: string; - runner_kind: string; + sandbox_provider_kind: string; handle: string; state: ColumnType, string, string>; updated_at: ColumnType; @@ -1316,5 +1324,5 @@ export interface Database { // Organization domain claims (for auto-join) organization_domains: OrganizationDomainTable; - sandbox_runner_state: SandboxRunnerStateTable; + sandbox_runner_state: SandboxProviderStateTable; } diff --git a/apps/mesh/src/storage/virtual.ts b/apps/mesh/src/storage/virtual.ts index 1c691b9629..ab7d91f785 100644 --- a/apps/mesh/src/storage/virtual.ts +++ b/apps/mesh/src/storage/virtual.ts @@ -15,6 +15,7 @@ import { generatePrefixedId } from "@/shared/utils/generate-id"; import { getWellKnownDecopilotVirtualMCP, isDecopilot, + normalizeVmMap, } from "@decocms/mesh-sdk"; import type { VirtualMCPCreateData, @@ -492,7 +493,19 @@ export class VirtualMCPStorage implements VirtualMCPStoragePort { const status: "active" | "inactive" = row.status === "active" ? "active" : "inactive"; - const metadata = this.parseJson<{ instructions?: string }>(row.metadata); + const rawMetadata = this.parseJson<{ + instructions?: string; + vmMap?: unknown; + }>(row.metadata); + + // Normalize vmMap into v2 shape on read. Rows written before migration + // 082 actually ran still carry the v1 2-level layout (or `runnerKind` + // entries). The output schema VirtualMCPEntitySchema is strict v2 and + // would reject those without this normalization. Strip `vmMap` from + // the rest spread so its `unknown` type doesn't leak into the result. + const { vmMap: rawVmMap, ...metadataRest } = rawMetadata ?? {}; + const normalizedVmMap = + rawVmMap !== undefined ? 
normalizeVmMap(rawVmMap) : undefined; return { id: row.id, @@ -507,8 +520,9 @@ export class VirtualMCPStorage implements VirtualMCPStoragePort { created_by: row.created_by, updated_by: row.updated_by ?? undefined, metadata: { - ...metadata, - instructions: metadata?.instructions ?? null, + ...metadataRest, + instructions: rawMetadata?.instructions ?? null, + ...(normalizedVmMap !== undefined ? { vmMap: normalizedVmMap } : {}), }, connections: aggregationRows.map((agg) => ({ connection_id: agg.child_connection_id, diff --git a/apps/mesh/src/test/setup.ts b/apps/mesh/src/test/setup.ts new file mode 100644 index 0000000000..1f6ae75fc2 --- /dev/null +++ b/apps/mesh/src/test/setup.ts @@ -0,0 +1,57 @@ +// Test runtime setup for React component tests. Importing this module +// registers happy-dom globals + jest-dom matchers. Each component test +// MUST also call `setupComponentTest()` at module top-level to enable +// the cross-test DOM cleanup (bun:test doesn't run RTL's cleanup the +// way Jest does, so this is the explicit hook). +import { GlobalRegistrator } from "@happy-dom/global-registrator"; +import { afterEach, expect } from "bun:test"; +import * as matchers from "@testing-library/jest-dom/matchers"; +import { cleanup } from "@testing-library/react"; + +if (!GlobalRegistrator.isRegistered) { + // Snapshot Bun's native stream classes before registration. happy-dom + // overwrites globalThis.TransformStream / WritableStream with its own + // implementations, but leaves ReadableStream alone — that mismatch + // breaks ai-sdk's `stream.pipeThrough(new TransformStream(...))` with + // a "readable should be ReadableStream" TypeError when component tests + // and store/stream tests run in the same `bun test` invocation. Pin + // the native classes back so ai-sdk (and any other native-stream + // consumer) sees a consistent runtime. 
+ const NativeReadableStream = globalThis.ReadableStream; + const NativeWritableStream = globalThis.WritableStream; + const NativeTransformStream = globalThis.TransformStream; + // Pass a URL so modules reading window.location at import time + // (e.g. better-auth/react) don't blow up on `about:blank`. + GlobalRegistrator.register({ url: "http://localhost:4000/" }); + globalThis.ReadableStream = NativeReadableStream; + globalThis.WritableStream = NativeWritableStream; + globalThis.TransformStream = NativeTransformStream; +} +expect.extend(matchers as Parameters[0]); + +// Augment bun:test's `expect()` Matchers with the jest-dom matchers we +// just extended at runtime. The published `@testing-library/jest-dom` +// type augmentations target jest/vitest globals, not bun:test, so +// without this declaration TypeScript reports e.g. +// `Property 'toBeInTheDocument' does not exist on type 'Matchers'`. +declare module "bun:test" { + interface Matchers extends matchers.TestingLibraryMatchers {} + interface AsymmetricMatchers + extends matchers.TestingLibraryMatchers {} +} + +/** + * Call at module top-level in every React component test file to + * register the after-each DOM cleanup in that file's scope. The + * registration must happen synchronously from the test file (not via + * the side-effect import) because bun:test's hooks are scoped to the + * importing file, not the importee. 
/**
 * Registers the per-test DOM cleanup hook for a React component test file.
 *
 * Invoke once at module top level of each component test file. After every
 * test the hook calls Testing Library's `cleanup()` and then, when a global
 * `document` exists, empties `document.body`.
 */
export function setupComponentTest(): void {
  const resetDom = () => {
    cleanup();
    // Nothing more to wipe when no DOM global is present.
    if (typeof document === "undefined") return;
    document.body.innerHTML = "";
  };

  afterEach(resetDom);
}
Install with: npm install -g @openai/codex", - }; - } - - return { activated: true }; -} - -export const AI_PROVIDER_CLI_ACTIVATE = defineTool({ - name: "AI_PROVIDER_CLI_ACTIVATE", - description: - "Check if a CLI-based AI provider (Claude Code or Codex) is installed and authenticated, then activate it.", - inputSchema: z.object({ - providerId: z - .enum(["claude-code", "codex"]) - .default("claude-code") - .describe("Which CLI provider to activate"), - }), - outputSchema: z.object({ - activated: z.boolean(), - email: z.string().optional(), - error: z.string().optional(), - }), - handler: async (input, ctx) => { - requireAuth(ctx); - const org = requireOrganization(ctx); - await ctx.access.check(); - - const result = - input.providerId === "codex" - ? await activateCodex() - : await activateClaudeCode(); - - if (!result.activated) { - return result; - } - - await ctx.storage.aiProviderKeys.upsert({ - providerId: input.providerId, - label: input.providerId === "codex" ? "Codex CLI" : "Claude CLI", - apiKey: "cli-local", - organizationId: org.id, - createdBy: ctx.auth.user!.id, - }); - - return result; - }, -}); diff --git a/apps/mesh/src/tools/ai-providers/index.ts b/apps/mesh/src/tools/ai-providers/index.ts index ced74755a9..707e7b1994 100644 --- a/apps/mesh/src/tools/ai-providers/index.ts +++ b/apps/mesh/src/tools/ai-providers/index.ts @@ -10,4 +10,3 @@ export { AI_PROVIDER_OAUTH_EXCHANGE } from "./oauth-exchange"; export { AI_PROVIDER_PROVISION_KEY } from "./provision-key"; export { AI_PROVIDER_TOPUP_URL } from "./topup-url"; export { AI_PROVIDER_CREDITS } from "./credits"; -export { AI_PROVIDER_CLI_ACTIVATE } from "./cli-activate"; diff --git a/apps/mesh/src/tools/ai-providers/list.ts b/apps/mesh/src/tools/ai-providers/list.ts index 29b999c189..1f570a4b54 100644 --- a/apps/mesh/src/tools/ai-providers/list.ts +++ b/apps/mesh/src/tools/ai-providers/list.ts @@ -19,9 +19,7 @@ export const AI_PROVIDERS_LIST = defineTool({ name: z.string(), description: z.string(), 
logo: z.string().optional(), - supportedMethods: z.array( - z.enum(["api-key", "oauth-pkce", "cli-activate"]), - ), + supportedMethods: z.array(z.enum(["api-key", "oauth-pkce"])), supportsTopUp: z.boolean().optional(), supportsCredits: z.boolean().optional(), supportsProvision: z.boolean().optional(), diff --git a/apps/mesh/src/tools/index.ts b/apps/mesh/src/tools/index.ts index 30c6e0fa44..79ac29e1bb 100644 --- a/apps/mesh/src/tools/index.ts +++ b/apps/mesh/src/tools/index.ts @@ -36,6 +36,7 @@ import * as ObjectStorageTools from "./object-storage"; import * as RegistryTools from "./registry/index"; import * as VmTools from "./vm"; import * as GitHubTools from "./github"; +import * as LinkTools from "./links"; import { ToolName } from "./registry-metadata"; // Core tools - always available const CORE_TOOLS = [ @@ -147,8 +148,6 @@ const CORE_TOOLS = [ AiProvidersTools.AI_PROVIDER_PROVISION_KEY, AiProvidersTools.AI_PROVIDER_TOPUP_URL, AiProvidersTools.AI_PROVIDER_CREDITS, - AiProvidersTools.AI_PROVIDER_CLI_ACTIVATE, - // Secrets tools SecretsTools.SECRET_CREATE, SecretsTools.SECRET_LIST, @@ -170,6 +169,9 @@ const CORE_TOOLS = [ // GitHub tools (app-only) GitHubTools.GITHUB_LIST_USER_ORGS, + + // Link tools + LinkTools.LINK_CURRENT_GET, ] as const satisfies { name: ToolName }[]; // Plugin tools - collected at startup, gated by org settings at runtime diff --git a/apps/mesh/src/tools/links/get-current.test.ts b/apps/mesh/src/tools/links/get-current.test.ts new file mode 100644 index 0000000000..6e71917b57 --- /dev/null +++ b/apps/mesh/src/tools/links/get-current.test.ts @@ -0,0 +1,139 @@ +import { describe, it, expect } from "bun:test"; +import type { MeshContext } from "../../core/mesh-context"; +import { createInMemoryLinkRegistry } from "../../links/link-registry"; +import type { LinkEntry } from "@/links/protocol"; +import { LINK_CURRENT_GET } from "./get-current"; + +const STUB_ENTRY: LinkEntry = { + machineId: "machine_abc", + tunnelUrl: 
"https://link-user_1.deco.host", + linkSecret: "super-secret-do-not-leak", + cliVersion: "1.2.3", + protocolVersion: 1, + capabilities: ["claude-code"], + createdAt: new Date().toISOString(), +}; + +const USER_ID = "user_1"; + +function makeCtx( + overrides: Partial< + Pick + > = {}, +): MeshContext { + return { + auth: { + user: { + id: USER_ID, + email: "test@example.com", + name: "Test", + role: "user", + }, + }, + access: { + granted: () => true, + check: async () => {}, + grant: () => {}, + setToolName: () => {}, + }, + organization: { id: "org_1", slug: "test-org", name: "Test Org" }, + storage: {} as never, + timings: { + measure: async (_name: string, cb: () => Promise) => await cb(), + }, + vault: null as never, + db: null as never, + authInstance: null as never, + boundAuth: null as never, + tracer: { + startActiveSpan: ( + _name: string, + _opts: unknown, + fn: (span: unknown) => unknown, + ) => + fn({ + setStatus: () => {}, + recordException: () => {}, + end: () => {}, + }), + } as never, + meter: { + createHistogram: () => ({ record: () => {} }), + createCounter: () => ({ add: () => {} }), + } as never, + baseUrl: "https://mesh.example.com", + metadata: { requestId: "req_1", timestamp: new Date() }, + eventBus: null as never, + objectStorage: null as never, + aiProviders: null as never, + createMCPProxy: null as never, + getOrCreateClient: null as never, + pendingRevalidations: [], + monitoring: null as never, + ...overrides, + } as unknown as MeshContext; +} + +const nowSeconds = () => Math.floor(Date.now() / 1000); + +describe("LINK_CURRENT_GET", () => { + it("returns offline when no registry is wired", async () => { + const ctx = makeCtx({ linkRegistry: undefined }); + const result = await LINK_CURRENT_GET.handler({}, ctx); + expect(result).toEqual({ online: false, capabilities: [] }); + }); + + it("returns offline when registry has no entry for the user", async () => { + const registry = createInMemoryLinkRegistry({ nowSeconds }); + const ctx = 
/**
 * LINK_CURRENT_GET — reports the calling user's laptop-link status.
 *
 * Responds `{ online: false, capabilities: [] }` when the context has no link
 * registry or the registry returns no entry for the user. Otherwise responds
 * `online: true` with the entry's `machineId`, `cliVersion` and
 * `capabilities`. Only those three fields are copied out of the registry
 * entry, so `linkSecret` never reaches the response.
 */
export const LINK_CURRENT_GET = defineTool({
  name: "LINK_CURRENT_GET",
  description:
    "Return the calling user's currently registered laptop link, or `online: false` if no link is registered or the TTL has expired. The `linkSecret` is never returned.",
  inputSchema: z.object({}),
  outputSchema: z.object({
    online: z.boolean(),
    machineId: z.string().optional(),
    cliVersion: z.string().optional(),
    capabilities: z.array(capabilitySchema).default([]),
  }),
  handler: async (_input, ctx) => {
    requireAuth(ctx);
    await ctx.access.check();

    const { linkRegistry } = ctx;
    // The user id is read only when a registry exists, matching the order of
    // checks callers already rely on.
    // NOTE(review): `user!` presumes requireAuth guarantees ctx.auth.user —
    // confirm for non-user (e.g. API-key) callers.
    const entry = linkRegistry
      ? await linkRegistry.get(ctx.auth.user!.id)
      : undefined;

    if (!entry) {
      return { online: false, capabilities: [] };
    }

    // Explicit pick: anything else on the entry stays server-side.
    const { machineId, cliVersion, capabilities } = entry;
    return { online: true, machineId, cliVersion, capabilities };
  },
});
+ // Link tools + "LINK_CURRENT_GET", ] as const; /** @@ -674,11 +677,6 @@ export const MANAGEMENT_TOOLS: ToolMetadata[] = [ description: "Get current credit balance for a provider", category: "AI Providers", }, - { - name: "AI_PROVIDER_CLI_ACTIVATE", - description: "Activate Claude Code via local CLI", - category: "AI Providers", - }, // Secrets tools { name: "SECRET_CREATE", @@ -690,6 +688,7 @@ export const MANAGEMENT_TOOLS: ToolMetadata[] = [ description: "List secrets visible to the caller (no values returned)", category: "Secrets", }, + // Object Storage tools { name: "LIST_OBJECTS", @@ -902,6 +901,13 @@ export const MANAGEMENT_TOOLS: ToolMetadata[] = [ description: "List GitHub user's personal account and organizations", category: "GitHub", }, + // Link tools + { + name: "LINK_CURRENT_GET", + description: + "Return the calling user's current laptop link status (online/offline, capabilities)", + category: "Links", + }, ]; // ============================================================================ @@ -1072,7 +1078,6 @@ const PERMISSION_CAPABILITIES: PermissionCapability[] = [ "AI_PROVIDER_PROVISION_KEY", "AI_PROVIDER_TOPUP_URL", "AI_PROVIDER_CREDITS", - "AI_PROVIDER_CLI_ACTIVATE", ], }, // Organization (tags moved here from Developer) @@ -1270,6 +1275,7 @@ export function getToolsByCategory() { Registry: [], GitHub: [], VM: [], + Links: [], }; for (const tool of MANAGEMENT_TOOLS) { diff --git a/apps/mesh/src/tools/thread/create.test.ts b/apps/mesh/src/tools/thread/create.test.ts index 0ea8bb410f..7e12cede1f 100644 --- a/apps/mesh/src/tools/thread/create.test.ts +++ b/apps/mesh/src/tools/thread/create.test.ts @@ -138,14 +138,18 @@ describe("COLLECTION_THREADS_CREATE", () => { vmMap: { [env.userId]: { "deco/old-branch": { - vmId: "vm_old", - previewUrl: null, - createdAt: 1000, + freestyle: { + vmId: "vm_old", + previewUrl: null, + createdAt: 1000, + }, }, "deco/new-branch": { - vmId: "vm_new", - previewUrl: null, - createdAt: 2000, + freestyle: { + vmId: 
/**
 * Selects the branch the user touched most recently, reading the 3-level
 * vmMap shape: vmMap[userId][branch][sandboxProviderKind] → entry.
 *
 * A branch's recency is the largest `createdAt` among its per-kind entries;
 * a missing `createdAt` counts as 0 and the score never drops below 0.
 * Branches with equal scores keep their original key order, so the earliest
 * such key wins.
 *
 * @param vmMap - The vmMap metadata (may be undefined).
 * @param userId - User whose branches are considered.
 * @returns The winning branch name, or undefined when the user has no
 *   branches (the caller is expected to fall back to a generated name).
 */
function pickWarmBranchFromVmMap(
  vmMap: VmMapMeta["vmMap"],
  userId: string,
): string | undefined {
  const userBranches = vmMap?.[userId];
  if (!userBranches) return undefined;

  // Score each branch once up front rather than inside the comparator.
  const scored = Object.entries(userBranches).map(([branch, kinds]) => {
    let latest = 0;
    for (const entry of Object.values(kinds)) {
      latest = Math.max(latest, entry.createdAt ?? 0);
    }
    return { branch, latest };
  });

  // Newest first; Array.prototype.sort is stable, so ties keep key order.
  scored.sort((left, right) => right.latest - left.latest);
  return scored[0]?.branch;
}
* Validates auth, checks access, fetches and validates the Virtual MCP, * and returns the metadata and vmMap entry for the current user on the - * specified branch. `entry` is null when no vm is registered for that pair. + * specified branch + kind. `entry` is null when no vm is registered for that triple. */ export async function requireVmEntry( - input: { virtualMcpId: string; branch: string }, + input: { + virtualMcpId: string; + branch: string; + sandboxProviderKind: SandboxProviderKind; + }, ctx: MeshContext, ) { requireAuth(ctx); @@ -88,7 +93,12 @@ export async function requireVmEntry( } const metadata = (virtualMcp.metadata ?? {}) as Record; const vmMap = readVmMap(metadata); - const entry: VmMapEntry | null = resolveVm(vmMap, userId, input.branch); + const entry: VmMapEntry | null = resolveVm( + vmMap, + userId, + input.branch, + input.sandboxProviderKind, + ); return { virtualMcp, metadata, userId, entry, organization }; } diff --git a/apps/mesh/src/tools/vm/resolve-env.ts b/apps/mesh/src/tools/vm/resolve-env.ts index 9b63df19a4..2b16ac7610 100644 --- a/apps/mesh/src/tools/vm/resolve-env.ts +++ b/apps/mesh/src/tools/vm/resolve-env.ts @@ -1,5 +1,5 @@ import type { RuntimeEnvEntry } from "@decocms/mesh-sdk"; -import type { SandboxRunner } from "@decocms/sandbox/runner"; +import type { SandboxProvider } from "@decocms/sandbox/provider"; import { SecretAccessDeniedError, SecretNotFoundError, @@ -8,7 +8,7 @@ import type { MeshContext } from "../../core/mesh-context"; interface ResolveAndPushParams { ctx: MeshContext; - runner: SandboxRunner; + runner: SandboxProvider; handle: string; orgId: string; userId: string; diff --git a/apps/mesh/src/tools/vm/start.test.ts b/apps/mesh/src/tools/vm/start.test.ts index 4e45823118..0963136c92 100644 --- a/apps/mesh/src/tools/vm/start.test.ts +++ b/apps/mesh/src/tools/vm/start.test.ts @@ -1,13 +1,15 @@ import { describe, it, expect, mock, beforeEach } from "bun:test"; import type { VmMap, VmMapEntry } from 
"@decocms/mesh-sdk"; import type { MeshContext } from "../../core/mesh-context"; +import type { LinkRegistry } from "../../links/link-registry"; +import type { LinkEntry } from "@/links/protocol"; import type { EnsureOptions, Sandbox, SandboxId, - SandboxRunner, -} from "@decocms/sandbox/runner"; -import { composeSandboxRef } from "@decocms/sandbox/runner"; + SandboxProvider, +} from "@decocms/sandbox/provider"; +import { composeSandboxRef } from "@decocms/sandbox/provider"; // Pin runner kind — the dev env flips STUDIO_SANDBOX_RUNNER and VM_START // reads it at handler time. @@ -30,7 +32,7 @@ async function* readyOnly() { yield { kind: "ready" as const }; } -const mockDockerRunner: SandboxRunner = { +const mockDockerRunner: SandboxProvider = { kind: "docker", ensure: (id, opts) => mockEnsure(id, opts), exec: async () => ({ stdout: "", stderr: "", exitCode: 0, timedOut: false }), @@ -41,11 +43,13 @@ const mockDockerRunner: SandboxRunner = { watchClaimLifecycle: () => readyOnly(), }; -const mockAgentSandboxRunner: SandboxRunner = { - kind: "agent-sandbox", +const mockRemoteUserDelete = mock(async (_handle: string) => {}); + +const mockRemoteUserRunner: SandboxProvider = { + kind: "remote-user", ensure: (id, opts) => mockEnsure(id, opts), exec: async () => ({ stdout: "", stderr: "", exitCode: 0, timedOut: false }), - delete: (handle) => mockAgentSandboxDelete(handle), + delete: (handle) => mockRemoteUserDelete(handle), alive: async () => true, getPreviewUrl: async () => "https://stub.preview/", proxyDaemonRequest: async () => new Response(null, { status: 204 }), @@ -53,10 +57,20 @@ const mockAgentSandboxRunner: SandboxRunner = { }; mock.module("../../sandbox/lifecycle", () => ({ - getSharedRunner: () => mockDockerRunner, - getRunnerByKind: (_ctx: unknown, kind: "docker" | "agent-sandbox") => - kind === "docker" ? 
mockDockerRunner : mockAgentSandboxRunner, - getSharedRunnerIfInit: () => mockDockerRunner, + getSharedSandboxProvider: (ctx: MeshContext) => { + if ( + ctx.sandboxPreference === "remote-user" && + ctx.linkForCurrentRun !== undefined + ) { + return mockRemoteUserRunner; + } + return mockDockerRunner; + }, + getSandboxProviderByKind: ( + _ctx: unknown, + kind: "docker" | "agent-sandbox", + ) => (kind === "docker" ? mockDockerRunner : mockDockerRunner), + getSharedSandboxProviderIfInit: () => mockDockerRunner, getOrInitSharedRunner: async () => mockDockerRunner, asDockerRunner: () => null, // Bun's mock.module persists across test files in the same shard. Other @@ -193,12 +207,14 @@ function makeCtx(overrides: { userId?: string; virtualMcp?: ReturnType | null; updateSpy?: ReturnType; + linkRegistry?: LinkRegistry; }): MeshContext { const { orgId = ORG_ID, userId = USER_ID, virtualMcp, updateSpy = mock(async () => {}), + linkRegistry, } = overrides; const findById = mock(async (_id: string) => virtualMcp ?? 
null); @@ -254,6 +270,7 @@ function makeCtx(overrides: { getOrCreateClient: null as never, pendingRevalidations: [], monitoring: null as never, + linkRegistry, } as unknown as MeshContext; } @@ -262,8 +279,10 @@ describe("VM_START", () => { mockEnsure.mockReset(); mockDockerDelete.mockReset(); mockAgentSandboxDelete.mockReset(); + mockRemoteUserDelete.mockReset(); mockDockerDelete.mockImplementation(async () => {}); mockAgentSandboxDelete.mockImplementation(async () => {}); + mockRemoteUserDelete.mockImplementation(async () => {}); mockTokenGet.mockReset(); mockEnsure.mockImplementation(async () => ({ handle: "vm_xyz", @@ -315,7 +334,7 @@ describe("VM_START", () => { }); }); - it("persists vmMap entry with handle + previewUrl + runnerKind", async () => { + it("persists vmMap entry with handle + previewUrl + sandboxProviderKind", async () => { mockEnsure.mockImplementation(async () => ({ handle: "vm_xyz", workdir: "/app", @@ -334,20 +353,25 @@ describe("VM_START", () => { expect(result.previewUrl).toBe("https://stub.preview/"); expect(result.branch).toBe(BRANCH); expect(result.isNewVm).toBe(true); - expect(result.runnerKind).toBe("docker"); + expect(result.sandboxProviderKind).toBe("docker"); expect(updateSpy).toHaveBeenCalledTimes(1); const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; - const stored = updated.vmMap[USER_ID]?.[BRANCH]; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["docker"]; expect(stored).toMatchObject({ vmId: "vm_xyz", previewUrl: "https://stub.preview/", - runnerKind: "docker", + sandboxProviderKind: "docker", }); // Server-stamped; assert recency, not exact value. 
- expect(typeof stored?.createdAt).toBe("number"); - expect(stored?.createdAt).toBeGreaterThan(Date.now() - 60_000); + expect(typeof (stored as VmMapEntry)?.createdAt).toBe("number"); + expect((stored as VmMapEntry)?.createdAt).toBeGreaterThan( + Date.now() - 60_000, + ); }); it("snapshots metadata.runtime selected/port/path into startedWith", async () => { @@ -369,7 +393,10 @@ describe("VM_START", () => { expect(updateSpy).toHaveBeenCalledTimes(1); const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; - const stored = updated.vmMap[USER_ID]?.[BRANCH]; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["docker"] as VmMapEntry | undefined; expect(stored?.startedWith).toEqual({ packageManager: "pnpm", port: "4321", @@ -401,7 +428,10 @@ describe("VM_START", () => { }); expect(vmMapCall).toBeDefined(); const updated = (vmMapCall![2] as { metadata: { vmMap: VmMap } }).metadata; - const stored = updated.vmMap[USER_ID]?.[BRANCH]; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["docker"] as VmMapEntry | undefined; expect(stored?.startedWith).toEqual({ packageManager: null, port: null, @@ -417,7 +447,8 @@ describe("VM_START", () => { })); const metadata: Metadata = { ...BASE_METADATA, - vmMap: { [USER_ID]: { [BRANCH]: CACHED_ENTRY } }, + // 3-level: kind (docker) → entry + vmMap: { [USER_ID]: { [BRANCH]: { docker: CACHED_ENTRY } } }, }; const virtualMcp = makeVirtualMcp(ORG_ID, metadata); const ctx = makeCtx({ virtualMcp }); @@ -533,104 +564,164 @@ describe("VM_START", () => { expect(opts.repo?.cloneUrl).not.toContain("ghu_stale_token"); }); - it("tears down the stale VM under its prior runner when the env runner flipped", async () => { - const staleEntry: VmMapEntry = { - vmId: "vm_agent_sandbox_stale", - previewUrl: "https://agent-sandbox.preview/", - 
runnerKind: "agent-sandbox", + it("provisions a new remote-user VM even when a docker entry exists under the same branch — kinds are siblings", async () => { + // With kind-in-key, different kinds coexist — no teardown occurs. + const dockerEntry: VmMapEntry = { + vmId: "vm_docker_existing", + previewUrl: "https://docker.preview/", + sandboxProviderKind: "docker", }; const metadata: Metadata = { ...BASE_METADATA, - vmMap: { [USER_ID]: { [BRANCH]: staleEntry } }, + // 3-level: docker entry lives under its own key + vmMap: { + [USER_ID]: { + [BRANCH]: { docker: dockerEntry }, + }, + }, }; const virtualMcp = makeVirtualMcp(ORG_ID, metadata); - const ctx = makeCtx({ virtualMcp }); + // Link registry with online link so resolveDefaultSandboxProviderKind picks remote-user + const linkRegistry: LinkRegistry = { + get: async (_userId: string) => ({ + machineId: "machine_1", + tunnelUrl: "https://tunnel.example.com", + linkSecret: "secret_abc", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: [], + createdAt: new Date().toISOString(), + }), + put: async () => {}, + delete: async () => {}, + }; + const ctx = makeCtx({ virtualMcp, linkRegistry }); + // Link is online → kind resolves to remote-user; no docker entry for remote-user → provision a new one const result = await VM_START.handler( { virtualMcpId: VMCP_ID, branch: BRANCH }, ctx, ); - expect(mockAgentSandboxDelete).toHaveBeenCalledTimes(1); - expect(mockAgentSandboxDelete).toHaveBeenCalledWith( - "vm_agent_sandbox_stale", - ); + // No teardown of the docker entry (kinds are siblings) expect(mockDockerDelete).not.toHaveBeenCalled(); expect(mockEnsure).toHaveBeenCalledTimes(1); - expect(result.runnerKind).toBe("docker"); + expect(result.sandboxProviderKind).toBe("remote-user"); expect(result.isNewVm).toBe(true); }); - it("still provisions the new VM when the stale-runner teardown throws", async () => { - mockAgentSandboxDelete.mockImplementation(async () => { - throw new Error("agent-sandbox runner gone"); - 
}); - const staleEntry: VmMapEntry = { - vmId: "vm_agent_sandbox_stale", - previewUrl: "https://agent-sandbox.preview/", - runnerKind: "agent-sandbox", + it("does not tear down anything when the existing entry is on the same runner", async () => { + const sameRunnerEntry: VmMapEntry = { + vmId: "vm_docker_existing", + previewUrl: "https://docker.preview/", + sandboxProviderKind: "docker", }; const metadata: Metadata = { ...BASE_METADATA, - vmMap: { [USER_ID]: { [BRANCH]: staleEntry } }, + vmMap: { + [USER_ID]: { + [BRANCH]: { docker: sameRunnerEntry }, + }, + }, }; const virtualMcp = makeVirtualMcp(ORG_ID, metadata); const ctx = makeCtx({ virtualMcp }); + await VM_START.handler({ virtualMcpId: VMCP_ID, branch: BRANCH }, ctx); + + expect(mockAgentSandboxDelete).not.toHaveBeenCalled(); + expect(mockDockerDelete).not.toHaveBeenCalled(); + }); + + // ----------------------------------------------------------------------- + // sandboxProviderKind default-resolution tests + // ----------------------------------------------------------------------- + + const STUB_LINK: LinkEntry = { + machineId: "machine_1", + tunnelUrl: "https://tunnel.example.com", + linkSecret: "secret_abc", + cliVersion: "1.0.0", + protocolVersion: 1, + capabilities: [], + createdAt: new Date().toISOString(), + }; + + it("VM_START with no sandboxProviderKind picks remote-user when the link is online", async () => { + const linkRegistry: LinkRegistry = { + get: async (_userId: string) => STUB_LINK, + put: async () => {}, + delete: async () => {}, + }; + const virtualMcp = makeVirtualMcp(ORG_ID, BASE_METADATA); + const updateSpy = mock(async () => {}); + const ctx = makeCtx({ virtualMcp, updateSpy, linkRegistry }); + const result = await VM_START.handler( { virtualMcpId: VMCP_ID, branch: BRANCH }, ctx, ); - expect(mockAgentSandboxDelete).toHaveBeenCalledTimes(1); - expect(mockEnsure).toHaveBeenCalledTimes(1); - expect(result.vmId).toBe("vm_xyz"); - expect(result.runnerKind).toBe("docker"); - 
expect(result.isNewVm).toBe(true); + expect(result.sandboxProviderKind).toBe("remote-user"); + const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; + const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["remote-user"] as VmMapEntry | undefined; + expect(stored?.sandboxProviderKind).toBe("remote-user"); }); - it("skips teardown for legacy entries (no runnerKind)", async () => { - const legacyEntry: VmMapEntry = { - vmId: "vm_legacy", - previewUrl: "https://legacy.preview/", - // no runnerKind + it("VM_START with no sandboxProviderKind picks env kind when no link", async () => { + // STUDIO_SANDBOX_RUNNER is "docker" at module load time (top of file) + const linkRegistry: LinkRegistry = { + get: async (_userId: string) => null, + put: async () => {}, + delete: async () => {}, }; - const metadata: Metadata = { - ...BASE_METADATA, - vmMap: { [USER_ID]: { [BRANCH]: legacyEntry } }, - }; - const virtualMcp = makeVirtualMcp(ORG_ID, metadata); - const ctx = makeCtx({ virtualMcp }); + const virtualMcp = makeVirtualMcp(ORG_ID, BASE_METADATA); + const updateSpy = mock(async () => {}); + const ctx = makeCtx({ virtualMcp, updateSpy, linkRegistry }); const result = await VM_START.handler( { virtualMcpId: VMCP_ID, branch: BRANCH }, ctx, ); - expect(mockAgentSandboxDelete).not.toHaveBeenCalled(); - expect(mockDockerDelete).not.toHaveBeenCalled(); - expect(mockEnsure).toHaveBeenCalledTimes(1); - expect(result.runnerKind).toBe("docker"); - expect(result.isNewVm).toBe(true); + expect(result.sandboxProviderKind).toBe("docker"); + const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; + const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["docker"] as VmMapEntry | undefined; + 
expect(stored?.sandboxProviderKind).toBe("docker"); }); - it("does not tear down anything when the existing entry is on the same runner", async () => { - const sameRunnerEntry: VmMapEntry = { - vmId: "vm_docker_existing", - previewUrl: "https://docker.preview/", - runnerKind: "docker", - }; - const metadata: Metadata = { - ...BASE_METADATA, - vmMap: { [USER_ID]: { [BRANCH]: sameRunnerEntry } }, + it("VM_START with explicit sandboxProviderKind ignores defaults", async () => { + // Link is online, env is "docker" — but explicit "docker" must win (and remote-user would also be overrideable). + const linkRegistry: LinkRegistry = { + get: async (_userId: string) => STUB_LINK, + put: async () => {}, + delete: async () => {}, }; - const virtualMcp = makeVirtualMcp(ORG_ID, metadata); - const ctx = makeCtx({ virtualMcp }); + const virtualMcp = makeVirtualMcp(ORG_ID, BASE_METADATA); + const updateSpy = mock(async () => {}); + const ctx = makeCtx({ virtualMcp, updateSpy, linkRegistry }); - await VM_START.handler({ virtualMcpId: VMCP_ID, branch: BRANCH }, ctx); + const result = await VM_START.handler( + { virtualMcpId: VMCP_ID, branch: BRANCH, sandboxProviderKind: "docker" }, + ctx, + ); - expect(mockAgentSandboxDelete).not.toHaveBeenCalled(); - expect(mockDockerDelete).not.toHaveBeenCalled(); + expect(result.sandboxProviderKind).toBe("docker"); + const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; + const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; + // 3-level key: vmMap[userId][branch][kind] + const stored = ( + updated.vmMap[USER_ID]?.[BRANCH] as Record + )?.["docker"] as VmMapEntry | undefined; + expect(stored?.sandboxProviderKind).toBe("docker"); }); it("throws RECONNECT_ERROR when refreshing an expired token fails", async () => { diff --git a/apps/mesh/src/tools/vm/start.ts b/apps/mesh/src/tools/vm/start.ts index 345441b58e..83acf3e288 100644 --- a/apps/mesh/src/tools/vm/start.ts +++ b/apps/mesh/src/tools/vm/start.ts @@ -1,22 +1,21 @@ 
/** - * VM_START. Keyed by (userId, branch) in the Virtual MCP's `vmMap`. - * Runner-agnostic — dispatches through the active `SandboxRunner`; this + * VM_START. Keyed by (userId, branch, sandboxProviderKind) in the Virtual MCP's `vmMap`. + * Runner-agnostic — dispatches through the active `SandboxProvider`; this * handler only does `vmMap` bookkeeping. Branch defaults to * `deco/-` when omitted. * - * Runner flips: if the existing entry's `runnerKind` differs from the env's - * current runner, the stale VM is torn down under its original runner before - * the new one is provisioned. Old VMs are ephemeral — not preserved. + * Different sandbox provider kinds coexist as siblings under the same + * (user, branch) key — no stale-VM teardown is needed on kind change. */ import { z } from "zod"; import type { VmMapEntry } from "@decocms/mesh-sdk"; import { composeSandboxRef, - resolveRunnerKindFromEnv, - type RunnerKind, + resolveSandboxProviderKindFromEnv, + type SandboxProviderKind, type Workload, -} from "@decocms/sandbox/runner"; +} from "@decocms/sandbox/provider"; import { defineTool } from "../../core/define-tool"; import { getUserId, @@ -41,9 +40,13 @@ import { } from "../../shared/github-runtime-detect"; import { generateBranchName } from "../../shared/branch-name"; import { PACKAGE_MANAGER_CONFIG } from "../../shared/runtime-defaults"; -import { getRunnerByKind, getSharedRunner } from "../../sandbox/lifecycle"; +import { + getSandboxProviderByKind, + getSharedSandboxProvider, +} from "../../sandbox/lifecycle"; import { setVmMapEntry } from "./vm-map"; import type { VirtualMCPUpdateData } from "../virtual/schema"; +import { resolveDefaultSandboxProviderKind } from "../../sandbox/resolve-default-provider-kind"; type GithubRepo = { owner: string; @@ -75,32 +78,55 @@ export const VM_START = defineTool({ .describe( "Optional git branch to check out. When omitted the handler generates `deco/-` and uses it. 
The resolved branch is returned in the response so callers can persist it.", ), + sandboxProviderKind: z + .enum(["docker", "agent-sandbox", "remote-user"]) + .optional() + .describe( + "Explicit runtime choice. When omitted, defaults to `remote-user` if the acting user's link daemon is online, else the cluster env kind.", + ), }), outputSchema: z.object({ previewUrl: z.string().nullable(), vmId: z.string(), branch: z.string(), isNewVm: z.boolean(), - runnerKind: z.enum(["host", "docker", "agent-sandbox"]), + sandboxProviderKind: z.enum(["docker", "agent-sandbox", "remote-user"]), }), handler: async (input, ctx) => { + requireAuth(ctx); const resolvedBranch = input.branch ?? generateBranchName(); + + // Resolve kind before requireVmEntry so the 3-level lookup uses the right key. + // getUserId may return null here; requireVmEntry will throw if so. + const earlyUserId = getUserId(ctx); + if (!earlyUserId) throw new Error("User ID required"); + + const providerKind: SandboxProviderKind = + input.sandboxProviderKind ?? + (ctx.linkRegistry + ? await resolveDefaultSandboxProviderKind(earlyUserId, { + linkRegistry: ctx.linkRegistry, + resolveEnvKind: resolveSandboxProviderKindFromEnv, + }) + : resolveSandboxProviderKindFromEnv()); + const { metadata, userId, organization, entry: existing, } = await requireVmEntry( - { virtualMcpId: input.virtualMcpId, branch: resolvedBranch }, + { + virtualMcpId: input.virtualMcpId, + branch: resolvedBranch, + sandboxProviderKind: providerKind, + }, ctx, ); const githubRepo = (metadata as GithubRepoMeta).githubRepo ?? 
null; - const runnerKind = resolveRunnerKindFromEnv(); - await reapStaleRunner(ctx, existing, runnerKind); - const { entry, isNewVm } = await provisionSandbox({ ctx, userId, @@ -110,12 +136,13 @@ export const VM_START = defineTool({ metadata, githubRepo, existing, + providerKind, }); return { ...entry, branch: resolvedBranch, isNewVm, - runnerKind, + sandboxProviderKind: providerKind, }; }, }); @@ -124,11 +151,18 @@ export const VM_START = defineTool({ * Lazy provisioner for the always-on VM tools path. Mirrors VM_START's * flow but: (a) tolerates a missing GitHub repo (boots blank under Docker), * and (b) takes a fast path when the existing vmMap entry already matches - * the current runner kind — avoiding a full `runner.ensure` round-trip on + * the requested kind — avoiding a full `runner.ensure` round-trip on * every fresh stream when the VM is already registered. + * + * Unlike VM_START, `sandboxProviderKind` is required — callers (e.g. POST + * /messages) must resolve the kind before calling this function. */ -export async function ensureVmForBranch( - input: { virtualMcpId: string; branch: string }, +export async function ensureVm( + input: { + virtualMcpId: string; + branch: string; + sandboxProviderKind: SandboxProviderKind; + }, ctx: MeshContext, ): Promise { // Inline auth + lookup; the standard `requireVmEntry` runs @@ -149,18 +183,18 @@ export async function ensureVmForBranch( readVmMap(metadata), userId, input.branch, + input.sandboxProviderKind, ); - const runnerKind = resolveRunnerKindFromEnv(); + const providerKind = input.sandboxProviderKind; - // Fast path: vmMap already has an entry under the current runner. Trust - // it; matches the prior `activeVm` behavior in built-in-tools. - if (existing && existing.runnerKind === runnerKind) { + // Fast path: vmMap already has an entry under the requested kind. + // No reap needed: with kind in the key, there's no stale-kind entry to + // tear down. Different kinds coexist as siblings. 
+ if (existing) { return existing; } - await reapStaleRunner(ctx, existing, runnerKind); - const githubRepo = (metadata as GithubRepoMeta).githubRepo ?? null; const { entry } = await provisionSandbox({ ctx, @@ -170,31 +204,12 @@ export async function ensureVmForBranch( branch: input.branch, metadata, githubRepo, - existing, + existing: null, + providerKind, }); return entry; } -async function reapStaleRunner( - ctx: MeshContext, - existing: VmMapEntry | null, - currentKind: RunnerKind, -): Promise { - if (!existing?.runnerKind) return; - if (existing.runnerKind === currentKind) return; - - try { - const priorRunner = await getRunnerByKind(ctx, existing.runnerKind); - await priorRunner.delete(existing.vmId); - } catch (err) { - console.error( - `[VM_START] stale ${existing.runnerKind} ${existing.vmId}: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - } -} - type StartParams = { ctx: MeshContext; userId: string; @@ -204,6 +219,7 @@ type StartParams = { metadata: Record; githubRepo: GithubRepo | null; existing: VmMapEntry | null; + providerKind: SandboxProviderKind; }; async function provisionSandbox( @@ -218,6 +234,7 @@ async function provisionSandbox( metadata, githubRepo, existing, + providerKind, } = params; let { runtime, packageManager, port, packageManagerPath } = @@ -309,7 +326,31 @@ async function provisionSandbox( virtualMcpId, branch, }); - const runner = await getSharedRunner(ctx); + + // Dispatch to the correct runner for the resolved provider kind. + // - remote-user: look up the link and set ctx fields so the getSharedSandboxProvider + // per-user branch fires correctly (it builds a fresh RemoteUserSandboxProvider). + // - all other kinds: go straight through getSandboxProviderByKind so the explicit + // kind is honoured even when env says something different. 
+ let runner; + if (providerKind === "remote-user") { + if (!ctx.linkRegistry) { + throw new Error( + "remote-user sandbox provider requires ctx.linkRegistry to be wired (set on MeshContextConfig).", + ); + } + const link = await ctx.linkRegistry.get(userId); + if (!link) { + throw new Error( + `No link daemon registered for user "${userId}". Start one with \`deco link\` (or run \`bun run dev --local-sandbox-provider\` for dev).`, + ); + } + ctx.sandboxPreference = "remote-user"; + ctx.linkForCurrentRun = link; + runner = await getSharedSandboxProvider(ctx); + } else { + runner = await getSandboxProviderByKind(ctx, providerKind); + } const sandbox = await runner.ensure( { userId, projectRef }, { @@ -346,7 +387,8 @@ async function provisionSandbox( const entry: VmMapEntry = { vmId: sandbox.handle, previewUrl: sandbox.previewUrl, - runnerKind: runner.kind, + sandboxUrl: sandbox.previewUrl, // for remote-user the two are equal + sandboxProviderKind: runner.kind, createdAt, startedWith: { packageManager: runtimeSelected, @@ -361,6 +403,7 @@ async function provisionSandbox( userId, userId, branch, + params.providerKind, entry, ); diff --git a/apps/mesh/src/tools/vm/stop.test.ts b/apps/mesh/src/tools/vm/stop.test.ts index 5cb63b0d18..fbdba1af85 100644 --- a/apps/mesh/src/tools/vm/stop.test.ts +++ b/apps/mesh/src/tools/vm/stop.test.ts @@ -1,7 +1,10 @@ import { describe, it, expect, mock, beforeEach } from "bun:test"; import type { VmMap, VmMapEntry } from "@decocms/mesh-sdk"; import type { MeshContext } from "../../core/mesh-context"; -import type { RunnerKind, SandboxRunner } from "@decocms/sandbox/runner"; +import type { + SandboxProvider, + SandboxProviderKind, +} from "@decocms/sandbox/provider"; // Mock per-kind runner lookup BEFORE importing VM_DELETE. 
const mockDelete = mock(async (_handle: string): Promise => {}); @@ -11,7 +14,7 @@ async function* readyOnly() { yield { kind: "ready" as const }; } -function makeMockRunner(kind: RunnerKind): SandboxRunner { +function makeMockRunner(kind: SandboxProviderKind): SandboxProvider { return { kind, ensure: async () => ({ @@ -34,12 +37,12 @@ function makeMockRunner(kind: RunnerKind): SandboxRunner { } mock.module("../../sandbox/lifecycle", () => ({ - getSharedRunner: () => makeMockRunner("docker"), - getRunnerByKind: (_ctx: unknown, kind: RunnerKind) => { + getSharedSandboxProvider: () => makeMockRunner("docker"), + getSandboxProviderByKind: (_ctx: unknown, kind: SandboxProviderKind) => { lastRequestedKind.value = kind; return makeMockRunner(kind); }, - getSharedRunnerIfInit: () => null, + getSharedSandboxProviderIfInit: () => null, asDockerRunner: () => null, })); @@ -50,20 +53,27 @@ const BRANCH = "feat/example"; const DOCKER_ENTRY: VmMapEntry = { vmId: "f9e2fadeb813e08eb00eef6f962be2b2", previewUrl: "http://f9e2.localhost:7070/", - runnerKind: "docker", + sandboxProviderKind: "docker", }; -const AGENT_SANDBOX_ENTRY: VmMapEntry = { - vmId: "vm_agent_sandbox", - previewUrl: "https://claim-1.sandbox.example/", - runnerKind: "agent-sandbox", -}; - -const LEGACY_ENTRY: VmMapEntry = { - vmId: "vm_legacy", - previewUrl: "https://legacy.example/", - // no runnerKind — legacy entry, expected to be skipped (no teardown). -}; +/** + * 3-level helper: builds vmMap[userId][branch][kind] = entry. + * Type-cast through `unknown` is needed because VmMap's value type is a union + * that doesn't yet include the record-of-entries shape before the full SDK + * update lands; the runtime shape is correct. 
+ */ +function makeVmMap( + userId: string, + branch: string, + kind: SandboxProviderKind, + entry: VmMapEntry, +): VmMap { + return { + [userId]: { + [branch]: { [kind]: entry } as VmMap[string][string], + }, + }; +} type Metadata = { vmMap?: VmMap }; @@ -157,14 +167,18 @@ describe("VM_DELETE", () => { it("calls runner.delete with the entry's handle and removes vmMap entry", async () => { const metadata: Metadata = { - vmMap: { "user-1": { [BRANCH]: DOCKER_ENTRY } }, + vmMap: makeVmMap("user-1", BRANCH, "docker", DOCKER_ENTRY), }; const virtualMcp = makeVirtualMcp("org_1", metadata); const updateSpy = mock(async () => {}); const ctx = makeCtx({ virtualMcp, updateSpy }); const result = await VM_DELETE.handler( - { virtualMcpId: "vmcp_1", branch: BRANCH }, + { + virtualMcpId: "vmcp_1", + branch: BRANCH, + sandboxProviderKind: "docker", + }, ctx, ); @@ -176,55 +190,47 @@ describe("VM_DELETE", () => { expect(updateSpy).toHaveBeenCalledTimes(1); const updateCall = (updateSpy.mock.calls as unknown[][])[0]!; const updated = (updateCall[2] as { metadata: { vmMap: VmMap } }).metadata; + // After removal, the user bucket should be gone entirely. 
expect(updated.vmMap["user-1"]).toBeUndefined(); }); - it("dispatches to the agent-sandbox runner when entry.runnerKind is 'agent-sandbox'", async () => { + it("dispatches to the docker runner when input.sandboxProviderKind is 'docker'", async () => { const metadata: Metadata = { - vmMap: { "user-1": { [BRANCH]: AGENT_SANDBOX_ENTRY } }, + vmMap: makeVmMap("user-1", BRANCH, "docker", DOCKER_ENTRY), }; const virtualMcp = makeVirtualMcp("org_1", metadata); const ctx = makeCtx({ virtualMcp }); - await VM_DELETE.handler({ virtualMcpId: "vmcp_1", branch: BRANCH }, ctx); - - expect(mockDelete).toHaveBeenCalledWith(AGENT_SANDBOX_ENTRY.vmId); - expect(lastRequestedKind.value).toBe("agent-sandbox"); - }); - - it("skips teardown when entry has no runnerKind (legacy entries) but still clears vmMap", async () => { - const metadata: Metadata = { - vmMap: { "user-1": { [BRANCH]: LEGACY_ENTRY } }, - }; - const virtualMcp = makeVirtualMcp("org_1", metadata); - const updateSpy = mock(async () => {}); - const ctx = makeCtx({ virtualMcp, updateSpy }); - - const result = await VM_DELETE.handler( - { virtualMcpId: "vmcp_1", branch: BRANCH }, + await VM_DELETE.handler( + { virtualMcpId: "vmcp_1", branch: BRANCH, sandboxProviderKind: "docker" }, ctx, ); - expect(result).toEqual({ success: true }); - expect(mockDelete).not.toHaveBeenCalled(); - // vmMap is still cleared so the UI returns to idle. - expect(updateSpy).toHaveBeenCalledTimes(1); + expect(mockDelete).toHaveBeenCalledWith(DOCKER_ENTRY.vmId); + expect(lastRequestedKind.value).toBe("docker"); }); - // Regression guard for the invariant called out in stop.ts:1–5: a pod that - // flipped STUDIO_SANDBOX_RUNNER between start and stop must still tear down - // the runner that the entry was created against. 
- it("dispatches on the entry's runnerKind even when STUDIO_SANDBOX_RUNNER env disagrees", async () => { + // Regression guard: a pod that flipped STUDIO_SANDBOX_RUNNER between start + // and stop must still tear down the runner the entry was created against. + // The kind is now caller-supplied, so the env value is irrelevant. + it("dispatches on input.sandboxProviderKind even when STUDIO_SANDBOX_RUNNER env disagrees", async () => { const original = process.env.STUDIO_SANDBOX_RUNNER; process.env.STUDIO_SANDBOX_RUNNER = "agent-sandbox"; try { const metadata: Metadata = { - vmMap: { "user-1": { [BRANCH]: DOCKER_ENTRY } }, + vmMap: makeVmMap("user-1", BRANCH, "docker", DOCKER_ENTRY), }; const virtualMcp = makeVirtualMcp("org_1", metadata); const ctx = makeCtx({ virtualMcp }); - await VM_DELETE.handler({ virtualMcpId: "vmcp_1", branch: BRANCH }, ctx); + await VM_DELETE.handler( + { + virtualMcpId: "vmcp_1", + branch: BRANCH, + sandboxProviderKind: "docker", + }, + ctx, + ); expect(mockDelete).toHaveBeenCalledWith(DOCKER_ENTRY.vmId); expect(lastRequestedKind.value).toBe("docker"); @@ -234,16 +240,43 @@ describe("VM_DELETE", () => { } }); - it("skips runner.delete and DB update when no vmMap entry for (user, branch)", async () => { + it("coalesces legacy 'host' kind input to 'remote-user'", async () => { + // Use a docker entry as a stand-in — what matters is the dispatch kind. + const metadata: Metadata = { + vmMap: makeVmMap("user-1", BRANCH, "remote-user", DOCKER_ENTRY), + }; + const virtualMcp = makeVirtualMcp("org_1", metadata); + const ctx = makeCtx({ virtualMcp }); + + // Cast through unknown to simulate a legacy caller sending "host" before + // the enum was updated. 
+ await VM_DELETE.handler( + { + virtualMcpId: "vmcp_1", + branch: BRANCH, + sandboxProviderKind: "host" as unknown as SandboxProviderKind, + }, + ctx, + ); + + expect(lastRequestedKind.value).toBe("remote-user"); + }); + + it("skips runner.delete and DB update when no vmMap entry for (user, branch, kind)", async () => { + // Entry exists for a different user — this user has no entry. const metadata: Metadata = { - vmMap: { "other-user": { [BRANCH]: DOCKER_ENTRY } }, + vmMap: makeVmMap("other-user", BRANCH, "docker", DOCKER_ENTRY), }; const virtualMcp = makeVirtualMcp("org_1", metadata); const updateSpy = mock(async () => {}); const ctx = makeCtx({ virtualMcp, updateSpy }); const result = await VM_DELETE.handler( - { virtualMcpId: "vmcp_1", branch: BRANCH }, + { + virtualMcpId: "vmcp_1", + branch: BRANCH, + sandboxProviderKind: "docker", + }, ctx, ); @@ -256,7 +289,11 @@ describe("VM_DELETE", () => { const ctx = makeCtx({ virtualMcp: null }); const result = await VM_DELETE.handler( - { virtualMcpId: "vmcp_missing", branch: BRANCH }, + { + virtualMcpId: "vmcp_missing", + branch: BRANCH, + sandboxProviderKind: "docker", + }, ctx, ); @@ -273,7 +310,14 @@ describe("VM_DELETE", () => { undefined; await expect( - VM_DELETE.handler({ virtualMcpId: "vmcp_1", branch: BRANCH }, ctx), + VM_DELETE.handler( + { + virtualMcpId: "vmcp_1", + branch: BRANCH, + sandboxProviderKind: "docker", + }, + ctx, + ), ).rejects.toThrow("User ID required"); }); }); diff --git a/apps/mesh/src/tools/vm/stop.ts b/apps/mesh/src/tools/vm/stop.ts index e3a39a6379..4a6b0d5e01 100644 --- a/apps/mesh/src/tools/vm/stop.ts +++ b/apps/mesh/src/tools/vm/stop.ts @@ -1,14 +1,14 @@ /** - * VM_DELETE. Dispatches on the entry's persisted `runnerKind` (not env), - * so a pod that flipped STUDIO_SANDBOX_RUNNER between start and stop still - * tears down the right kind of VM. + * VM_DELETE. 
Dispatches on the caller-supplied `sandboxProviderKind` (not + * env), so a pod that flipped STUDIO_SANDBOX_RUNNER between start and stop + * still tears down the right kind of VM. */ import { z } from "zod"; -import type { RunnerKind } from "@decocms/sandbox/runner"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; import { defineTool } from "../../core/define-tool"; import { requireVmEntry } from "./helpers"; -import { getRunnerByKind } from "../../sandbox/lifecycle"; +import { getSandboxProviderByKind } from "../../sandbox/lifecycle"; import { removeVmMapEntry } from "./vm-map"; export const VM_DELETE = defineTool({ @@ -28,15 +28,29 @@ export const VM_DELETE = defineTool({ .string() .min(1) .describe("Branch whose vm should be deleted (vmMap[userId][branch])"), + sandboxProviderKind: z + .enum(["docker", "agent-sandbox", "remote-user"]) + .describe( + "Kind of sandbox provider the VM was started with. Used to locate the correct 3-level vmMap entry.", + ), }), outputSchema: z.object({ success: z.boolean(), }), handler: async (input, ctx) => { + // Legacy "host" value can sneak in from pre-removal callers; coalesce to + // the dev-mode replacement so the stop path doesn't crash. + const rawKind = input.sandboxProviderKind as string; + const kind: SandboxProviderKind = + rawKind === "host" ? 
"remote-user" : (rawKind as SandboxProviderKind); + let vmEntry: Awaited>; try { - vmEntry = await requireVmEntry(input, ctx); + vmEntry = await requireVmEntry( + { ...input, sandboxProviderKind: kind }, + ctx, + ); } catch (err) { if (err instanceof Error && err.message === "Virtual MCP not found") { return { success: true }; @@ -56,13 +70,10 @@ export const VM_DELETE = defineTool({ userId, userId, input.branch, + kind, ); - if (!entry.runnerKind) { - return { success: true }; - } - const kind: RunnerKind = entry.runnerKind; - const runner = await getRunnerByKind(ctx, kind); + const runner = await getSandboxProviderByKind(ctx, kind); await runner .delete(entry.vmId) .catch((err) => diff --git a/apps/mesh/src/tools/vm/vm-map.test.ts b/apps/mesh/src/tools/vm/vm-map.test.ts index 90ae90b144..a0760a598e 100644 --- a/apps/mesh/src/tools/vm/vm-map.test.ts +++ b/apps/mesh/src/tools/vm/vm-map.test.ts @@ -30,7 +30,8 @@ describe("readVmMap", () => { }); test("returns the vmMap when present", () => { - const vmMap = { "user-1": { main: ENTRY_A } }; + // 3-level: userId → branch → kind → entry + const vmMap = { "user-1": { main: { docker: ENTRY_A } } }; expect(readVmMap({ vmMap })).toEqual(vmMap); }); @@ -41,25 +42,48 @@ describe("readVmMap", () => { describe("resolveVm", () => { test("returns null when user is absent", () => { - expect(resolveVm({}, "user-1", "main")).toBeNull(); + expect(resolveVm({}, "user-1", "main", "docker")).toBeNull(); }); test("returns null when branch is absent for that user", () => { - const vmMap = { "user-1": { main: ENTRY_A } }; - expect(resolveVm(vmMap, "user-1", "feat/x")).toBeNull(); + const vmMap = { "user-1": { main: { docker: ENTRY_A } } }; + expect(resolveVm(vmMap, "user-1", "feat/x", "docker")).toBeNull(); }); - test("returns the entry when both are present", () => { - const vmMap = { "user-1": { main: ENTRY_A, "feat/x": ENTRY_B } }; - expect(resolveVm(vmMap, "user-1", "feat/x")).toEqual(ENTRY_B); + test("returns the entry when userId, 
branch, and kind are all present", () => { + const vmMap = { + "user-1": { + main: { docker: ENTRY_A }, + "feat/x": { docker: ENTRY_B }, + }, + }; + expect(resolveVm(vmMap, "user-1", "feat/x", "docker")).toEqual(ENTRY_B); }); test("isolates users from each other", () => { const vmMap = { - "user-1": { main: ENTRY_A }, - "user-2": { main: ENTRY_B }, + "user-1": { main: { docker: ENTRY_A } }, + "user-2": { main: { docker: ENTRY_B } }, + }; + expect(resolveVm(vmMap, "user-1", "main", "docker")).toEqual(ENTRY_A); + expect(resolveVm(vmMap, "user-2", "main", "docker")).toEqual(ENTRY_B); + }); + + test("returns null when the kind is absent but another kind exists", () => { + const vmMap = { + "user-1": { main: { docker: ENTRY_A } }, + }; + // looking up "agent-sandbox" when only "docker" exists → null + expect(resolveVm(vmMap, "user-1", "main", "agent-sandbox")).toBeNull(); + }); + + test("returns the entry for the requested kind when multiple kinds coexist", () => { + const vmMap = { + "user-1": { main: { docker: ENTRY_A, "agent-sandbox": ENTRY_B } }, }; - expect(resolveVm(vmMap, "user-1", "main")).toEqual(ENTRY_A); - expect(resolveVm(vmMap, "user-2", "main")).toEqual(ENTRY_B); + expect(resolveVm(vmMap, "user-1", "main", "docker")).toEqual(ENTRY_A); + expect(resolveVm(vmMap, "user-1", "main", "agent-sandbox")).toEqual( + ENTRY_B, + ); }); }); diff --git a/apps/mesh/src/tools/vm/vm-map.ts b/apps/mesh/src/tools/vm/vm-map.ts index 47f5cad942..40de97f9b6 100644 --- a/apps/mesh/src/tools/vm/vm-map.ts +++ b/apps/mesh/src/tools/vm/vm-map.ts @@ -1,17 +1,19 @@ /** - * vmMap helpers — per-user, per-branch vm registry. + * vmMap helpers — per-(user, branch, sandboxProviderKind) vm registry. * - * vmMap[userId][branch] -> { vmId, previewUrl } + * Lookup: vmMap[userId][branch][sandboxProviderKind] -> VmMapEntry * - * Kept in the virtualmcp's metadata JSON column. Lookup lets threads sharing - * a (user, branch) pair route to the same vm. 
+ * Stored in the virtualmcp's metadata JSON column. Threads sharing the same + * (user, branch, kind) triple share one vm. * * NOTE: read-modify-write is NOT atomic across pods — two concurrent VM_START - * calls for the same (vm, user, branch) can race. Accepted for v1. A proper - * fix requires a Postgres advisory lock or a dedicated vm_sessions table. + * calls for the same (vm, user, branch, kind) can race. Accepted for v1. A + * proper fix requires a Postgres advisory lock or a dedicated vm_sessions table. */ +import { parseBranchMap } from "@decocms/mesh-sdk"; import type { VmMap, VmMapEntry } from "@decocms/mesh-sdk"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; import type { VirtualMCPStoragePort } from "../../storage/ports"; import type { VirtualMCPUpdateData } from "../virtual/schema"; @@ -29,13 +31,18 @@ export function resolveVm( vmMap: VmMap, userId: string, branch: string, + sandboxProviderKind: SandboxProviderKind, ): VmMapEntry | null { - return vmMap[userId]?.[branch] ?? null; + const raw = vmMap[userId]?.[branch]; + if (!raw) return null; + const parsed = parseBranchMap(raw); + return parsed[sandboxProviderKind] ?? null; } /** - * Read-modify-write: sets `vmMap[userId][branch] = entry` on the virtualmcp. - * Creates the user bucket if it doesn't exist. + * Read-modify-write: sets vmMap[userId][branch][kind] = entry on the virtualmcp. + * Creates intermediate buckets as needed. Preserves any sibling-kind entries + * already present at vmMap[userId][branch][*]. */ export async function setVmMapEntry( storage: VirtualMCPStoragePort, @@ -43,6 +50,7 @@ export async function setVmMapEntry( actingUserId: string, targetUserId: string, branch: string, + sandboxProviderKind: SandboxProviderKind, entry: VmMapEntry, ): Promise { const virtualMcp = await storage.findById(virtualMcpId); @@ -50,11 +58,15 @@ export async function setVmMapEntry( const meta = (virtualMcp.metadata ?? 
{}) as Record; const current = readVmMap(meta); + const currentBranchMap = parseBranchMap(current[targetUserId]?.[branch]); const next: VmMap = { ...current, [targetUserId]: { ...(current[targetUserId] ?? {}), - [branch]: entry, + [branch]: { + ...currentBranchMap, + [sandboxProviderKind]: entry, + } as VmMap[string][string], }, }; @@ -67,8 +79,9 @@ export async function setVmMapEntry( } /** - * Read-modify-write: removes `vmMap[userId][branch]` from the virtualmcp. - * Drops the user bucket entirely when it becomes empty. + * Read-modify-write: removes vmMap[userId][branch][kind]. + * Drops the branch bucket if no kinds remain; drops the user bucket if no + * branches remain. */ export async function removeVmMapEntry( storage: VirtualMCPStoragePort, @@ -76,16 +89,25 @@ export async function removeVmMapEntry( actingUserId: string, targetUserId: string, branch: string, + sandboxProviderKind: SandboxProviderKind, ): Promise { const virtualMcp = await storage.findById(virtualMcpId); if (!virtualMcp) return; const meta = (virtualMcp.metadata ?? {}) as Record; const current = readVmMap(meta); - if (!current[targetUserId]?.[branch]) return; + const branchMap = parseBranchMap(current[targetUserId]?.[branch]); + if (!branchMap[sandboxProviderKind]) return; - const userMap = { ...current[targetUserId] }; - delete userMap[branch]; + const nextBranchMap = { ...branchMap }; + delete nextBranchMap[sandboxProviderKind]; + + const userMap = { ...(current[targetUserId] ?? 
{}) }; + if (Object.keys(nextBranchMap).length === 0) { + delete userMap[branch]; + } else { + userMap[branch] = nextBranchMap as VmMap[string][string]; + } const next: VmMap = { ...current }; if (Object.keys(userMap).length === 0) { diff --git a/apps/mesh/src/web/components/chat/agent-model-popover.test.tsx b/apps/mesh/src/web/components/chat/agent-model-popover.test.tsx new file mode 100644 index 0000000000..c58fd2ce07 --- /dev/null +++ b/apps/mesh/src/web/components/chat/agent-model-popover.test.tsx @@ -0,0 +1,80 @@ +import { setupComponentTest } from "../../../test/setup"; +setupComponentTest(); +import { describe, expect, test, mock } from "bun:test"; +import { render } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { AgentModelPopover } from "./agent-model-popover"; +import { getAgentSections } from "./select-model/agent-models"; + +const ALL = getAgentSections({ + hasAnyKey: true, + link: { online: true, capabilities: ["claude-code", "codex"] }, +}); + +describe("AgentModelPopover", () => { + test("renders one AgentSection per item", () => { + const { getAllByTestId } = render( + {}} + />, + ); + expect(getAllByTestId("agent-section")).toHaveLength(3); + }); + + test("when lockedAgent is set, only the matching section is enabled", () => { + const { getAllByTestId } = render( + {}} + />, + ); + const sections = getAllByTestId("agent-section"); + const disabled = sections.filter( + (s) => s.getAttribute("aria-disabled") === "true", + ); + expect(disabled).toHaveLength(2); + }); + + test("row click in a section calls onSelect with (kind, tier)", () => { + const onSelect = mock( + ( + _k: "decopilot" | "claude-code" | "codex", + _t: "fast" | "smart" | "thinking", + ) => {}, + ); + const { getByText } = render( + , + ); + getByText("Haiku").click(); + expect(onSelect).toHaveBeenCalledWith("claude-code", "fast"); + }); + + test("locked non-active section does NOT call onSelect when its rows are clicked", () => { + const onSelect = 
mock(() => {}); + const { getByText } = render( + , + ); + // Fast row inside the locked Decopilot section + getByText("Fast").click(); + expect(onSelect).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/mesh/src/web/components/chat/agent-model-popover.tsx b/apps/mesh/src/web/components/chat/agent-model-popover.tsx new file mode 100644 index 0000000000..798c0175ce --- /dev/null +++ b/apps/mesh/src/web/components/chat/agent-model-popover.tsx @@ -0,0 +1,42 @@ +import type { ChatTier } from "@/tools/organization/schema"; +import { AgentSection } from "./select-model/agent-section"; +import type { + AgentKind, + AgentSection as AgentSectionData, +} from "./select-model/agent-models"; + +interface Props { + sections: AgentSectionData[]; + activeAgent: AgentKind | null; + activeTier: ChatTier; + /** When non-null, only the section matching this kind is interactive; + * the others render opacity-40 + pointer-events-none. */ + lockedAgent: AgentKind | null; + onSelect: (agent: AgentKind, tier: ChatTier) => void; +} + +export function AgentModelPopover({ + sections, + activeAgent, + activeTier, + lockedAgent, + onSelect, +}: Props) { + return ( +
+ {sections.map((section) => { + const disabled = lockedAgent !== null && lockedAgent !== section.kind; + const selectedTier = activeAgent === section.kind ? activeTier : null; + return ( + onSelect(section.kind, tier)} + /> + ); + })} +
+ ); +} diff --git a/apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx b/apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx new file mode 100644 index 0000000000..81be728281 --- /dev/null +++ b/apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx @@ -0,0 +1,85 @@ +import { setupComponentTest } from "../../../test/setup"; // happy-dom + jest-dom matchers +setupComponentTest(); +import { describe, expect, test } from "bun:test"; +import { render } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { AgentModelTriggerPure } from "./agent-model-trigger"; +import { getAgentSections } from "./select-model/agent-models"; + +const ALL = getAgentSections({ + hasAnyKey: true, + link: { online: true, capabilities: ["claude-code", "codex"] }, +}); + +describe("AgentModelTriggerPure", () => { + test("closed pill is neutral when active agent is Decopilot", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).not.toMatch(/text-success/); + expect(button?.className).not.toMatch(/bg-success\/10/); + }); + + test("closed pill gets text-success and bg-success/10 when CLI agent active", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).toMatch(/text-success/); + expect(button?.className).toMatch(/bg-success\/10/); + }); + + test("closed pill uses responsive gap so collapsed label doesn't leave phantom gap", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).toMatch(/\bgap-0\b/); + expect(button?.className).toMatch(/@\[496px\]\/chat-bottom:gap-1\.5/); + }); + + test("label reflects the active CLI tier model label (Opus)", () => { + const { getByText } = render( + {}} + />, + ); + expect(getByText("Opus")).toBeInTheDocument(); + }); + + test("label reflects the active 
Decopilot tier label (Smart)", () => { + const { getByText } = render( + {}} + />, + ); + expect(getByText("Smart")).toBeInTheDocument(); + }); +}); diff --git a/apps/mesh/src/web/components/chat/agent-model-trigger.tsx b/apps/mesh/src/web/components/chat/agent-model-trigger.tsx new file mode 100644 index 0000000000..ad011bb3aa --- /dev/null +++ b/apps/mesh/src/web/components/chat/agent-model-trigger.tsx @@ -0,0 +1,209 @@ +import { Button } from "@deco/ui/components/button.tsx"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@deco/ui/components/popover.tsx"; +import { cn } from "@deco/ui/lib/utils.ts"; +import { useState } from "react"; +import type { HarnessId } from "@/harnesses"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import type { ChatTier } from "@/tools/organization/schema"; +import { + SELF_MCP_ALIAS_ID, + useMCPClient, + useProjectContext, +} from "@decocms/mesh-sdk"; +import { track } from "@/web/lib/posthog-client"; +import { useAiProviderKeys } from "@/web/hooks/collections/use-ai-providers"; +import { useCurrentLink } from "@/web/hooks/use-current-link"; +import { useVmStart } from "@/web/components/vm/hooks/use-vm-start"; +import { useChatPrefs } from "./context"; +import { AgentModelPopover } from "./agent-model-popover"; +import { + type AgentKind, + type AgentSection, + getAgentSections, +} from "./select-model/agent-models"; + +interface Props { + agent: HarnessId | null; + sandboxKind: SandboxProviderKind | null; + tier: ChatTier; + /** Set when the user is on a branch — needed for the eager VM-start + * when the user picks a CLI agent. `null` when no branch is selected + * (no eager start). */ + currentBranch: string | null; + virtualMcpId: string; + /** Tier-only setter — kept for callers that want to swap tier without + * also potentially flipping agents (the popover handles agent + + * tier itself via `setPendingAgentOption`). 
*/ + onSelect: (tier: ChatTier) => void; +} + +/** Maps the popover's AgentKind back to the persisted AgentOption. */ +function optionForAgent(kind: AgentKind) { + switch (kind) { + case "decopilot": + return "decopilot" as const; + case "claude-code": + return "claude-code-laptop" as const; + case "codex": + return "codex-laptop" as const; + } +} + +function agentKindFromHarness( + agent: HarnessId | null, + sandboxKind: SandboxProviderKind | null, +): AgentKind | null { + if (agent === "claude-code" && sandboxKind === "remote-user") + return "claude-code"; + if (agent === "codex" && sandboxKind === "remote-user") return "codex"; + if (agent === "decopilot") return "decopilot"; + return null; +} + +/** + * Trigger pill in the chat input that opens the merged sectioned + * popover (Decopilot + Claude Code + Codex). When the active agent is + * a laptop-CLI variant the pill turns `text-success` + `bg-success/10` + * to mirror the "Desktop connected" affordance in + * `NoAiProviderEmptyState`. The popover handles agent + tier writes + * atomically. 
+ */ +export function AgentModelTrigger({ + agent, + sandboxKind, + tier, + currentBranch, + virtualMcpId, + onSelect, +}: Props) { + const keys = useAiProviderKeys(); + const link = useCurrentLink(); + const { setPendingAgentOption } = useChatPrefs(); + const { org } = useProjectContext(); + const mcpClient = useMCPClient({ + connectionId: SELF_MCP_ALIAS_ID, + orgId: org.id, + orgSlug: org.slug, + }); + const startVm = useVmStart(mcpClient); + + const sections = getAgentSections({ + hasAnyKey: keys.length > 0, + link, + }); + + const activeAgent = agentKindFromHarness(agent, sandboxKind); + + const handleSelect = (kind: AgentKind, nextTier: ChatTier) => { + const opt = optionForAgent(kind); + setPendingAgentOption(opt); + onSelect(nextTier); + if (kind !== "decopilot" && currentBranch) { + startVm.mutate({ + virtualMcpId, + branch: currentBranch, + sandboxProviderKind: "remote-user" as const, + }); + } + track("agent_model_selected", { agent: kind, tier: nextTier }); + }; + + return ( + + ); +} + +interface PureProps { + sections: AgentSection[]; + activeAgent: AgentKind | null; + activeTier: ChatTier; + lockedAgent: AgentKind | null; + onSelect: (kind: AgentKind, tier: ChatTier) => void; +} + +/** + * Stateless variant for tests. Renders the closed pill + popover — + * does not touch hooks or chat prefs. Keeps `AgentModelTrigger` + * thin so test cases don't have to mock the entire chat context. + */ +export function AgentModelTriggerPure({ + sections, + activeAgent, + activeTier, + lockedAgent, + onSelect, +}: PureProps) { + const [open, setOpen] = useState(false); + + const section = + sections.find((s) => s.kind === activeAgent) ?? sections[0] ?? null; + const tierEntry = section?.tiers[activeTier]; + + const isLocalActive = section?.isLocal ?? false; + const label = tierEntry?.label ?? 
""; + + // Closed pill — collapses label at narrow widths; `gap-0` on the + // outer + `@[496px]/chat-bottom:gap-1.5` keeps the icon + chevron + // flush when the label is hidden. + const baseClasses = + "gap-0 @[496px]/chat-bottom:gap-1.5 text-muted-foreground hover:text-foreground"; + const localActiveClasses = isLocalActive + ? "text-success bg-success/10 hover:text-success" + : ""; + + if (!section || !tierEntry) { + return null; + } + + return ( + + + + + + { + onSelect(kind, t); + setOpen(false); + }} + /> + + + ); +} diff --git a/apps/mesh/src/web/components/chat/chat-context.tsx b/apps/mesh/src/web/components/chat/chat-context.tsx index 4e9d4c7ad5..e56d3ed6de 100644 --- a/apps/mesh/src/web/components/chat/chat-context.tsx +++ b/apps/mesh/src/web/components/chat/chat-context.tsx @@ -40,6 +40,9 @@ import { type SubmitAction, type ThreadObserver, } from "./store/thread-connection"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import type { HarnessId } from "@/harnesses"; +import { AGENT_OPTION_PINS, type AgentOption } from "./pills/agent-options"; import { pickSimpleModeDefaults, SELF_MCP_ALIAS_ID, @@ -92,6 +95,7 @@ import type { Task } from "./task/types"; import type { SendMessageParams, SetAppContextParams } from "./store/types"; import { useLocalStorage } from "../../hooks/use-local-storage"; import { chatModeForTransportRef } from "../../lib/chat-mode-sync"; +import { agentHasClonableSource } from "@/web/lib/agent-capabilities"; import { LOCALSTORAGE_KEYS } from "../../lib/localstorage-keys"; import { KEYS } from "../../lib/query-keys"; import { useSimpleMode } from "../../hooks/use-organization-settings"; @@ -174,6 +178,28 @@ export interface ChatPrefsContextValue { /** The currently selected tier in Simple Model Mode */ simpleModeTier: SimpleTier; setSimpleModeTier: (tier: SimpleTier) => void; + /** + * The agent option the chat will use for the next first message + * (`Decopilot` / `Decopilot desktop` / `Claude Code desktop` 
/ + * `Codex desktop`). Single source of truth for the (harness, sandbox) + * pair — see `AGENT_OPTION_PINS` in `./pills/agent-options`. + * + * This is the **effective** value: the user's persisted pick filtered + * through what the active agent can actually run. If the user picked a + * desktop variant but the current agent has no clonable source + * (Decopilot-only / ephemeral), this falls back to plain Decopilot. + * The persisted pick is unchanged and returns when navigating back to + * an agent with a checkout. The setter writes to the raw underlying state. + * + * Null = server picks the default. Persisted to localStorage so the + * choice survives page reloads. + */ + pendingAgentOption: AgentOption | null; + setPendingAgentOption: (option: AgentOption | null) => void; + /** Derived from `pendingAgentOption`. Read-only. */ + pendingHarnessId: HarnessId | null; + /** Derived from `pendingAgentOption`. Read-only. */ + pendingSandboxProviderKind: SandboxProviderKind | null; } // ============================================================================ @@ -449,6 +475,63 @@ export function ChatPrefsProvider({ children }: PropsWithChildren) { }); }; + // Pending agent — single source of truth for the user's pre-message + // pick (`Decopilot` / `Decopilot desktop` / `Claude Code desktop` / + // `Codex desktop`). Persisted to localStorage so the choice survives + // page reloads. + // + // Everything else (`pendingHarnessId`, `pendingSandboxProviderKind`, + // the request body's harnessId/sandboxProviderKind) derives from this + // through `AGENT_OPTION_PINS`, so the pill display and the submit can + // never disagree. + const [pendingAgentOption, setPendingAgentOptionState] = + useState(() => { + try { + const stored = localStorage.getItem( + "chat:lastAgentOption", + ) as AgentOption | null; + return stored && stored in AGENT_OPTION_PINS ? 
stored : null; + } catch { + return null; + } + }); + const setPendingAgentOption = (option: AgentOption | null) => { + setPendingAgentOptionState(option); + try { + if (option === null) { + localStorage.removeItem("chat:lastAgentOption"); + } else { + localStorage.setItem("chat:lastAgentOption", option); + } + } catch { + // ignore storage errors (private browsing, quota exceeded, etc.) + } + }; + + // Effective option: the user's pick filtered through what the current + // agent can actually run. Laptop-CLI options (Claude Code / Codex / + // Decopilot desktop) need a git branch to check out on the user's + // desktop; if the user picked a desktop variant but the current agent + // has no clonable source (Decopilot-only / ephemeral), this falls back + // to plain Decopilot. The persisted pick is unchanged and returns when + // navigating back to an agent with a checkout. + const hasClonableSource = agentHasClonableSource( + selectedVirtualMcpData?.metadata, + ); + const effectiveAgentOption: AgentOption | null = + pendingAgentOption === null + ? null + : !hasClonableSource && + AGENT_OPTION_PINS[pendingAgentOption].sandbox === "remote-user" + ? "decopilot" + : pendingAgentOption; + + const effectivePins = effectiveAgentOption + ? AGENT_OPTION_PINS[effectiveAgentOption] + : null; + const pendingHarnessId = effectivePins?.harness ?? null; + const pendingSandboxProviderKind = effectivePins?.sandbox ?? 
null; + // Tiptap doc (transient UI state) const [tiptapDoc, setTiptapDoc] = useState(undefined); const tiptapDocRef = useRef(tiptapDoc); @@ -486,6 +569,10 @@ export function ChatPrefsProvider({ children }: PropsWithChildren) { resetInteraction: () => {}, simpleModeTier: activeTier, setSimpleModeTier: (tier: SimpleTier) => setStoredTier(tier), + pendingAgentOption: effectiveAgentOption, + setPendingAgentOption, + pendingHarnessId, + pendingSandboxProviderKind, }; return ( @@ -663,6 +750,8 @@ export function ActiveTaskProvider({ appContexts, setTiptapDoc, simpleModeTier: activeTier, + pendingSandboxProviderKind, + pendingHarnessId, } = useChatPrefs(); const internals = useContext(TaskInternalsCtx); if (!internals) { @@ -859,6 +948,8 @@ export function ActiveTaskProvider({ agent: { id: capturedVirtualMcpId }, thread_id: capturedTaskId, branch: currentBranch, + sandboxProviderKind: pendingSandboxProviderKind || undefined, + harnessId: pendingHarnessId || undefined, }, ); } diff --git a/apps/mesh/src/web/components/chat/connect-laptop-dialog.tsx b/apps/mesh/src/web/components/chat/connect-laptop-dialog.tsx new file mode 100644 index 0000000000..4cbb417890 --- /dev/null +++ b/apps/mesh/src/web/components/chat/connect-laptop-dialog.tsx @@ -0,0 +1,98 @@ +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from "@deco/ui/components/dialog.tsx"; +import { Spinner } from "@deco/ui/components/spinner.tsx"; +import { Button } from "@deco/ui/components/button.tsx"; +import { Check, Copy01 } from "@untitledui/icons"; +import { useState } from "react"; +import type { Capability } from "@/links/protocol"; +import { useCurrentLink } from "@/web/hooks/use-current-link"; + +const INSTALL_SNIPPET = "bunx decocms link"; + +const CAPABILITY_LABELS: Partial> = { + "claude-code": "Claude Code", + codex: "Codex", +}; + +/** + * Format the link's capability list for UI display. 
Drops + * `decopilot-sandbox` (always present and not meaningful to the user) + * and maps the rest to friendly labels. Returns the empty array when + * nothing user-facing is available. + */ +export function visibleCapabilities(caps: readonly Capability[]): string[] { + return caps + .map((c) => CAPABILITY_LABELS[c]) + .filter((label): label is string => Boolean(label)); +} + +interface ConnectLaptopDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; +} + +export function ConnectLaptopDialog({ + open, + onOpenChange, +}: ConnectLaptopDialogProps) { + const link = useCurrentLink(); + const [copied, setCopied] = useState(false); + + return ( + + + + + {link.online ? "Desktop connected" : "Connect your desktop"} + + + {link.online + ? "Your desktop is online. Pick a desktop agent in the chat to use it." + : "Run this command in your desktop terminal. The dialog will close once your desktop is online."} + + + + {!link.online && ( +
+ {INSTALL_SNIPPET} + +
+ )} + + {!link.online ? ( +
+ + Waiting for desktop… +
+ ) : ( +
+

+ {link.machineId ?? "Your desktop"} is linked. +

+ {visibleCapabilities(link.capabilities).length > 0 && ( +

+ Available: {visibleCapabilities(link.capabilities).join(", ")} +

+ )} +
+ )} +
+
+ ); +} diff --git a/apps/mesh/src/web/components/chat/input.tsx b/apps/mesh/src/web/components/chat/input.tsx index 1b669ddc8e..74a9b028fe 100644 --- a/apps/mesh/src/web/components/chat/input.tsx +++ b/apps/mesh/src/web/components/chat/input.tsx @@ -32,7 +32,7 @@ import { useThreadActions } from "./store/hooks"; import type { VirtualMCPInfo } from "./select-virtual-mcp"; import { ChatHighlight } from "./highlight"; import { getSupportedFileTypesLabel, modelSupportsFiles } from "./select-model"; -import { SimpleModeTierDropdown } from "./simple-mode-tier-dropdown"; +import { AgentModelTrigger } from "./agent-model-trigger"; import type { AiProviderModel } from "@/web/hooks/collections/use-ai-providers"; import { UnsupportedFileDialog, @@ -209,10 +209,14 @@ function useHomeSubmit() { // Toast already surfaced by the store; navigate anyway — the route's // ensure-fallback will retry if the row is missing. } + const search: Record = { + virtualmcpid: targetVmcp, + autosend: AUTOSEND_QUERY_VALUE, + }; navigate({ to: "/$org/$taskId", params: { org: org.slug, taskId: newId }, - search: { virtualmcpid: targetVmcp, autosend: AUTOSEND_QUERY_VALUE }, + search, }); }; } @@ -243,6 +247,8 @@ export function ChatInput({ setChatMode, simpleModeTier, setSimpleModeTier, + pendingHarnessId, + pendingSandboxProviderKind, } = useChatPrefs(); const { data: session } = authClient.useSession(); const userId = session?.user?.id; @@ -588,8 +594,12 @@ export function ChatInput({ {/* Right Actions (mic, model, send) */}
- diff --git a/apps/mesh/src/web/components/chat/no-ai-provider-empty-state.tsx b/apps/mesh/src/web/components/chat/no-ai-provider-empty-state.tsx index 052609ca8c..3a121df913 100644 --- a/apps/mesh/src/web/components/chat/no-ai-provider-empty-state.tsx +++ b/apps/mesh/src/web/components/chat/no-ai-provider-empty-state.tsx @@ -1,11 +1,15 @@ import { useState } from "react"; -import { Zap } from "@untitledui/icons"; +import { Check, Laptop01, Zap } from "@untitledui/icons"; import { cn } from "@deco/ui/lib/utils.ts"; import { ConnectProviderDialog } from "@/web/views/settings/ai-providers/connect-provider-dialog"; import { ProviderGrid, type ProviderSelection, } from "@/web/views/settings/ai-providers/provider-grid"; +import { + SettingsCard, + SettingsCardItem, +} from "@/web/components/settings/settings-section"; import { SELF_MCP_ALIAS_ID, useMCPClient, @@ -13,10 +17,15 @@ import { } from "@decocms/mesh-sdk"; import { useAiProviders } from "@/web/hooks/collections/use-ai-providers"; import { useAuthConfig } from "@/web/providers/auth-config-provider"; +import { useCurrentLink } from "@/web/hooks/use-current-link"; import { KEYS } from "@/web/lib/query-keys"; import { unwrapToolResult } from "@/web/lib/unwrap-tool-result"; import { useQuery } from "@tanstack/react-query"; import type { BrandContext } from "@/storage/types"; +import { + ConnectLaptopDialog, + visibleCapabilities, +} from "./connect-laptop-dialog"; interface NoAiProviderEmptyStateProps { title?: string; @@ -83,6 +92,8 @@ export function NoAiProviderEmptyState({ const [pendingProvider, setPendingProvider] = useState(null); const [gridOpen, setGridOpen] = useState(false); + const [laptopOpen, setLaptopOpen] = useState(false); + const link = useCurrentLink(); const aiProviders = useAiProviders(); const providers = aiProviders?.providers ?? []; @@ -99,8 +110,8 @@ export function NoAiProviderEmptyState({ const subtitle = description ?? (localMode - ? 
"Connect a provider to get started — local models and existing subscriptions work too." - : "Choose how to power your AI team."); + ? "Connect a provider, or run `bunx decocms link` on your desktop for Claude Code, Codex, and local files." + : "Connect a provider — or run `bunx decocms link` on your desktop to use Claude Code, Codex, or your local files."); // Badge styles: use brand color if available, otherwise lime gradient const hasBrandStyle = !!(brandIcon || primaryColor); @@ -150,6 +161,35 @@ export function NoAiProviderEmptyState({ />
+
+ + {link.online ? ( + setLaptopOpen(true)} + icon={ +
+ +
+ } + title="Desktop connected" + description={(() => { + const labels = visibleCapabilities(link.capabilities); + return labels.length > 0 + ? `Available: ${labels.join(", ")}` + : "No CLI agents detected on this desktop"; + })()} + /> + ) : ( + setLaptopOpen(true)} + icon={} + title="Connect your desktop" + description="Use Claude Code, Codex, or your local files via the link CLI." + /> + )} +
+
+ { @@ -160,6 +200,8 @@ export function NoAiProviderEmptyState({ }} initialProvider={pendingProvider ?? undefined} /> + + ); } diff --git a/apps/mesh/src/web/components/chat/pills/agent-options.ts b/apps/mesh/src/web/components/chat/pills/agent-options.ts new file mode 100644 index 0000000000..36f38b1047 --- /dev/null +++ b/apps/mesh/src/web/components/chat/pills/agent-options.ts @@ -0,0 +1,41 @@ +import type { HarnessId } from "@/harnesses"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; + +export type AgentOption = "decopilot" | "claude-code-laptop" | "codex-laptop"; + +export interface AgentPins { + harness: HarnessId; + sandbox: SandboxProviderKind | null; +} + +/** + * Canonical (harness, sandbox) pair for each `AgentOption`. The persisted + * pending-agent value is the source of truth; everything else (chat + * dispatch, VM start, model selector) reads through here so the pair can + * not drift. + */ +export const AGENT_OPTION_PINS: Record = { + decopilot: { harness: "decopilot", sandbox: null }, + "claude-code-laptop": { harness: "claude-code", sandbox: "remote-user" }, + "codex-laptop": { harness: "codex", sandbox: "remote-user" }, +}; + +export function pinsForOption(option: AgentOption): AgentPins { + return AGENT_OPTION_PINS[option]; +} + +/** Reverse lookup — find the AgentOption matching a persisted + * (harness, sandbox) pair. Returns `null` when the pair is unknown. 
*/ +export function pinsToOption( + harness: HarnessId | null, + sandbox: SandboxProviderKind | null, +): AgentOption | null { + if (!harness) return null; + for (const [option, pins] of Object.entries(AGENT_OPTION_PINS) as [ + AgentOption, + AgentPins, + ][]) { + if (pins.harness === harness && pins.sandbox === sandbox) return option; + } + return null; +} diff --git a/apps/mesh/src/web/components/chat/pills/branch-pill.tsx b/apps/mesh/src/web/components/chat/pills/branch-pill.tsx new file mode 100644 index 0000000000..791b87d5c6 --- /dev/null +++ b/apps/mesh/src/web/components/chat/pills/branch-pill.tsx @@ -0,0 +1,32 @@ +import type { VmMap } from "@decocms/mesh-sdk"; +import { GitBranch01 } from "@untitledui/icons"; +import { BranchPicker } from "../../thread/github/branch-picker"; + +interface Props { + orgId: string; + orgSlug: string; + userId: string; + virtualMcpId: string; + connectionId: string | null; + owner: string; + repo: string; + vmMap: VmMap | undefined; + value: string | null | undefined; + onChange: (branch: string) => void; + locked: boolean; +} + +export function BranchPill({ locked, value, ...props }: Props) { + if (locked) { + return ( + + + {value ?? 
"—"} + + ); + } + return ; +} diff --git a/apps/mesh/src/web/components/chat/pills/thread-pills.tsx b/apps/mesh/src/web/components/chat/pills/thread-pills.tsx new file mode 100644 index 0000000000..3b0b3aeb6d --- /dev/null +++ b/apps/mesh/src/web/components/chat/pills/thread-pills.tsx @@ -0,0 +1,57 @@ +import type { VmMap } from "@decocms/mesh-sdk"; +import type { HarnessId } from "@/harnesses"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import { BranchPill } from "./branch-pill"; +import { useOptionalChatStream } from "../context"; + +interface Props { + orgId: string; + orgSlug: string; + userId: string; + virtualMcpId: string; + connectionId: string; + owner: string; + repo: string; + vmMap: VmMap | undefined; + currentBranch: string | null; + onBranchChange: (branch: string) => void; + /** Kept in the signature for parity with the previous version even + * though the agent pill is gone — callers still pass them and they + * may be useful again if we revive a thread-level lock indicator. */ + threadKind: SandboxProviderKind | null; + threadHarness: HarnessId | null; +} + +export function ThreadPills({ + orgId, + orgSlug, + userId, + virtualMcpId, + connectionId, + owner, + repo, + vmMap, + currentBranch, + onBranchChange, +}: Props) { + const stream = useOptionalChatStream(); + const isActive = (stream?.messages ?? []).length > 0; + + return ( +
+ +
+ ); +} diff --git a/apps/mesh/src/web/components/chat/select-model.tsx b/apps/mesh/src/web/components/chat/select-model.tsx index 202739fb90..28b126c31f 100644 --- a/apps/mesh/src/web/components/chat/select-model.tsx +++ b/apps/mesh/src/web/components/chat/select-model.tsx @@ -1,6 +1,4 @@ import { Button } from "@deco/ui/components/button.tsx"; -import { Checkbox } from "@deco/ui/components/checkbox.tsx"; -import { Input } from "@deco/ui/components/input.tsx"; import { Dialog, DialogContent, @@ -14,1337 +12,24 @@ import { DrawerTrigger, } from "@deco/ui/components/drawer.tsx"; import { useIsMobile } from "@deco/ui/hooks/use-mobile.ts"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@deco/ui/components/select.tsx"; -import { Skeleton } from "@deco/ui/components/skeleton.tsx"; import { cn } from "@deco/ui/lib/utils.ts"; -import { - AlertTriangle, - AlignLeft, - ArrowLeft, - ChevronDown, - ChevronSelectorVertical, - Image01, - ImagePlus, - Key01, - RefreshCcw01, - SearchMd, - Settings02, - Stars01, - Tool01, -} from "@untitledui/icons"; -import { - type ReactNode, - startTransition, - Suspense, - useRef, - useState, - useTransition, -} from "react"; -import { useVirtualizer } from "@tanstack/react-virtual"; -import { - type AiProviderModel, - useAiProviderKeys, - useAiProviderModels, - useAiProviders, -} from "../../hooks/collections/use-ai-providers"; -import { ErrorBoundary } from "../error-boundary"; -import { track } from "@/web/lib/posthog-client"; +import { useState, Suspense } from "react"; +import { type AiProviderModel } from "./select-model/shared"; import { useChatPrefs } from "./context"; -import { getProviderLogo } from "@/web/utils/ai-providers-logos"; -import { getPreset } from "@/web/utils/openai-compatible-presets"; -import { useNavigate } from "@tanstack/react-router"; -import { useProjectContext } from "@decocms/mesh-sdk"; -import { NoAiProviderEmptyState } from "./no-ai-provider-empty-state"; - 
-function parseModelTitle(model: { title: string; modelId: string }): { - provider: string; - displayName: string; -} { - const hasPrefix = model.title.includes(": "); - return { - provider: hasPrefix - ? (model.title.split(": ")[0] ?? "") - : (model.modelId.split("/")[0] ?? ""), - displayName: hasPrefix - ? model.title.split(": ").slice(1).join(": ") - : model.title, - }; -} - -// ============================================================================ -// Tier Classification -// ============================================================================ - -const TIER_IDS = ["smarter", "faster", "cheaper"] as const; -type TierId = (typeof TIER_IDS)[number]; - -const TIER_LABELS: Record = { - smarter: "Smarter", - faster: "Faster", - cheaper: "Cheaper", -}; - -const TIER_PATTERNS: Array<{ tier: TierId; prefixes: string[] }> = [ - { - tier: "smarter", - prefixes: [ - "claude-code:opus", - "anthropic/claude-opus-4.7", - "anthropic/claude-sonnet-4.6", - "anthropic/claude-4.6-sonnet", - "openai/gpt-5.3-codex", - "codex:gpt-5.5", - "codex:gpt-5.4", - "codex:gpt-5.3-codex", - "codex:gpt-5.2", - "google/gemini-3-pro", - "google/gemini-2.5-pro", - "cohere/command-r-plus", - "cohere/command-a", - ], - }, - { - tier: "faster", - prefixes: [ - "claude-code:sonnet", - "anthropic/claude-haiku-4.5", - "anthropic/claude-4.5-haiku", - "google/gemini-3-flash", - "openai/gpt-5.1-codex-mini", - "codex:gpt-5.4-mini", - "x-ai/grok-code-fast", - "x-ai/grok-3", - "mistralai/mistral-large", - "mistralai/codestral", - "mistralai/mistral-medium", - "minimax/minimax-m1", - ], - }, - { - tier: "cheaper", - prefixes: [ - "claude-code:haiku", - "google/gemini-2.5-flash-lite", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash", - "deepseek/deepseek-v3", - "openai/gpt-oss-120b", - "mistralai/mistral-small", - "mistralai/pixtral", - "cohere/command-r", - ], - }, -]; - -// Sort rules longest-prefix-first for specificity -const SORTED_TIER_RULES = TIER_PATTERNS.flatMap(({ tier, prefixes 
}) => - prefixes.map((prefix) => ({ tier, prefix })), -).sort((a, b) => b.prefix.length - a.prefix.length); - -// Only exact matches (no named sub-variants); date suffixes (digits) are fine -const EXACT_ONLY_PREFIXES = new Set(["google/gemini-2.5-pro"]); - -const tierCache = new Map(); - -function classifyModel(modelId: string): TierId | null { - const cached = tierCache.get(modelId); - if (cached !== undefined) return cached; - - let result: TierId | null = null; - if (modelId.endsWith(":free")) { - result = "cheaper"; - } else { - outer: for (const { tier, prefix } of SORTED_TIER_RULES) { - if (modelId.startsWith(prefix)) { - if (EXACT_ONLY_PREFIXES.has(prefix) && modelId.length > prefix.length) { - const nextChar = modelId[prefix.length]; - if (nextChar === "-") { - const charAfterHyphen = modelId[prefix.length + 1]; - if (!charAfterHyphen || !/\d/.test(charAfterHyphen)) continue outer; - } - } - result = tier; - break; - } - } - } - - tierCache.set(modelId, result); - return result; -} - -function groupByTier( - models: AiProviderModel[], -): Record { - const groups: Record = { - smarter: [], - faster: [], - cheaper: [], - other: [], - }; - for (const m of models) { - const tier = classifyModel(m.modelId); - groups[tier ?? 
"other"].push(m); - } - for (const key of Object.keys(groups) as Array) { - groups[key].sort((a, b) => a.title.localeCompare(b.title)); - } - return groups; -} - -// ============================================================================ -// Model Shortlist (localStorage) -// ============================================================================ - -const SHORTLIST_KEY_PREFIX = "mesh:model-shortlist:"; - -const DEFAULT_SHORTLIST = new Set([ - // Smarter - "anthropic/claude-opus-4.7", - "anthropic/claude-sonnet-4.6", - "anthropic/claude-4.6-sonnet", - "anthropic/claude-sonnet-4.6:extended", - "openai/gpt-5.3-codex", - "google/gemini-3-pro-preview", - "google/gemini-2.5-pro", - // Faster - "anthropic/claude-haiku-4.5", - "anthropic/claude-haiku-4.5-20251001", - "anthropic/claude-4.5-haiku", - "google/gemini-3-flash-preview", - "openai/gpt-5.1-codex-mini", - "x-ai/grok-code-fast-1", - // Cheaper - "google/gemini-2.5-flash", - "deepseek/deepseek-v3.2", - "google/gemini-2.5-flash-lite", - "openai/gpt-oss-120b:free", -]); - -function readShortlist(keyId: string): Set | null { - try { - const raw = localStorage.getItem(SHORTLIST_KEY_PREFIX + keyId); - return raw ? 
new Set(JSON.parse(raw)) : null; - } catch { - return null; - } -} - -function writeShortlist(keyId: string, ids: Set) { - localStorage.setItem(SHORTLIST_KEY_PREFIX + keyId, JSON.stringify([...ids])); -} - -// ============================================================================ -// Contextual annotations (absolute thresholds, not relative to model list) -// ============================================================================ - -// 1–4 context level for dot indicator (absolute thresholds) -function getContextLevel(tokens: number): { - level: number; - label: string; - description: string; -} { - if (tokens < 32_000) { - return { level: 1, label: "Small", description: "Short conversations" }; - } - if (tokens < 200_000) { - return { level: 2, label: "Medium", description: "Good for most tasks" }; - } - if (tokens < 500_000) { - return { - level: 3, - label: "Large", - description: "Long projects & research", - }; - } - return { level: 4, label: "Very large", description: "Massive files & data" }; -} - -// Semantic colors per level — context (more = better: destructive→success) -const CONTEXT_DOT_COLORS = [ - "bg-destructive", - "bg-warning", - "bg-success", - "bg-success", -] as const; - -// Semantic colors per level — cost (more = worse: success→destructive) -const COST_DOLLAR_COLORS = [ - "text-success", - "text-warning", - "text-warning", - "text-destructive", -] as const; - -// Approximate word count for token amounts -function approxWords(tokens: number): string { - const k = Math.round((tokens * 0.75) / 1000); - return k >= 1 ? 
`~${k}K words` : `~${Math.round(tokens * 0.75)} words`; -} - -// 1–4 cost level (absolute thresholds, input $/1M) -function getCostLevel(inputPerM: number): { level: number; label: string } { - if (inputPerM < 1) return { level: 1, label: "Cheap" }; - if (inputPerM < 5) return { level: 2, label: "Moderate" }; - if (inputPerM < 15) return { level: 3, label: "High" }; - return { level: 4, label: "Expensive" }; -} - -// ============================================================================ -// UI Components -// ============================================================================ - -const CAPABILITY_CONFIGS: Record = { - text: { icon: , label: "Text" }, - vision: { icon: , label: "Vision" }, - image: { icon: , label: "Image" }, - tools: { icon: , label: "Tools" }, - reasoning: { icon: , label: "Reasoning" }, - "web-search": { - icon: , - label: "Web search", - }, -}; - -function CapabilityBadge({ capability }: { capability: string }) { - const config = CAPABILITY_CONFIGS[capability] ?? { - icon: null, - label: capability.charAt(0).toUpperCase() + capability.slice(1), - }; - - return ( - - {config.icon} - {config.label} - - ); -} - -function ModelDetailsPanel({ - model, - compact = false, -}: { - model: AiProviderModel | null; - compact?: boolean; -}) { - if (!model) { - return ( -
- Hover to preview -
- ); - } - - const inputCostPerM = - model.costs?.input != null ? model.costs.input * 1_000_000 : null; - const outputCostPerM = - model.costs?.output != null ? model.costs.output * 1_000_000 : null; - - const { provider: providerLabel, displayName: modelName } = - parseModelTitle(model); - - if (compact) { - return ( -
- {model.limits?.contextWindow && ( -
- Context - - {model.limits.contextWindow.toLocaleString()} tokens - -
- )} - {inputCostPerM != null && ( -
- Input - - ${inputCostPerM.toFixed(2)} / 1M - -
- )} - {outputCostPerM != null && ( -
- Output - - ${outputCostPerM.toFixed(2)} / 1M - -
- )} - {model.limits?.maxOutputTokens && ( -
- Output limit - - {model.limits.maxOutputTokens.toLocaleString()} tokens - -
- )} -
- ); - } - - return ( -
- {/* Header */} -
- - {providerLabel} - -
- {model.logo && ( - {model.title} - )} -

- {modelName} -

-
-

- {model.modelId} -

-
- - {/* Capabilities */} - {model.capabilities && model.capabilities.length > 0 && ( -
- {[...new Set(model.capabilities)].map((capability) => ( - - ))} -
- )} - - {/* Stats */} -
- {model.limits?.contextWindow && - (() => { - const { level, label, description } = getContextLevel( - model.limits.contextWindow, - ); - return ( -
- - Context window - -
-
- {[1, 2, 3, 4].map((i) => ( -
- ))} -
- - {label} - - - — {description} - -
- - {model.limits.contextWindow.toLocaleString()} tokens - -
- ); - })()} - - {model.limits?.maxOutputTokens && ( -
- - Output limit - -
- - {model.limits.maxOutputTokens.toLocaleString()} tokens - - - {approxWords(model.limits.maxOutputTokens)} - -
-
- )} - - {(inputCostPerM != null || outputCostPerM != null) && - (() => { - const { level, label } = - inputCostPerM != null - ? getCostLevel(inputCostPerM) - : { level: 0, label: "" }; - return ( -
- - Pricing - - {inputCostPerM != null && ( -
-
- {[1, 2, 3, 4].map((i) => ( - - $ - - ))} -
- - {label} - -
- )} -
- {inputCostPerM != null && ( -
- - Input - - - ${inputCostPerM.toFixed(2)} / 1M tokens - -
- )} - {outputCostPerM != null && ( -
- - Output - - - ${outputCostPerM.toFixed(2)} / 1M tokens - -
- )} -
-
- ); - })()} -
-
- ); -} - -function ModelItemContent({ - model, - onHover, -}: { - model: AiProviderModel; - onHover: (model: AiProviderModel) => void; -}) { - const { displayName, provider } = parseModelTitle(model); - - const providerLogo = getProviderLogo(model); - - return ( -
onHover(model)} - > - {model.title} -
- - {displayName} - - - {provider} - -
-
- ); -} - -function ModelListErrorFallback({ - error, - onRetry, -}: { - error: Error | null; - onRetry: () => void; - credentialId: string | undefined; - orgSlug?: string; -}) { - return ( -
-
- -
-
-

- Failed to load models -

-

- {error?.message || "Could not fetch models from this provider."} - {" Try another provider or retry."} -

-
-
- -
-
- ); -} - -function ModelListSkeleton() { - return ( -
- {Array.from({ length: 6 }).map((_, i) => ( -
- - - -
- ))} -
- ); -} - -// ============================================================================ -// ConnectionModelList — browse + manage modes -// ============================================================================ - -function ModelTierSection({ - label, - models, - onSelect, - onHover, -}: { - label: string; - models: AiProviderModel[]; - onSelect: (m: AiProviderModel) => void; - onHover: (m: AiProviderModel) => void; -}) { - if (models.length === 0) return null; - return ( -
-
- {label} -
- {models.map((m) => ( -
onSelect(m)} - className="cursor-pointer" - > - -
- ))} -
- ); -} - -// Each row is its own component so the React Compiler can memoize them -// individually — only the toggled item re-renders, not all 500. -function ManageModelItem({ - model, - isChecked, - onToggle, - onHover, -}: { - model: AiProviderModel; - isChecked: boolean; - onToggle: (modelId: string) => void; - onHover: (m: AiProviderModel) => void; -}) { - // Local state gives instant visual feedback; parent shortlistSet updates - // asynchronously via startTransition so it never blocks the checkbox. - const [checked, setChecked] = useState(isChecked); - if (checked !== isChecked) { - setChecked(isChecked); - } - const logo = getProviderLogo(model); - - return ( - - ); -} - -type ManageVirtualItem = - | { type: "header"; label: string } - | { type: "model"; model: AiProviderModel }; - -function buildManageItems( - grouped: Record, -): ManageVirtualItem[] { - const items: ManageVirtualItem[] = []; - for (const tierId of TIER_IDS) { - if (grouped[tierId].length > 0) { - items.push({ type: "header", label: TIER_LABELS[tierId] }); - for (const m of grouped[tierId]) items.push({ type: "model", model: m }); - } - } - if (grouped.other.length > 0) { - items.push({ type: "header", label: "Other" }); - for (const m of grouped.other) items.push({ type: "model", model: m }); - } - return items; -} - -function VirtualManageList({ - items, - shortlistSet, - onToggle, - onHover, -}: { - items: ManageVirtualItem[]; - shortlistSet: Set; - onToggle: (modelId: string) => void; - onHover: (m: AiProviderModel) => void; -}) { - const scrollRef = useRef(null); - const virtualizer = useVirtualizer({ - count: items.length, - getScrollElement: () => scrollRef.current, - estimateSize: (i) => (items[i]?.type === "header" ? 36 : 44), - overscan: 6, - }); - - return ( -
-
- {virtualizer.getVirtualItems().map((vItem) => { - const item = items[vItem.index]; - if (!item) return null; - return ( -
- {item.type === "header" ? ( -
- {item.label} -
- ) : ( - - )} -
- ); - })} -
-
- ); -} - -function ConnectionModelList({ - keyId, - searchTerm, - onHover, - onModelSelect, - managing, - onToggleManage, - filterModels: filterModelsProp, -}: { - keyId: string | undefined; - searchTerm: string; - onModelSelect: (model: AiProviderModel) => void; - onHover: (model: AiProviderModel) => void; - managing: boolean; - onToggleManage: () => void; - filterModels?: (m: AiProviderModel) => boolean; -}) { - const { models: rawModels } = useAiProviderModels(keyId); - // When no explicit filter is given, hide async-research-only models - // (e.g. Gemini Deep Research). They aren't usable as a Thinking/Coding/ - // Fast model — the agent loop's `streamText` rejects them. Callers that - // want to expose them (the deep-research slot) pass their own filter that - // opts them back in. - const allModels = filterModelsProp - ? rawModels.filter(filterModelsProp) - : rawModels.filter((m) => m.asyncResearch !== true); - const [shortlistSet, setShortlistSet] = useState>( - () => (keyId ? readShortlist(keyId) : null) ?? DEFAULT_SHORTLIST, - ); - const [, startShortlistTransition] = useTransition(); - - const handleToggle = (modelId: string) => { - if (!keyId) return; - // Deferred: ManageModelItem's local state already gave instant feedback, - // so this heavier reconciliation can happen in a transition. - startShortlistTransition(() => { - setShortlistSet((current) => { - const next = new Set(current); - if (next.has(modelId)) { - next.delete(modelId); - } else { - next.add(modelId); - } - writeShortlist(keyId, next); - return next; - }); - }); - }; - - const normalizedSearch = searchTerm.toLowerCase().trim(); - const applySearch = (models: AiProviderModel[]) => - normalizedSearch - ? 
models.filter( - (m) => - m.title.toLowerCase().includes(normalizedSearch) || - m.modelId.toLowerCase().includes(normalizedSearch), - ) - : models; - - if (managing) { - const grouped = groupByTier(applySearch(allModels)); - const flatItems = buildManageItems(grouped); - const selectedCount = allModels.filter((m) => - shortlistSet.has(m.modelId), - ).length; - - return ( -
-
- - - {selectedCount} selected - -
- -
- ); - } - - // Browse mode: show shortlisted models (fall back to all if none match) - const shortlisted = allModels.filter((m) => shortlistSet.has(m.modelId)); - const browseable = shortlisted.length > 0 ? shortlisted : allModels; - const grouped = groupByTier(applySearch(browseable)); - - return ( -
- {TIER_IDS.map((tierId) => ( - - ))} - -
- ); -} - -// ============================================================================ -// Display Components -// ============================================================================ - -function SelectedModelDisplay({ - model, - placeholder = "Select model", - isLoading = false, -}: { - model: AiProviderModel | null; - placeholder?: string; - isLoading?: boolean; -}) { - if (isLoading) { - return ( -
- - -
- ); - } - - if (!model) { - return ( -
- {placeholder} - -
- ); - } - - const { displayName } = parseModelTitle(model); - - const providerLogo = getProviderLogo(model); - - return ( -
- {model.title} - - {displayName} - - -
- ); -} - -const FILE_BEARING_CAPABILITIES = [ - "vision", - "image", - "file", - "audio", - "video", -] as const; - -const IMAGE_MIME_TYPES = [ - "image/png", - "image/jpeg", - "image/gif", - "image/webp", -] as const; - -/** - * MIME types that no model handles natively but are usable end-to-end - * via sandbox skills: the model invokes `copy_to_sandbox` to bring the - * file in, then runs the matching skill (e.g. pptx-extract) to get - * text/images it can reason over. Allowed whenever the model has any - * file-bearing capability — text output is universal and thumbnail - * images need vision, both already covered by the existing checks. - */ -const SKILL_HANDLED_MIME_TYPES = [ - "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", -] as const; - -export function modelSupportsFiles( - selectedModel: AiProviderModel | null | undefined, -): boolean { - const caps = selectedModel?.capabilities; - if (!caps) return false; - return FILE_BEARING_CAPABILITIES.some((c) => caps.includes(c)); -} - -export function isFileTypeSupportedByModel( - mimeType: string, - selectedModel: AiProviderModel | null | undefined, -): boolean { - if (!mimeType) return false; - if (mimeType.startsWith("text/")) return true; - - const caps = selectedModel?.capabilities ?? 
[]; - const hasVision = caps.includes("vision") || caps.includes("image"); - const hasFile = caps.includes("file"); - const hasAudio = caps.includes("audio"); - const hasVideo = caps.includes("video"); - - if (hasVision && IMAGE_MIME_TYPES.includes(mimeType as never)) return true; - if (hasFile && mimeType === "application/pdf") return true; - if (hasAudio && mimeType.startsWith("audio/")) return true; - if (hasVideo && mimeType.startsWith("video/")) return true; - if ( - modelSupportsFiles(selectedModel) && - SKILL_HANDLED_MIME_TYPES.includes(mimeType as never) - ) { - return true; - } - - return false; -} - -export function getAcceptedMimeTypesForModel( - selectedModel: AiProviderModel | null | undefined, -): string { - const caps = selectedModel?.capabilities ?? []; - const accepted: string[] = ["text/*"]; - - if (caps.includes("vision") || caps.includes("image")) { - accepted.push(...IMAGE_MIME_TYPES); - } - if (caps.includes("file")) { - accepted.push("application/pdf"); - } - if (caps.includes("audio")) { - accepted.push("audio/*"); - } - if (caps.includes("video")) { - accepted.push("video/*"); - } - if (modelSupportsFiles(selectedModel)) { - accepted.push(...SKILL_HANDLED_MIME_TYPES); - } - - return accepted.join(","); -} - -export function getSupportedFileTypesLabel( - selectedModel: AiProviderModel | null | undefined, -): string { - const caps = selectedModel?.capabilities ?? 
[]; - const parts: string[] = []; - - if (caps.includes("vision") || caps.includes("image")) parts.push("images"); - if (caps.includes("file")) parts.push("PDFs"); - if (caps.includes("audio")) parts.push("audio"); - if (caps.includes("video")) parts.push("video"); - if (modelSupportsFiles(selectedModel)) parts.push("Office files"); - - if (parts.length === 0) return "text only"; - if (parts.length === 1) return parts[0]!; - if (parts.length === 2) return `${parts[0]} and ${parts[1]}`; - return `${parts.slice(0, -1).join(", ")}, and ${parts.at(-1)}`; -} - -// ============================================================================ -// ModelSelectorContent — fixed size popover, no resize on manage toggle -// ============================================================================ - -function ModelSelectorContentFallback() { - return ( -
-
-
-
- - - -
-
-
- {Array.from({ length: 8 }).map((_, i) => ( -
- - - -
- ))} -
-
-
-
-
-
- - -
-
- - -
-
-
-
- - -
-
- - -
-
-
-
-
- ); -} - -interface ModelSelectorInnerProps { - onClose: () => void; - credentialId: string | null; - onCredentialChange: (id: string | null) => void; - selectedModel: AiProviderModel | null; - onModelChange: (model: AiProviderModel) => void; - filterModels?: (m: AiProviderModel) => boolean; -} - -function ModelSelectorInner({ - onClose, - credentialId, - onCredentialChange, - selectedModel, - onModelChange, - filterModels, -}: ModelSelectorInnerProps) { - const [hoveredModel, setHoveredModel] = useState( - null, - ); - const [searchTerm, setSearchTerm] = useState(""); - const [managing, setManaging] = useState(false); - const searchInputRef = useRef(null); - const aiProviders = useAiProviders(); - const keys = useAiProviderKeys(); - - const providerMap = Object.fromEntries( - (aiProviders?.providers ?? []).map((p) => [p.id, p]), - ); - const settingsNavigate = useNavigate(); - const { org: settingsOrg } = useProjectContext(); - - const handleKeyChange = (keyId: string) => { - onCredentialChange(keyId); - setHoveredModel(null); - }; - - const handleModelSelect = (model: AiProviderModel) => { - if (!credentialId) return; - onModelChange(model); - setSearchTerm(""); - onClose(); - }; - - if (keys.length === 0) { - return ( -
- -
- ); - } - - return ( -
-
-
- -
- - ( - - )} - > - }> - setManaging((v) => !v)} - filterModels={filterModels} - /> - - - {!managing && ( -
- -
- -
- )} -
- -
- -
-
- ); -} - -function ModelSelectorContent({ onClose }: { onClose: () => void }) { - const { credentialId, setCredentialId, selectedModel, setModel } = - useChatPrefs(); +import { ModelSelectorContentFallback } from "./select-model/decopilot"; +import { SelectedModelDisplay } from "./select-model/shared"; +import type { HarnessId } from "@/harnesses"; +import { + ModelSelectorBody, + ModelSelectorStandaloneBody, +} from "./select-model/index"; - return ( - { - track("chat_credential_changed", { credential_id: id }); - setCredentialId(id); - }} - selectedModel={selectedModel} - onModelChange={(model) => { - if (!credentialId) return; - track("chat_model_changed", { - from_model_id: selectedModel?.modelId ?? null, - to_model_id: model.modelId, - to_model_provider: model.providerId ?? null, - credential_id: credentialId, - }); - setModel({ ...model, keyId: credentialId }); - }} - /> - ); -} +export { + getAcceptedMimeTypesForModel, + getSupportedFileTypesLabel, + isFileTypeSupportedByModel, + modelSupportsFiles, +} from "./select-model/shared"; // ============================================================================ // Public Components @@ -1361,6 +46,7 @@ export interface ModelSelectorProps { onCredentialChange?: (id: string | null) => void; onModelChange?: (model: AiProviderModel) => void; filterModels?: (m: AiProviderModel) => boolean; + agent?: HarnessId; } export function ModelSelector({ @@ -1373,6 +59,7 @@ export function ModelSelector({ onCredentialChange, onModelChange, filterModels, + agent, }: ModelSelectorProps) { const [open, setOpen] = useState(false); const standalone = onModelChange !== undefined; @@ -1403,8 +90,9 @@ export function ModelSelector({ const selectorContent = ( }> {standalone ? ( - setOpen(false)} + agent={agent} credentialId={credentialIdProp ?? null} onCredentialChange={onCredentialChange ?? (() => {})} selectedModel={modelProp ?? 
null} @@ -1412,7 +100,7 @@ export function ModelSelector({ filterModels={filterModels} /> ) : ( - setOpen(false)} /> + setOpen(false)} agent={agent} /> )} ); diff --git a/apps/mesh/src/web/components/chat/select-model/agent-models.test.ts b/apps/mesh/src/web/components/chat/select-model/agent-models.test.ts new file mode 100644 index 0000000000..e039b89eea --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/agent-models.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, test } from "bun:test"; +import type { Capability } from "@/links/protocol"; +import { getAgentSections } from "./agent-models"; + +const OFFLINE = { online: false, capabilities: [] as readonly Capability[] }; +const ONLINE = (caps: readonly Capability[]) => ({ + online: true, + capabilities: caps, +}); + +describe("getAgentSections", () => { + test("no keys + no link → empty list", () => { + expect( + getAgentSections({ hasAnyKey: false, link: OFFLINE }).map((s) => s.kind), + ).toEqual([]); + }); + + test("keys + offline link → only Decopilot, not flagged local", () => { + const sections = getAgentSections({ hasAnyKey: true, link: OFFLINE }); + expect(sections.map((s) => s.kind)).toEqual(["decopilot"]); + expect(sections[0]!.isLocal).toBe(false); + }); + + test("no keys + online claude-code capability → only Claude Code, flagged local", () => { + const sections = getAgentSections({ + hasAnyKey: false, + link: ONLINE(["claude-code"]), + }); + expect(sections.map((s) => s.kind)).toEqual(["claude-code"]); + expect(sections[0]!.isLocal).toBe(true); + }); + + test("no keys + online codex capability → only Codex, flagged local", () => { + const sections = getAgentSections({ + hasAnyKey: false, + link: ONLINE(["codex"]), + }); + expect(sections.map((s) => s.kind)).toEqual(["codex"]); + expect(sections[0]!.isLocal).toBe(true); + }); + + test("keys + online both CLI caps → all three in stable order", () => { + const sections = getAgentSections({ + hasAnyKey: true, + link: 
ONLINE(["claude-code", "codex"]), + }); + expect(sections.map((s) => s.kind)).toEqual([ + "decopilot", + "claude-code", + "codex", + ]); + expect(sections.map((s) => s.isLocal)).toEqual([false, true, true]); + }); + + test("decopilot section exposes Fast/Smart/Thinking tiers with non-null labels", () => { + const [decopilot] = getAgentSections({ hasAnyKey: true, link: OFFLINE }); + expect(decopilot!.title).toBe("Decopilot"); + expect(decopilot!.tiers.fast.label).toBe("Fast"); + expect(decopilot!.tiers.smart.label).toBe("Smart"); + expect(decopilot!.tiers.thinking.label).toBe("Thinking"); + expect(decopilot!.tiers.fast.modelId).toBeNull(); + expect(decopilot!.tiers.smart.modelId).toBeNull(); + expect(decopilot!.tiers.thinking.modelId).toBeNull(); + }); + + test("claude-code section exposes the three CLI model labels with non-null modelIds", () => { + const sections = getAgentSections({ + hasAnyKey: false, + link: ONLINE(["claude-code"]), + }); + const claude = sections[0]!; + expect(claude.title).toBe("Claude Code"); + expect(claude.tiers.fast.modelId).toBe("claude-code:haiku"); + expect(claude.tiers.smart.modelId).toBe("claude-code:sonnet"); + expect(claude.tiers.thinking.modelId).toBe("claude-code:opus"); + expect(claude.tiers.fast.label).toBe("Haiku"); + expect(claude.tiers.smart.label).toBe("Sonnet"); + expect(claude.tiers.thinking.label).toBe("Opus"); + }); + + test("codex section exposes the three Codex model labels", () => { + const sections = getAgentSections({ + hasAnyKey: false, + link: ONLINE(["codex"]), + }); + const codex = sections[0]!; + expect(codex.title).toBe("Codex"); + expect(codex.tiers.fast.modelId).toBe("codex:gpt-5.4-mini"); + expect(codex.tiers.smart.modelId).toBe("codex:gpt-5.3-codex"); + expect(codex.tiers.thinking.modelId).toBe("codex:gpt-5.5"); + }); +}); diff --git a/apps/mesh/src/web/components/chat/select-model/agent-models.tsx b/apps/mesh/src/web/components/chat/select-model/agent-models.tsx new file mode 100644 index 
0000000000..cec30a944b --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/agent-models.tsx @@ -0,0 +1,191 @@ +import type { ReactNode } from "react"; +import type { HarnessId } from "@/harnesses"; +import type { Capability } from "@/links/protocol"; +import type { AiProviderModel } from "@/web/hooks/collections/use-ai-providers"; +import type { ChatTier } from "@/tools/organization/schema"; +import { Atom01, Lightning01, Stars01 } from "@untitledui/icons"; +import { CLAUDE_CODE_MODELS } from "@/ai-providers/adapters/claude-code-models"; +import { CODEX_MODELS } from "@/ai-providers/adapters/codex-models"; + +/** The three agents that can appear as sections in the chat-input popover. */ +export type AgentKind = "decopilot" | "claude-code" | "codex"; + +/** + * Per-tier entry in an agent section. `modelId` is the wire identifier + * the harness consumes (or `null` for Decopilot, where the server picks + * the model based on tier + provider key). `iconNode` is the React icon + * for Decopilot tiers; CLI rows use `iconUrl` instead. + */ +export interface AgentTierEntry { + modelId: string | null; + label: string; + description: string; + iconNode?: ReactNode; + iconUrl?: string; +} + +export type AgentTierMap = Record; + +/** One section in the merged model selector popover. */ +export interface AgentSection { + kind: AgentKind; + title: string; + /** True for laptop-CLI agents (Claude Code, Codex). Drives the green + * band + " · on this laptop" suffix in the popover, and the green + * ring on the closed chat-input trigger. */ + isLocal: boolean; + tiers: AgentTierMap; + /** Cached list of models the agent exposes — handy for callers that + * need to convert a (kind, tier) into an `AiProviderModel`. 
*/ + models: AiProviderModel[]; +} + +const CLAUDE_CODE_LOGO = + "https://decoims.com/decocms/93e4059c-e598-412b-87eb-54d72a946ec8/claude-stroke-rounded.svg"; +const CODEX_LOGO = + "https://decoims.com/decocms/9170ffd4-b9cc-4661-ad8f-ae2eea019e00/codex.svg"; + +const DECOPILOT_TIERS: AgentTierMap = { + fast: { + modelId: null, + label: "Fast", + description: "Quicker responses", + iconNode: , + }, + smart: { + modelId: null, + label: "Smart", + description: "Balanced quality", + iconNode: , + }, + thinking: { + modelId: null, + label: "Thinking", + description: "Deeper reasoning", + iconNode: , + }, +}; + +const CLAUDE_CODE_TIERS: AgentTierMap = { + fast: { + modelId: "claude-code:haiku", + label: "Haiku", + description: "Quicker responses", + iconUrl: CLAUDE_CODE_LOGO, + }, + smart: { + modelId: "claude-code:sonnet", + label: "Sonnet", + description: "Balanced quality", + iconUrl: CLAUDE_CODE_LOGO, + }, + thinking: { + modelId: "claude-code:opus", + label: "Opus", + description: "Deeper reasoning", + iconUrl: CLAUDE_CODE_LOGO, + }, +}; + +const CODEX_TIERS: AgentTierMap = { + fast: { + modelId: "codex:gpt-5.4-mini", + label: "GPT-5.4 Mini", + description: "Quicker responses", + iconUrl: CODEX_LOGO, + }, + smart: { + modelId: "codex:gpt-5.3-codex", + label: "GPT-5.3 Codex", + description: "Balanced quality", + iconUrl: CODEX_LOGO, + }, + thinking: { + modelId: "codex:gpt-5.5", + label: "GPT-5.5", + description: "Deeper reasoning", + iconUrl: CODEX_LOGO, + }, +}; + +export interface AgentModelSet { + logo: string; + tiers: AgentTierMap; + models: AiProviderModel[]; +} + +/** + * Returns the laptop-CLI model set for an agent, or null for Decopilot + * (which still uses the standard provider-key path on the settings page). + * Kept for the settings flow that mounts `LaptopCliModelSelectorBody`. 
+ */ +export function getAgentModelSet(agent: HarnessId): AgentModelSet | null { + if (agent === "claude-code") { + return { + logo: CLAUDE_CODE_LOGO, + tiers: CLAUDE_CODE_TIERS, + models: CLAUDE_CODE_MODELS as AiProviderModel[], + }; + } + if (agent === "codex") { + return { + logo: CODEX_LOGO, + tiers: CODEX_TIERS, + models: CODEX_MODELS as AiProviderModel[], + }; + } + return null; +} + +export interface AgentSectionsInput { + hasAnyKey: boolean; + link: { online: boolean; capabilities: readonly Capability[] }; +} + +const SECTION_ORDER: AgentKind[] = ["decopilot", "claude-code", "codex"]; + +/** + * Pure eligibility function for the merged chat-input popover. Returns + * sections in stable `SECTION_ORDER`. Mirrors the gates that + * `computeAgentOptions` used to enforce, minus `decopilot-laptop`. + * + * Gates: + * decopilot → hasAnyKey + * claude-code → link.online && caps.includes("claude-code") + * codex → link.online && caps.includes("codex") + */ +export function getAgentSections(input: AgentSectionsInput): AgentSection[] { + const { hasAnyKey, link } = input; + const has = (c: Capability) => link.capabilities.includes(c); + const out: AgentSection[] = []; + if (hasAnyKey) { + out.push({ + kind: "decopilot", + title: "Decopilot", + isLocal: false, + tiers: DECOPILOT_TIERS, + models: [], + }); + } + if (link.online && has("claude-code")) { + out.push({ + kind: "claude-code", + title: "Claude Code", + isLocal: true, + tiers: CLAUDE_CODE_TIERS, + models: CLAUDE_CODE_MODELS as AiProviderModel[], + }); + } + if (link.online && has("codex")) { + out.push({ + kind: "codex", + title: "Codex", + isLocal: true, + tiers: CODEX_TIERS, + models: CODEX_MODELS as AiProviderModel[], + }); + } + return out.sort( + (a, b) => SECTION_ORDER.indexOf(a.kind) - SECTION_ORDER.indexOf(b.kind), + ); +} diff --git a/apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx b/apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx new file mode 100644 index 
0000000000..192ebec56a --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx @@ -0,0 +1,92 @@ +import { setupComponentTest } from "../../../../test/setup"; +setupComponentTest(); +import { describe, expect, test, mock } from "bun:test"; +import { render } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { AgentSection } from "./agent-section"; +import { getAgentSections } from "./agent-models"; + +const SECTIONS = getAgentSections({ + hasAnyKey: true, + link: { online: true, capabilities: ["claude-code", "codex"] }, +}); + +const decopilot = SECTIONS.find((s) => s.kind === "decopilot")!; +const claude = SECTIONS.find((s) => s.kind === "claude-code")!; + +describe("AgentSection", () => { + test("cloud section header has no success styling", () => { + const { container } = render( + {}} + />, + ); + const header = container.querySelector( + "[data-testid=agent-section-header]", + ); + expect(header?.className).not.toMatch(/text-success/); + }); + + test("local CLI section header uses text-success and · on this laptop suffix", () => { + const { container, getByText } = render( + {}} + />, + ); + const header = container.querySelector( + "[data-testid=agent-section-header]", + ); + expect(header?.className).toMatch(/text-success/); + expect(getByText(/Claude Code · on this laptop/)).toBeInTheDocument(); + }); + + test("disabled section sets aria-disabled and stops onSelect from firing", () => { + const onSelect = mock(() => {}); + const { container } = render( + , + ); + const wrapper = container.querySelector("[data-testid=agent-section]"); + expect(wrapper?.getAttribute("aria-disabled")).toBe("true"); + const rows = container.querySelectorAll("button"); + rows.forEach((b) => b.click()); + expect(onSelect).not.toHaveBeenCalled(); + }); + + test("enabled row click fires onSelect with the row's tier", () => { + const onSelect = mock((_tier: "fast" | "smart" | "thinking") => {}); + const { getByText } = 
render( + , + ); + getByText("Haiku").click(); + expect(onSelect).toHaveBeenCalledTimes(1); + expect(onSelect).toHaveBeenCalledWith("fast"); + }); + + test("selected row marks itself with the On indicator", () => { + const { getByText } = render( + {}} + />, + ); + expect(getByText("On")).toBeInTheDocument(); + }); +}); diff --git a/apps/mesh/src/web/components/chat/select-model/agent-section.tsx b/apps/mesh/src/web/components/chat/select-model/agent-section.tsx new file mode 100644 index 0000000000..08267d0a57 --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/agent-section.tsx @@ -0,0 +1,91 @@ +import { Lock01 } from "@untitledui/icons"; +import { cn } from "@deco/ui/lib/utils.ts"; +import type { ChatTier } from "@/tools/organization/schema"; +import type { AgentSection as AgentSectionData } from "./agent-models"; + +const TIER_ORDER: ChatTier[] = ["fast", "smart", "thinking"]; + +interface Props { + section: AgentSectionData; + selectedTier: ChatTier | null; + disabled: boolean; + onSelect: (tier: ChatTier) => void; +} + +export function AgentSection({ + section, + selectedTier, + disabled, + onSelect, +}: Props) { + const localBand = section.isLocal && !disabled ? "bg-success/5" : ""; + + return ( +
+
+ + {section.isLocal + ? `${section.title} · on this laptop` + : section.title} + + {disabled && } +
+ + {TIER_ORDER.map((tier) => { + const entry = section.tiers[tier]; + const isSelected = !disabled && selectedTier === tier; + return ( + + ); + })} +
+ ); +} diff --git a/apps/mesh/src/web/components/chat/select-model/decopilot.tsx b/apps/mesh/src/web/components/chat/select-model/decopilot.tsx new file mode 100644 index 0000000000..5f28daa542 --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/decopilot.tsx @@ -0,0 +1,878 @@ +import { Button } from "@deco/ui/components/button.tsx"; +import { Checkbox } from "@deco/ui/components/checkbox.tsx"; +import { Input } from "@deco/ui/components/input.tsx"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@deco/ui/components/select.tsx"; +import { Skeleton } from "@deco/ui/components/skeleton.tsx"; +import { + AlertTriangle, + ArrowLeft, + ChevronSelectorVertical, + Key01, + RefreshCcw01, + SearchMd, + Settings02, +} from "@untitledui/icons"; +import { + startTransition, + Suspense, + useRef, + useState, + useTransition, +} from "react"; +import { useVirtualizer } from "@tanstack/react-virtual"; +import { + type AiProviderModel, + useAiProviderKeys, + useAiProviderModels, + useAiProviders, +} from "../../../hooks/collections/use-ai-providers"; +import { ErrorBoundary } from "../../error-boundary"; +import { track } from "@/web/lib/posthog-client"; +import { useChatPrefs } from "../context"; +import { getProviderLogo } from "@/web/utils/ai-providers-logos"; +import { getPreset } from "@/web/utils/openai-compatible-presets"; +import { useNavigate } from "@tanstack/react-router"; +import { useProjectContext } from "@decocms/mesh-sdk"; +import { NoAiProviderEmptyState } from "../no-ai-provider-empty-state"; +import { ModelDetailsPanel, parseModelTitle } from "./shared"; + +// ============================================================================ +// Tier Classification +// ============================================================================ + +const TIER_IDS = ["smarter", "faster", "cheaper"] as const; +type TierId = (typeof TIER_IDS)[number]; + +const TIER_LABELS: Record = { + smarter: "Smarter", + 
faster: "Faster", + cheaper: "Cheaper", +}; + +const TIER_PATTERNS: Array<{ tier: TierId; prefixes: string[] }> = [ + { + tier: "smarter", + prefixes: [ + "anthropic/claude-opus-4.7", + "anthropic/claude-sonnet-4.6", + "anthropic/claude-4.6-sonnet", + "openai/gpt-5.3-codex", + "google/gemini-3-pro", + "google/gemini-2.5-pro", + "cohere/command-r-plus", + "cohere/command-a", + ], + }, + { + tier: "faster", + prefixes: [ + "anthropic/claude-haiku-4.5", + "anthropic/claude-4.5-haiku", + "google/gemini-3-flash", + "openai/gpt-5.1-codex-mini", + "x-ai/grok-code-fast", + "x-ai/grok-3", + "mistralai/mistral-large", + "mistralai/codestral", + "mistralai/mistral-medium", + "minimax/minimax-m1", + ], + }, + { + tier: "cheaper", + prefixes: [ + "google/gemini-2.5-flash-lite", + "google/gemini-2.5-flash", + "google/gemini-2.0-flash", + "deepseek/deepseek-v3", + "openai/gpt-oss-120b", + "mistralai/mistral-small", + "mistralai/pixtral", + "cohere/command-r", + ], + }, +]; + +// Sort rules longest-prefix-first for specificity +const SORTED_TIER_RULES = TIER_PATTERNS.flatMap(({ tier, prefixes }) => + prefixes.map((prefix) => ({ tier, prefix })), +).sort((a, b) => b.prefix.length - a.prefix.length); + +// Only exact matches (no named sub-variants); date suffixes (digits) are fine +const EXACT_ONLY_PREFIXES = new Set(["google/gemini-2.5-pro"]); + +const tierCache = new Map(); + +function classifyModel(modelId: string): TierId | null { + const cached = tierCache.get(modelId); + if (cached !== undefined) return cached; + + let result: TierId | null = null; + if (modelId.endsWith(":free")) { + result = "cheaper"; + } else { + outer: for (const { tier, prefix } of SORTED_TIER_RULES) { + if (modelId.startsWith(prefix)) { + if (EXACT_ONLY_PREFIXES.has(prefix) && modelId.length > prefix.length) { + const nextChar = modelId[prefix.length]; + if (nextChar === "-") { + const charAfterHyphen = modelId[prefix.length + 1]; + if (!charAfterHyphen || !/\d/.test(charAfterHyphen)) continue outer; + 
} + } + result = tier; + break; + } + } + } + + tierCache.set(modelId, result); + return result; +} + +function groupByTier( + models: AiProviderModel[], +): Record { + const groups: Record = { + smarter: [], + faster: [], + cheaper: [], + other: [], + }; + for (const m of models) { + const tier = classifyModel(m.modelId); + groups[tier ?? "other"].push(m); + } + for (const key of Object.keys(groups) as Array) { + groups[key].sort((a, b) => a.title.localeCompare(b.title)); + } + return groups; +} + +// ============================================================================ +// Model Shortlist (localStorage) +// ============================================================================ + +const SHORTLIST_KEY_PREFIX = "mesh:model-shortlist:"; + +const DEFAULT_SHORTLIST = new Set([ + // Smarter + "anthropic/claude-opus-4.7", + "anthropic/claude-sonnet-4.6", + "anthropic/claude-4.6-sonnet", + "anthropic/claude-sonnet-4.6:extended", + "openai/gpt-5.3-codex", + "google/gemini-3-pro-preview", + "google/gemini-2.5-pro", + // Faster + "anthropic/claude-haiku-4.5", + "anthropic/claude-haiku-4.5-20251001", + "anthropic/claude-4.5-haiku", + "google/gemini-3-flash-preview", + "openai/gpt-5.1-codex-mini", + "x-ai/grok-code-fast-1", + // Cheaper + "google/gemini-2.5-flash", + "deepseek/deepseek-v3.2", + "google/gemini-2.5-flash-lite", + "openai/gpt-oss-120b:free", +]); + +function readShortlist(keyId: string): Set | null { + try { + const raw = localStorage.getItem(SHORTLIST_KEY_PREFIX + keyId); + return raw ? 
new Set(JSON.parse(raw)) : null; + } catch { + return null; + } +} + +function writeShortlist(keyId: string, ids: Set) { + localStorage.setItem(SHORTLIST_KEY_PREFIX + keyId, JSON.stringify([...ids])); +} + +// ============================================================================ +// UI Components +// ============================================================================ + +function ModelItemContent({ + model, + onHover, +}: { + model: AiProviderModel; + onHover: (model: AiProviderModel) => void; +}) { + const { displayName, provider } = parseModelTitle(model); + + const providerLogo = getProviderLogo(model); + + return ( +
onHover(model)} + > + {model.title} +
+ + {displayName} + + + {provider} + +
+
+ ); +} + +function ModelListErrorFallback({ + error, + onRetry, +}: { + error: Error | null; + onRetry: () => void; + credentialId: string | undefined; + orgSlug?: string; +}) { + return ( +
+
+ +
+
+

+ Failed to load models +

+

+ {error?.message || "Could not fetch models from this provider."} + {" Try another provider or retry."} +

+
+
+ +
+
+ ); +} + +function ModelListSkeleton() { + return ( +
+ {Array.from({ length: 6 }).map((_, i) => ( +
+ + + +
+ ))} +
+ ); +} + +// ============================================================================ +// ConnectionModelList — browse + manage modes +// ============================================================================ + +function ModelTierSection({ + label, + models, + onSelect, + onHover, +}: { + label: string; + models: AiProviderModel[]; + onSelect: (m: AiProviderModel) => void; + onHover: (m: AiProviderModel) => void; +}) { + if (models.length === 0) return null; + return ( +
+
+ {label} +
+ {models.map((m) => ( +
onSelect(m)} + className="cursor-pointer" + > + +
+ ))} +
+ ); +} + +// Each row is its own component so the React Compiler can memoize them +// individually — only the toggled item re-renders, not all 500. +function ManageModelItem({ + model, + isChecked, + onToggle, + onHover, +}: { + model: AiProviderModel; + isChecked: boolean; + onToggle: (modelId: string) => void; + onHover: (m: AiProviderModel) => void; +}) { + // Local state gives instant visual feedback; parent shortlistSet updates + // asynchronously via startTransition so it never blocks the checkbox. + const [checked, setChecked] = useState(isChecked); + if (checked !== isChecked) { + setChecked(isChecked); + } + const logo = getProviderLogo(model); + + return ( + + ); +} + +type ManageVirtualItem = + | { type: "header"; label: string } + | { type: "model"; model: AiProviderModel }; + +function buildManageItems( + grouped: Record, +): ManageVirtualItem[] { + const items: ManageVirtualItem[] = []; + for (const tierId of TIER_IDS) { + if (grouped[tierId].length > 0) { + items.push({ type: "header", label: TIER_LABELS[tierId] }); + for (const m of grouped[tierId]) items.push({ type: "model", model: m }); + } + } + if (grouped.other.length > 0) { + items.push({ type: "header", label: "Other" }); + for (const m of grouped.other) items.push({ type: "model", model: m }); + } + return items; +} + +function VirtualManageList({ + items, + shortlistSet, + onToggle, + onHover, +}: { + items: ManageVirtualItem[]; + shortlistSet: Set; + onToggle: (modelId: string) => void; + onHover: (m: AiProviderModel) => void; +}) { + const scrollRef = useRef(null); + const virtualizer = useVirtualizer({ + count: items.length, + getScrollElement: () => scrollRef.current, + estimateSize: (i) => (items[i]?.type === "header" ? 36 : 44), + overscan: 6, + }); + + return ( +
+
+ {virtualizer.getVirtualItems().map((vItem) => { + const item = items[vItem.index]; + if (!item) return null; + return ( +
+ {item.type === "header" ? ( +
+ {item.label} +
+ ) : ( + + )} +
+ ); + })} +
+
+ ); +} + +function ConnectionModelList({ + keyId, + searchTerm, + onHover, + onModelSelect, + managing, + onToggleManage, + filterModels: filterModelsProp, +}: { + keyId: string | undefined; + searchTerm: string; + onModelSelect: (model: AiProviderModel) => void; + onHover: (model: AiProviderModel) => void; + managing: boolean; + onToggleManage: () => void; + filterModels?: (m: AiProviderModel) => boolean; +}) { + const { models: rawModels } = useAiProviderModels(keyId); + // When no explicit filter is given, hide async-research-only models + // (e.g. Gemini Deep Research). They aren't usable as a Thinking/Coding/ + // Fast model — the agent loop's `streamText` rejects them. Callers that + // want to expose them (the deep-research slot) pass their own filter that + // opts them back in. + const allModels = filterModelsProp + ? rawModels.filter(filterModelsProp) + : rawModels.filter((m) => m.asyncResearch !== true); + const [shortlistSet, setShortlistSet] = useState>( + () => (keyId ? readShortlist(keyId) : null) ?? DEFAULT_SHORTLIST, + ); + const [, startShortlistTransition] = useTransition(); + + const handleToggle = (modelId: string) => { + if (!keyId) return; + // Deferred: ManageModelItem's local state already gave instant feedback, + // so this heavier reconciliation can happen in a transition. + startShortlistTransition(() => { + setShortlistSet((current) => { + const next = new Set(current); + if (next.has(modelId)) { + next.delete(modelId); + } else { + next.add(modelId); + } + writeShortlist(keyId, next); + return next; + }); + }); + }; + + const normalizedSearch = searchTerm.toLowerCase().trim(); + const applySearch = (models: AiProviderModel[]) => + normalizedSearch + ? 
models.filter( + (m) => + m.title.toLowerCase().includes(normalizedSearch) || + m.modelId.toLowerCase().includes(normalizedSearch), + ) + : models; + + if (managing) { + const grouped = groupByTier(applySearch(allModels)); + const flatItems = buildManageItems(grouped); + const selectedCount = allModels.filter((m) => + shortlistSet.has(m.modelId), + ).length; + + return ( +
+
+ + + {selectedCount} selected + +
+ +
+ ); + } + + // Browse mode: show shortlisted models (fall back to all if none match) + const shortlisted = allModels.filter((m) => shortlistSet.has(m.modelId)); + const browseable = shortlisted.length > 0 ? shortlisted : allModels; + const grouped = groupByTier(applySearch(browseable)); + + return ( +
+ {TIER_IDS.map((tierId) => ( + + ))} + +
+ ); +} + +// ============================================================================ +// ModelSelectorContent — fixed size popover, no resize on manage toggle +// ============================================================================ + +function ModelSelectorContentFallback() { + return ( +
+
+
+
+ + + +
+
+
+ {Array.from({ length: 8 }).map((_, i) => ( +
+ + + +
+ ))} +
+
+
+
+
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+
+
+
+ ); +} + +interface ModelSelectorInnerProps { + onClose: () => void; + credentialId: string | null; + onCredentialChange: (id: string | null) => void; + selectedModel: AiProviderModel | null; + onModelChange: (model: AiProviderModel) => void; + filterModels?: (m: AiProviderModel) => boolean; +} + +function ModelSelectorInner({ + onClose, + credentialId, + onCredentialChange, + selectedModel, + onModelChange, + filterModels, +}: ModelSelectorInnerProps) { + const [hoveredModel, setHoveredModel] = useState( + null, + ); + const [searchTerm, setSearchTerm] = useState(""); + const [managing, setManaging] = useState(false); + const searchInputRef = useRef(null); + const aiProviders = useAiProviders(); + const keys = useAiProviderKeys(); + + const providerMap = Object.fromEntries( + (aiProviders?.providers ?? []).map((p) => [p.id, p]), + ); + const settingsNavigate = useNavigate(); + const { org: settingsOrg } = useProjectContext(); + + const handleKeyChange = (keyId: string) => { + onCredentialChange(keyId); + setHoveredModel(null); + }; + + const handleModelSelect = (model: AiProviderModel) => { + if (!credentialId) return; + onModelChange(model); + setSearchTerm(""); + onClose(); + }; + + if (keys.length === 0) { + return ( +
+ +
+ ); + } + + return ( +
+
+
+ +
+ + ( + + )} + > + }> + setManaging((v) => !v)} + filterModels={filterModels} + /> + + + {!managing && ( +
+ +
+ +
+ )} +
+ +
+ +
+
+ ); +} + +function ModelSelectorContent({ onClose }: { onClose: () => void }) { + const { credentialId, setCredentialId, selectedModel, setModel } = + useChatPrefs(); + + return ( + { + track("chat_credential_changed", { credential_id: id }); + setCredentialId(id); + }} + selectedModel={selectedModel} + onModelChange={(model) => { + if (!credentialId) return; + track("chat_model_changed", { + from_model_id: selectedModel?.modelId ?? null, + to_model_id: model.modelId, + to_model_provider: model.providerId ?? null, + credential_id: credentialId, + }); + setModel({ ...model, keyId: credentialId }); + }} + /> + ); +} + +// ============================================================================ +// Public Exports (Decopilot variant) +// ============================================================================ + +/** + * Decopilot model selector body — uses useChatPrefs for credential/model state. + * Mount this inside the Dialog/Drawer wrapper from ModelSelector. + */ +export function DecopilotModelSelectorBody({ + onClose, +}: { + onClose: () => void; +}) { + return ; +} + +/** + * Decopilot model selector in standalone mode — receives credential/model state + * as props (bypasses useChatPrefs). Mount this inside the Dialog/Drawer wrapper + * from ModelSelector. + */ +export function DecopilotModelSelectorStandalone( + props: ModelSelectorInnerProps, +) { + return ; +} + +/** + * Re-export the loading fallback so ModelSelector can use it in the outer + * Suspense boundary without importing from a private path. 
+ */ +export { ModelSelectorContentFallback }; diff --git a/apps/mesh/src/web/components/chat/select-model/index.tsx b/apps/mesh/src/web/components/chat/select-model/index.tsx new file mode 100644 index 0000000000..9031e9a994 --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/index.tsx @@ -0,0 +1,72 @@ +// apps/mesh/src/web/components/chat/select-model/index.tsx +import type { HarnessId } from "@/harnesses"; +import { + DecopilotModelSelectorBody, + DecopilotModelSelectorStandalone, +} from "./decopilot"; +import { LaptopCliModelSelectorBody } from "./laptop-cli"; +import { getAgentModelSet } from "./agent-models"; +import { useChatPrefs } from "../context"; +import type { AiProviderModel } from "@/web/hooks/collections/use-ai-providers"; + +interface BodyProps { + onClose: () => void; + /** Explicit lock — automations pass "decopilot" to ignore chat prefs. */ + agent?: HarnessId; +} + +/** + * Renders the model picker matching the active agent. + * Decopilot → existing two-pane API-key selector. + * Claude Code / Codex → fixed three-tier picker, no key dropdown. + */ +export function ModelSelectorBody({ onClose, agent }: BodyProps) { + const prefs = useChatPrefs(); + const effective = agent ?? prefs.pendingHarnessId ?? "decopilot"; + const cli = getAgentModelSet(effective); + + if (!cli) { + return ; + } + + return ( + { + prefs.setModel({ ...model, keyId: undefined }); + onClose(); + }} + /> + ); +} + +interface StandaloneProps { + onClose: () => void; + agent?: HarnessId; + credentialId: string | null; + onCredentialChange: (id: string | null) => void; + selectedModel: AiProviderModel | null; + onModelChange: (model: AiProviderModel) => void; + filterModels?: (m: AiProviderModel) => boolean; +} + +export function ModelSelectorStandaloneBody({ + agent, + ...rest +}: StandaloneProps) { + const cli = agent ? 
getAgentModelSet(agent) : null; + if (!cli) return ; + return ( + { + rest.onModelChange(model); + rest.onClose(); + }} + /> + ); +} diff --git a/apps/mesh/src/web/components/chat/select-model/laptop-cli.tsx b/apps/mesh/src/web/components/chat/select-model/laptop-cli.tsx new file mode 100644 index 0000000000..d068131cef --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/laptop-cli.tsx @@ -0,0 +1,61 @@ +import { cn } from "@deco/ui/lib/utils.ts"; +import type { ChatTier } from "@/tools/organization/schema"; +import type { AgentModelSet } from "./agent-models"; +import type { AiProviderModel } from "@/web/hooks/collections/use-ai-providers"; + +interface LaptopCliModelSelectorProps { + modelSet: AgentModelSet; + selectedModelId: string | null; + onSelect: (model: AiProviderModel) => void; +} + +const TIER_ROWS: Array<{ tier: ChatTier; description: string }> = [ + { tier: "fast", description: "Quicker responses" }, + { tier: "smart", description: "Balanced quality" }, + { tier: "thinking", description: "Deeper reasoning" }, +]; + +export function LaptopCliModelSelectorBody({ + modelSet, + selectedModelId, + onSelect, +}: LaptopCliModelSelectorProps) { + const lookup = Object.fromEntries(modelSet.models.map((m) => [m.modelId, m])); + + return ( +
+ {TIER_ROWS.map(({ tier, description }) => { + const entry = modelSet.tiers[tier]; + if (!entry.modelId) return null; + const model = lookup[entry.modelId]; + if (!model) return null; + const isSelected = selectedModelId === entry.modelId; + return ( + + ); + })} +
+ ); +} diff --git a/apps/mesh/src/web/components/chat/select-model/shared.tsx b/apps/mesh/src/web/components/chat/select-model/shared.tsx new file mode 100644 index 0000000000..f7a5fd4965 --- /dev/null +++ b/apps/mesh/src/web/components/chat/select-model/shared.tsx @@ -0,0 +1,493 @@ +import { Skeleton } from "@deco/ui/components/skeleton.tsx"; +import { cn } from "@deco/ui/lib/utils.ts"; +import { + AlignLeft, + ChevronDown, + Image01, + ImagePlus, + SearchMd, + Stars01, + Tool01, +} from "@untitledui/icons"; +import { type ReactNode } from "react"; +import { type AiProviderModel } from "../../../hooks/collections/use-ai-providers"; +import { getProviderLogo } from "@/web/utils/ai-providers-logos"; + +export type { AiProviderModel } from "../../../hooks/collections/use-ai-providers"; + +export function parseModelTitle(model: { title: string; modelId: string }): { + provider: string; + displayName: string; +} { + const hasPrefix = model.title.includes(": "); + return { + provider: hasPrefix + ? (model.title.split(": ")[0] ?? "") + : (model.modelId.split("/")[0] ?? ""), + displayName: hasPrefix + ? 
model.title.split(": ").slice(1).join(": ") + : model.title, + }; +} + +// ============================================================================ +// Contextual annotations (absolute thresholds, not relative to model list) +// ============================================================================ + +// 1–4 context level for dot indicator (absolute thresholds) +function getContextLevel(tokens: number): { + level: number; + label: string; + description: string; +} { + if (tokens < 32_000) { + return { level: 1, label: "Small", description: "Short conversations" }; + } + if (tokens < 200_000) { + return { level: 2, label: "Medium", description: "Good for most tasks" }; + } + if (tokens < 500_000) { + return { + level: 3, + label: "Large", + description: "Long projects & research", + }; + } + return { level: 4, label: "Very large", description: "Massive files & data" }; +} + +// Semantic colors per level — context (more = better: destructive→success) +const CONTEXT_DOT_COLORS = [ + "bg-destructive", + "bg-warning", + "bg-success", + "bg-success", +] as const; + +// Semantic colors per level — cost (more = worse: success→destructive) +const COST_DOLLAR_COLORS = [ + "text-success", + "text-warning", + "text-warning", + "text-destructive", +] as const; + +// Approximate word count for token amounts +function approxWords(tokens: number): string { + const k = Math.round((tokens * 0.75) / 1000); + return k >= 1 ? 
`~${k}K words` : `~${Math.round(tokens * 0.75)} words`; +} + +// 1–4 cost level (absolute thresholds, input $/1M) +function getCostLevel(inputPerM: number): { + level: number; + label: string; +} { + if (inputPerM < 1) return { level: 1, label: "Cheap" }; + if (inputPerM < 5) return { level: 2, label: "Moderate" }; + if (inputPerM < 15) return { level: 3, label: "High" }; + return { level: 4, label: "Expensive" }; +} + +// ============================================================================ +// UI Components +// ============================================================================ + +const CAPABILITY_CONFIGS: Record = { + text: { icon: , label: "Text" }, + vision: { icon: , label: "Vision" }, + image: { icon: , label: "Image" }, + tools: { icon: , label: "Tools" }, + reasoning: { icon: , label: "Reasoning" }, + "web-search": { + icon: , + label: "Web search", + }, +}; + +function CapabilityBadge({ capability }: { capability: string }) { + const config = CAPABILITY_CONFIGS[capability] ?? { + icon: null, + label: capability.charAt(0).toUpperCase() + capability.slice(1), + }; + + return ( + + {config.icon} + {config.label} + + ); +} + +export function ModelDetailsPanel({ + model, + compact = false, +}: { + model: AiProviderModel | null; + compact?: boolean; +}) { + if (!model) { + return ( +
+ Hover to preview +
+ ); + } + + const inputCostPerM = + model.costs?.input != null ? model.costs.input * 1_000_000 : null; + const outputCostPerM = + model.costs?.output != null ? model.costs.output * 1_000_000 : null; + + const { provider: providerLabel, displayName: modelName } = + parseModelTitle(model); + + if (compact) { + return ( +
+ {model.limits?.contextWindow && ( +
+ Context + + {model.limits.contextWindow.toLocaleString()} tokens + +
+ )} + {inputCostPerM != null && ( +
+ Input + + ${inputCostPerM.toFixed(2)} / 1M + +
+ )} + {outputCostPerM != null && ( +
+ Output + + ${outputCostPerM.toFixed(2)} / 1M + +
+ )} + {model.limits?.maxOutputTokens && ( +
+ Output limit + + {model.limits.maxOutputTokens.toLocaleString()} tokens + +
+ )} +
+ ); + } + + return ( +
+ {/* Header */} +
+ + {providerLabel} + +
+ {model.logo && ( + {model.title} + )} +

+ {modelName} +

+
+

+ {model.modelId} +

+
+ + {/* Capabilities */} + {model.capabilities && model.capabilities.length > 0 && ( +
+ {[...new Set(model.capabilities)].map((capability) => ( + + ))} +
+ )} + + {/* Stats */} +
+ {model.limits?.contextWindow && + (() => { + const { level, label, description } = getContextLevel( + model.limits.contextWindow, + ); + return ( +
+ + Context window + +
+
+ {[1, 2, 3, 4].map((i) => ( +
+ ))} +
+ + {label} + + + — {description} + +
+ + {model.limits.contextWindow.toLocaleString()} tokens + +
+ ); + })()} + + {model.limits?.maxOutputTokens && ( +
+ + Output limit + +
+ + {model.limits.maxOutputTokens.toLocaleString()} tokens + + + {approxWords(model.limits.maxOutputTokens)} + +
+
+ )} + + {(inputCostPerM != null || outputCostPerM != null) && + (() => { + const { level, label } = + inputCostPerM != null + ? getCostLevel(inputCostPerM) + : { level: 0, label: "" }; + return ( +
+ + Pricing + + {inputCostPerM != null && ( +
+
+ {[1, 2, 3, 4].map((i) => ( + + $ + + ))} +
+ + {label} + +
+ )} +
+ {inputCostPerM != null && ( +
+ + Input + + + ${inputCostPerM.toFixed(2)} / 1M tokens + +
+ )} + {outputCostPerM != null && ( +
+ + Output + + + ${outputCostPerM.toFixed(2)} / 1M tokens + +
+ )} +
+
+ ); + })()} +
+
+ ); +} + +export function SelectedModelDisplay({ + model, + placeholder = "Select model", + isLoading = false, +}: { + model: AiProviderModel | null; + placeholder?: string; + isLoading?: boolean; +}) { + if (isLoading) { + return ( +
+ + +
+ ); + } + + if (!model) { + return ( +
+ {placeholder} + +
+ ); + } + + const { displayName } = parseModelTitle(model); + + const providerLogo = getProviderLogo(model); + + return ( +
+ {model.title} + + {displayName} + + +
+ ); +} + +const FILE_BEARING_CAPABILITIES = [ + "vision", + "image", + "file", + "audio", + "video", +] as const; + +const IMAGE_MIME_TYPES = [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", +] as const; + +/** + * MIME types that no model handles natively but are usable end-to-end + * via sandbox skills: the model invokes `copy_to_sandbox` to bring the + * file in, then runs the matching skill (e.g. pptx-extract) to get + * text/images it can reason over. Allowed whenever the model has any + * file-bearing capability — text output is universal and thumbnail + * images need vision, both already covered by the existing checks. + */ +const SKILL_HANDLED_MIME_TYPES = [ + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", +] as const; + +export function modelSupportsFiles( + selectedModel: AiProviderModel | null | undefined, +): boolean { + const caps = selectedModel?.capabilities; + if (!caps) return false; + return FILE_BEARING_CAPABILITIES.some((c) => caps.includes(c)); +} + +export function isFileTypeSupportedByModel( + mimeType: string, + selectedModel: AiProviderModel | null | undefined, +): boolean { + if (!mimeType) return false; + if (mimeType.startsWith("text/")) return true; + + const caps = selectedModel?.capabilities ?? 
[]; + const hasVision = caps.includes("vision") || caps.includes("image"); + const hasFile = caps.includes("file"); + const hasAudio = caps.includes("audio"); + const hasVideo = caps.includes("video"); + + if (hasVision && IMAGE_MIME_TYPES.includes(mimeType as never)) return true; + if (hasFile && mimeType === "application/pdf") return true; + if (hasAudio && mimeType.startsWith("audio/")) return true; + if (hasVideo && mimeType.startsWith("video/")) return true; + if ( + modelSupportsFiles(selectedModel) && + SKILL_HANDLED_MIME_TYPES.includes(mimeType as never) + ) { + return true; + } + + return false; +} + +export function getAcceptedMimeTypesForModel( + selectedModel: AiProviderModel | null | undefined, +): string { + const caps = selectedModel?.capabilities ?? []; + const accepted: string[] = ["text/*"]; + + if (caps.includes("vision") || caps.includes("image")) { + accepted.push(...IMAGE_MIME_TYPES); + } + if (caps.includes("file")) { + accepted.push("application/pdf"); + } + if (caps.includes("audio")) { + accepted.push("audio/*"); + } + if (caps.includes("video")) { + accepted.push("video/*"); + } + if (modelSupportsFiles(selectedModel)) { + accepted.push(...SKILL_HANDLED_MIME_TYPES); + } + + return accepted.join(","); +} + +export function getSupportedFileTypesLabel( + selectedModel: AiProviderModel | null | undefined, +): string { + const caps = selectedModel?.capabilities ?? 
[]; + const parts: string[] = []; + + if (caps.includes("vision") || caps.includes("image")) parts.push("images"); + if (caps.includes("file")) parts.push("PDFs"); + if (caps.includes("audio")) parts.push("audio"); + if (caps.includes("video")) parts.push("video"); + if (modelSupportsFiles(selectedModel)) parts.push("Office files"); + + if (parts.length === 0) return "text only"; + if (parts.length === 1) return parts[0]!; + if (parts.length === 2) return `${parts[0]} and ${parts[1]}`; + return `${parts.slice(0, -1).join(", ")}, and ${parts.at(-1)}`; +} diff --git a/apps/mesh/src/web/components/chat/side-panel-chat.tsx b/apps/mesh/src/web/components/chat/side-panel-chat.tsx index 82eaba0c53..9a0ebb57a1 100644 --- a/apps/mesh/src/web/components/chat/side-panel-chat.tsx +++ b/apps/mesh/src/web/components/chat/side-panel-chat.tsx @@ -14,9 +14,16 @@ import { Chat } from "./index"; import { useChatStream, useChatPrefs, useChatTask } from "./context"; import { ChatContextPanel } from "./context-panel"; import { wasCreditsEmptyDismissed } from "./credits-empty-state"; -import { BranchPicker } from "../thread/github/branch-picker.tsx"; +import { ThreadPills } from "./pills/thread-pills"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import type { HarnessId } from "@/harnesses"; +import { + agentHasClonableSource, + hasLocalCliHarness, +} from "@/web/lib/agent-capabilities"; import { useAiProviderKeys } from "@/web/hooks/collections/use-ai-providers"; +import { useCurrentLink } from "@/web/hooks/use-current-link"; import { useDecoCredits } from "@/web/hooks/use-deco-credits"; // ---------- Default sidebar empty state ---------- @@ -25,15 +32,22 @@ function SidebarEmptyState() { const { org } = useProjectContext(); const { selectedVirtualMcp } = useChatPrefs(); const { data: session } = authClient.useSession(); - const { currentBranch, setCurrentTaskBranch } = useChatTask(); + const { activeTask, currentBranch, setCurrentTaskBranch } = 
useChatTask(); const defaultAgent = getWellKnownDecopilotVirtualMCP(org.id); const displayAgent = selectedVirtualMcp ?? defaultAgent; const fullVm = useVirtualMCP(displayAgent.id); const userId = session?.user?.id ?? ""; + const agentId = displayAgent.id; const githubRepo = fullVm?.metadata?.githubRepo ?? null; - const showBranchPicker = !!githubRepo?.connectionId && !!userId; + const showBranchPicker = + agentHasClonableSource(fullVm?.metadata) && !!userId && !!agentId; + + // Active thread's pinned kind + harness (null on a brand-new thread). + const threadKind = (activeTask?.sandbox_provider_kind ?? + null) as SandboxProviderKind | null; + const threadHarness = (activeTask?.harness_id ?? null) as HarnessId | null; return (
@@ -54,16 +68,19 @@ function SidebarEmptyState() {
{showBranchPicker && (
-
)} @@ -83,8 +100,21 @@ function ChatPanelContent() { const { isChatEmpty } = useChatStream(); const [activePanel, setActivePanel] = useState<"chat" | "context">("chat"); const deco = useDecoCredits(); + const { selectedVirtualMcp } = useChatPrefs(); + const defaultAgent = getWellKnownDecopilotVirtualMCP(org.id); + const displayAgent = selectedVirtualMcp ?? defaultAgent; + const fullVm = useVirtualMCP(displayAgent.id); + const link = useCurrentLink(); + + // Clonable agents (Start Website + GitHub-imported) can route through + // a laptop CLI harness when one is online, so the no-provider gate + // only fires for them if neither a cloud provider nor a local CLI is + // available. + const isClonableAgent = agentHasClonableSource(fullVm?.metadata); + const showProviderEmptyState = + allKeys.length === 0 && !(isClonableAgent && hasLocalCliHarness(link)); - if (allKeys.length === 0) { + if (showProviderEmptyState) { return ( diff --git a/apps/mesh/src/web/components/chat/store/thread-connection.ts b/apps/mesh/src/web/components/chat/store/thread-connection.ts index 7f0c2dfe0c..76ac6dd5be 100644 --- a/apps/mesh/src/web/components/chat/store/thread-connection.ts +++ b/apps/mesh/src/web/components/chat/store/thread-connection.ts @@ -50,6 +50,8 @@ import { Store } from "./store-primitive"; import { extractToolErrorMessage } from "./mcp-utils"; import type { ChatMode } from "../types"; import { toast } from "sonner"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import type { HarnessId } from "@/harnesses"; export { Store }; @@ -64,6 +66,12 @@ export interface RequestOptions { agent?: { id: string }; branch?: string | null; thread_id?: string; + /** + * Optional pins sent on first message. The server persists them onto the + * thread row and ignores them on subsequent messages. 
+ */ + sandboxProviderKind?: SandboxProviderKind; + harnessId?: HarnessId; } // ─── Status ────────────────────────────────────────────────────────────────── diff --git a/apps/mesh/src/web/components/chat/task/types.ts b/apps/mesh/src/web/components/chat/task/types.ts index d1b1aaa22d..862467838a 100644 --- a/apps/mesh/src/web/components/chat/task/types.ts +++ b/apps/mesh/src/web/components/chat/task/types.ts @@ -21,6 +21,10 @@ export interface Task { trigger_id?: string | null; /** Git branch associated with this thread, when the vMCP is GitHub-linked. */ branch?: string | null; + /** Sandbox provider kind pinned on first message (e.g. "docker", "freestyle", "remote-user"). */ + sandbox_provider_kind?: string | null; + /** Harness id pinned on first message (e.g. "claude-code", "codex", "decopilot"). */ + harness_id?: string | null; /** Per-thread metadata — layout tabs, expanded tools, etc. Loaded by COLLECTION_THREADS_GET. */ metadata?: ThreadMetadata; } diff --git a/apps/mesh/src/web/components/thread/github/branch-picker.tsx b/apps/mesh/src/web/components/thread/github/branch-picker.tsx index e72d7c9c2b..e7b6e9ca1d 100644 --- a/apps/mesh/src/web/components/thread/github/branch-picker.tsx +++ b/apps/mesh/src/web/components/thread/github/branch-picker.tsx @@ -23,7 +23,8 @@ interface Props { orgId: string; orgSlug: string; userId: string; - connectionId: string; + virtualMcpId: string; + connectionId: string | null; owner: string; repo: string; vmMap: VmMap | undefined; @@ -39,6 +40,10 @@ export function BranchPicker({ orgId, orgSlug, userId, + // virtualMcpId is consumed by callers via Props (e.g. ThreadPills); + // BranchPicker itself doesn't use it directly. Kept on the Props + // contract so v2's pill container can pass it down uniformly. 
+ virtualMcpId: _virtualMcpId, connectionId, owner, repo, diff --git a/apps/mesh/src/web/components/vm/hooks/use-vm-start.ts b/apps/mesh/src/web/components/vm/hooks/use-vm-start.ts index 62a0f68be8..4694533079 100644 --- a/apps/mesh/src/web/components/vm/hooks/use-vm-start.ts +++ b/apps/mesh/src/web/components/vm/hooks/use-vm-start.ts @@ -27,6 +27,12 @@ export interface VmStartArgs { virtualMcpId: string; /** Optional — VM_START generates one when omitted. */ branch?: string; + /** + * Optional explicit sandbox provider kind. When omitted the server picks + * via resolveDefaultSandboxProviderKind (link-online ⇒ remote-user, else + * the env kind). Used by the v2 RunnerPill to materialize a specific kind. + */ + sandboxProviderKind?: "docker" | "agent-sandbox" | "remote-user"; } export interface VmStartResult { @@ -34,7 +40,7 @@ export interface VmStartResult { vmId: string; branch: string; isNewVm: boolean; - runnerKind?: "host" | "docker" | "agent-sandbox"; + sandboxProviderKind?: "docker" | "agent-sandbox" | "remote-user"; } const inflightStarts = new Map>(); diff --git a/apps/mesh/src/web/components/vm/hooks/vm-events-context.tsx b/apps/mesh/src/web/components/vm/hooks/vm-events-context.tsx index 9c33c7e6a3..c564808a7a 100644 --- a/apps/mesh/src/web/components/vm/hooks/vm-events-context.tsx +++ b/apps/mesh/src/web/components/vm/hooks/vm-events-context.tsx @@ -34,7 +34,7 @@ import { useProjectContext } from "@decocms/mesh-sdk"; import type { ClaimFailureReason, ClaimPhase, -} from "@decocms/sandbox/runner/agent-sandbox"; +} from "@decocms/sandbox/provider/agent-sandbox"; export type { ClaimFailureReason, ClaimPhase }; diff --git a/apps/mesh/src/web/components/vm/preview/preview.tsx b/apps/mesh/src/web/components/vm/preview/preview.tsx index d2a95b55a3..f28160409b 100644 --- a/apps/mesh/src/web/components/vm/preview/preview.tsx +++ b/apps/mesh/src/web/components/vm/preview/preview.tsx @@ -6,6 +6,7 @@ import { useMCPClient, useProjectContext, SELF_MCP_ALIAS_ID, + 
parseBranchMap, } from "@decocms/mesh-sdk"; import { @@ -134,11 +135,19 @@ export function PreviewContent() { // Current iframe path (for sections editor) const [currentPath, setCurrentPath] = useState("/"); - // vmMap[userId][branch] -> { vmId, previewUrl, runnerKind? } + // vmMap[userId][branch][sandboxProviderKind] -> { vmId, previewUrl, ... } + // Use parseBranchMap to handle both legacy 2-level and current 3-level shapes. + // For the preview surface we pick the first non-remote-user entry (cloud VMs + // have an accessible previewUrl), falling back to the first entry of any kind. + // There is typically only one entry per branch in normal usage. const userId = session?.user?.id; const metadata = inset?.entity?.metadata; + const branchMap = + userId && branch ? parseBranchMap(metadata?.vmMap?.[userId]?.[branch]) : {}; + const branchMapEntries = Object.values(branchMap); const vmEntry = - userId && branch ? metadata?.vmMap?.[userId]?.[branch] : undefined; + branchMapEntries.find((e) => e.sandboxProviderKind !== "remote-user") ?? + branchMapEntries[0]; const previewUrl = vmEntry?.previewUrl ?? null; const virtualMcpId = inset?.entity?.id ?? null; @@ -354,16 +363,24 @@ export function PreviewContent() { const openDrawer = () => handleDrawerOpenChange(true); // Stop / restart. VM_DELETE is best-effort; the vmMap query refetch is - // what actually flips the UI to idle. + // what actually flips the UI to idle. VM_DELETE requires the kind because + // vmMap is keyed by (user, branch, kind); we delete whichever sibling the + // preview surface is currently displaying. 
const handleStop = async () => { if (!virtualMcpId) return; const branchToStop = branch; if (!branchToStop) return; + const kindToStop = vmEntry?.sandboxProviderKind; + if (!kindToStop) return; vmUserStop.mark(virtualMcpId, branchToStop); try { await mcpClient.callTool({ name: "VM_DELETE", - arguments: { virtualMcpId, branch: branchToStop }, + arguments: { + virtualMcpId, + branch: branchToStop, + sandboxProviderKind: kindToStop, + }, }); } catch { // Best effort diff --git a/apps/mesh/src/web/hooks/use-current-link.ts b/apps/mesh/src/web/hooks/use-current-link.ts new file mode 100644 index 0000000000..9c7abfbe7d --- /dev/null +++ b/apps/mesh/src/web/hooks/use-current-link.ts @@ -0,0 +1,43 @@ +import { useQuery } from "@tanstack/react-query"; +import type { Capability } from "@/links/protocol"; +import { + SELF_MCP_ALIAS_ID, + useMCPClient, + useProjectContext, +} from "@decocms/mesh-sdk"; +import { KEYS } from "@/web/lib/query-keys"; +import { unwrapToolResult } from "@/web/lib/unwrap-tool-result"; + +export interface CurrentLink { + online: boolean; + machineId?: string; + cliVersion?: string; + capabilities: Capability[]; +} + +const OFFLINE: CurrentLink = { online: false, capabilities: [] }; + +export function useCurrentLink(): CurrentLink { + const { org } = useProjectContext(); + const client = useMCPClient({ + connectionId: SELF_MCP_ALIAS_ID, + orgId: org.id, + orgSlug: org.slug, + }); + + const { data } = useQuery({ + queryKey: KEYS.currentLink(org.id), + queryFn: async () => { + const result = await client.callTool({ + name: "LINK_CURRENT_GET", + arguments: {}, + }); + return unwrapToolResult(result) ?? OFFLINE; + }, + staleTime: 10_000, + refetchInterval: 15_000, + refetchOnWindowFocus: true, + }); + + return data ?? 
OFFLINE; +} diff --git a/apps/mesh/src/web/index.tsx b/apps/mesh/src/web/index.tsx index caaadd9876..8d541526b8 100644 --- a/apps/mesh/src/web/index.tsx +++ b/apps/mesh/src/web/index.tsx @@ -224,6 +224,10 @@ const unifiedChatSearchSchema = z.object({ mainOpen: z.number().optional(), chat: z.number().optional(), autosend: z.string().optional(), + /** Carried from the homepage composer so the new thread's first send + * inherits the "Run locally" toggle state. ChatPrefsProvider seeds + * runLocally from this on mount. */ + runLocally: z.string().optional(), }); const unifiedChatRoute = createRoute({ diff --git a/apps/mesh/src/web/layouts/agent-shell-layout/index.tsx b/apps/mesh/src/web/layouts/agent-shell-layout/index.tsx index f7d2d20067..74b64fa37d 100644 --- a/apps/mesh/src/web/layouts/agent-shell-layout/index.tsx +++ b/apps/mesh/src/web/layouts/agent-shell-layout/index.tsx @@ -60,8 +60,9 @@ import { useMCPClient, useProjectContext, useVirtualMCP, + parseBranchMap, } from "@decocms/mesh-sdk"; -import type { VirtualMCPEntity } from "@decocms/mesh-sdk/types"; +import type { VirtualMCPEntity, VmMap } from "@decocms/mesh-sdk/types"; import { useNavigate, useParams, useSearch } from "@tanstack/react-router"; import { useVmStart } from "@/web/components/vm/hooks/use-vm-start"; import { useStatusSounds } from "../../hooks/use-status-sounds"; @@ -78,7 +79,6 @@ import { MainPanelContent } from "@/web/layouts/main-panel-tabs"; import { MainPanelTabsBar } from "@/web/layouts/main-panel-tabs/main-panel-tabs-bar"; import { VirtualMcpHeaderInfo } from "../../views/virtual-mcp/header-info.tsx"; import { VmEventsProvider } from "@/web/components/vm/hooks/vm-events-context.tsx"; -import type { VmMapEntry } from "@decocms/mesh-sdk"; import { useEnsureTask } from "@/web/hooks/use-ensure-task"; // --------------------------------------------------------------------------- @@ -204,7 +204,7 @@ function VmEventsBridge({ }: { virtualMcpId: string; hasActiveGithubRepo: boolean; - vmMap: 
Record> | undefined; + vmMap: VmMap | undefined; children: ReactNode; }) { const { org } = useProjectContext(); @@ -212,10 +212,12 @@ function VmEventsBridge({ const { data: session } = authClient.useSession(); const userId = session?.user?.id; - // Auto-start the VM when the active task points at a branch without a - // registered vmMap entry. Routed through useVmStart so concurrent mounts - // (preview, env, this bridge) for the same (virtualMcpId, branch) collapse - // onto one in-flight upstream call. + // Auto-start the VM when the active task points at a branch without any + // registered vmMap entry (regardless of kind). Routed through useVmStart so + // concurrent mounts (preview, env, this bridge) for the same + // (virtualMcpId, branch) collapse onto one in-flight upstream call. + // The server's resolveDefaultSandboxProviderKind decides the kind when + // sandboxProviderKind is omitted — this is intentional for implicit auto-start. const autoStartClient = useMCPClient({ connectionId: SELF_MCP_ALIAS_ID, orgId: org.id, @@ -231,7 +233,11 @@ function VmEventsBridge({ if (!hasActiveGithubRepo) return; if (!userId) return; if (!currentBranch) return; - if (vmMap?.[userId]?.[currentBranch]) { + // Use parseBranchMap to handle both legacy 2-level and current 3-level shapes. + // If any entry exists for this (user, branch) — regardless of kind — a VM is + // already running; don't auto-start. + const branchMap = parseBranchMap(vmMap?.[userId]?.[currentBranch]); + if (Object.keys(branchMap).length > 0) { // VM is already running — record the branch so a user stop won't // re-trigger auto-start within this mount. 
autoStartAttemptedRef.current.add(currentBranch); diff --git a/apps/mesh/src/web/lib/agent-capabilities.test.ts b/apps/mesh/src/web/lib/agent-capabilities.test.ts new file mode 100644 index 0000000000..db359f00a9 --- /dev/null +++ b/apps/mesh/src/web/lib/agent-capabilities.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from "bun:test"; +import type { CurrentLink } from "@/web/hooks/use-current-link"; +import { + agentHasClonableSource, + hasLocalCliHarness, +} from "./agent-capabilities"; + +describe("agentHasClonableSource", () => { + it("returns false for null/undefined metadata", () => { + expect(agentHasClonableSource(null)).toBe(false); + expect(agentHasClonableSource(undefined)).toBe(false); + }); + + it("returns false for metadata without githubRepo", () => { + expect(agentHasClonableSource({})).toBe(false); + expect(agentHasClonableSource({ githubRepo: null })).toBe(false); + }); + + it("returns false when githubRepo.url is absent or empty", () => { + expect(agentHasClonableSource({ githubRepo: {} })).toBe(false); + expect(agentHasClonableSource({ githubRepo: { url: "" } })).toBe(false); + }); + + it("returns true for a Start Website agent (no connectionId)", () => { + expect( + agentHasClonableSource({ + githubRepo: { + url: "https://github.com/decocms/webapp-template", + owner: "decocms", + name: "webapp-template", + }, + }), + ).toBe(true); + }); + + it("returns true for a github-imported agent (with connectionId)", () => { + expect( + agentHasClonableSource({ + githubRepo: { + url: "https://github.com/acme/app", + owner: "acme", + name: "app", + connectionId: "conn_abc123", + installationId: 42, + }, + }), + ).toBe(true); + }); + + it("ignores non-object metadata", () => { + expect(agentHasClonableSource("string")).toBe(false); + expect(agentHasClonableSource(42)).toBe(false); + }); +}); + +describe("hasLocalCliHarness", () => { + const link = (overrides: Partial = {}): CurrentLink => ({ + online: false, + capabilities: [], + ...overrides, + 
}); + + it("returns false when the link is offline", () => { + expect(hasLocalCliHarness(link({ online: false }))).toBe(false); + expect( + hasLocalCliHarness( + link({ online: false, capabilities: ["claude-code"] }), + ), + ).toBe(false); + }); + + it("returns false when online but no CLI harness is reported", () => { + expect(hasLocalCliHarness(link({ online: true }))).toBe(false); + expect( + hasLocalCliHarness( + link({ online: true, capabilities: ["decopilot-sandbox"] }), + ), + ).toBe(false); + }); + + it("returns true when online with claude-code or codex", () => { + expect( + hasLocalCliHarness(link({ online: true, capabilities: ["claude-code"] })), + ).toBe(true); + expect( + hasLocalCliHarness(link({ online: true, capabilities: ["codex"] })), + ).toBe(true); + expect( + hasLocalCliHarness( + link({ online: true, capabilities: ["claude-code", "codex"] }), + ), + ).toBe(true); + }); +}); diff --git a/apps/mesh/src/web/lib/agent-capabilities.ts b/apps/mesh/src/web/lib/agent-capabilities.ts new file mode 100644 index 0000000000..8f348782b2 --- /dev/null +++ b/apps/mesh/src/web/lib/agent-capabilities.ts @@ -0,0 +1,33 @@ +import type { CurrentLink } from "@/web/hooks/use-current-link"; + +/** + * True when the agent has source code we can check out into a per-branch + * sandbox. Both Start Website agents (clone from a public template) and + * GitHub-imported agents (clone the user's repo) populate + * `metadata.githubRepo.url`. Decopilot-only agents have neither, so this + * returns false and they fall back to the cloud Decopilot harness. + * + * Kept loosely-typed (accepts `unknown`) because the metadata field + * isn't centrally schematized — different creators add different keys + * and a strict type wouldn't help here. 
+ */ +export function agentHasClonableSource(metadata: unknown): boolean { + if (typeof metadata !== "object" || metadata === null) return false; + const meta = metadata as { githubRepo?: { url?: unknown } | null }; + const url = meta.githubRepo?.url; + return typeof url === "string" && url.length > 0; +} + +/** + * True when the user's link daemon is online AND exposes at least one + * CLI harness (Claude Code or Codex) that a clonable agent's chat can + * route through. Lets the chat skip the no-provider empty state when + * the user has a local CLI to fall back on. + */ +export function hasLocalCliHarness(link: CurrentLink): boolean { + if (!link.online) return false; + return ( + link.capabilities.includes("claude-code") || + link.capabilities.includes("codex") + ); +} diff --git a/apps/mesh/src/web/lib/query-keys.ts b/apps/mesh/src/web/lib/query-keys.ts index a775b68e22..05166cc086 100644 --- a/apps/mesh/src/web/lib/query-keys.ts +++ b/apps/mesh/src/web/lib/query-keys.ts @@ -318,6 +318,13 @@ export const KEYS = { decofile: (previewUrl: string) => ["decofile", previewUrl] as const, liveMeta: (previewUrl: string) => ["live-meta", previewUrl] as const, + // Link daemon status (user-scoped; the cluster derives the userSub + // from the bearer session, so we don't include it in the key). + linkStatus: () => ["link-status"] as const, + + // Current link info (org-scoped; includes capabilities, machineId, cliVersion). 
+ currentLink: (orgId: string) => ["current-link", orgId] as const, + // GitHub integration githubUserOrgs: (orgId: string, connectionId: string) => ["github-user-orgs", orgId, connectionId] as const, diff --git a/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.test.ts b/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.test.ts index ba17420cca..1d26ea87c1 100644 --- a/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.test.ts +++ b/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.test.ts @@ -21,8 +21,6 @@ describe("connect-dialog reducer", () => { { kind: "grid" }, { kind: "form", providerId: "openai", presetId: null }, { kind: "oauth-pending", providerId: "anthropic", stateToken: "x" }, - { kind: "cli-pending", providerId: "claude-code" }, - { kind: "cli-error", providerId: "claude-code", error: "x" }, { kind: "provision-pending", providerId: "deco" }, { kind: "provision-error", providerId: "deco", error: "x" }, ]; @@ -36,8 +34,6 @@ describe("connect-dialog reducer", () => { { kind: "grid" }, { kind: "form", providerId: "openai", presetId: null }, { kind: "oauth-pending", providerId: "anthropic", stateToken: "abc" }, - { kind: "cli-pending", providerId: "claude-code" }, - { kind: "cli-error", providerId: "claude-code", error: "no cli" }, { kind: "provision-pending", providerId: "deco" }, { kind: "provision-error", providerId: "deco", error: "boom" }, ]; @@ -89,37 +85,6 @@ describe("connect-dialog reducer", () => { }); }); - test("select-cli transitions grid → cli-pending", () => { - expect( - reducer( - { kind: "grid" }, - { type: "select-cli", providerId: "claude-code" }, - ), - ).toEqual({ kind: "cli-pending", providerId: "claude-code" }); - }); - - test("cli-error transitions cli-pending → cli-error", () => { - expect( - reducer( - { kind: "cli-pending", providerId: "claude-code" }, - { type: "cli-error", error: "CLI not signed in" }, - ), - ).toEqual({ - kind: "cli-error", - providerId: 
"claude-code", - error: "CLI not signed in", - }); - }); - - test("retry-cli transitions cli-error → cli-pending", () => { - expect( - reducer( - { kind: "cli-error", providerId: "claude-code", error: "x" }, - { type: "retry-cli" }, - ), - ).toEqual({ kind: "cli-pending", providerId: "claude-code" }); - }); - test("select-provision transitions grid → provision-pending", () => { expect( reducer( @@ -155,8 +120,6 @@ describe("connect-dialog reducer", () => { const intermediate: DialogState[] = [ { kind: "form", providerId: "openai", presetId: null }, { kind: "oauth-pending", providerId: "anthropic", stateToken: "x" }, - { kind: "cli-pending", providerId: "claude-code" }, - { kind: "cli-error", providerId: "claude-code", error: "x" }, { kind: "provision-pending", providerId: "deco" }, { kind: "provision-error", providerId: "deco", error: "x" }, ]; @@ -189,9 +152,6 @@ describe("connect-dialog reducer", () => { providerId: "openai", presetId: null, }; - expect( - reducer(s, { type: "select-cli", providerId: "claude-code" }), - ).toEqual(s); expect( reducer(s, { type: "select-provision", providerId: "deco" }), ).toEqual(s); diff --git a/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.ts b/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.ts index 9e764f1b04..0396a4dff0 100644 --- a/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.ts +++ b/apps/mesh/src/web/views/settings/ai-providers/connect-dialog-state.ts @@ -3,8 +3,6 @@ export type DialogState = | { kind: "grid" } | { kind: "form"; providerId: string; presetId: string | null } | { kind: "oauth-pending"; providerId: string; stateToken: string } - | { kind: "cli-pending"; providerId: string } - | { kind: "cli-error"; providerId: string; error: string } | { kind: "provision-pending"; providerId: string } | { kind: "provision-error"; providerId: string; error: string }; @@ -14,11 +12,8 @@ export type DialogAction = | { type: "back" } | { type: "select-form"; 
providerId: string; presetId: string | null } | { type: "select-oauth"; providerId: string; stateToken: string } - | { type: "select-cli"; providerId: string } | { type: "select-provision"; providerId: string } | { type: "oauth-failed" } - | { type: "cli-error"; error: string } - | { type: "retry-cli" } | { type: "provision-error"; error: string } | { type: "retry-provision" }; @@ -34,8 +29,6 @@ export function reducer(state: DialogState, action: DialogAction): DialogState { switch (state.kind) { case "form": case "oauth-pending": - case "cli-pending": - case "cli-error": case "provision-pending": case "provision-error": return { kind: "grid" }; @@ -56,21 +49,8 @@ export function reducer(state: DialogState, action: DialogAction): DialogState { providerId: action.providerId, stateToken: action.stateToken, }; - case "select-cli": - if (state.kind !== "grid") return state; - return { kind: "cli-pending", providerId: action.providerId }; case "oauth-failed": return state.kind === "oauth-pending" ? 
{ kind: "grid" } : state; - case "cli-error": - if (state.kind !== "cli-pending") return state; - return { - kind: "cli-error", - providerId: state.providerId, - error: action.error, - }; - case "retry-cli": - if (state.kind !== "cli-error") return state; - return { kind: "cli-pending", providerId: state.providerId }; case "select-provision": if (state.kind !== "grid") return state; return { kind: "provision-pending", providerId: action.providerId }; diff --git a/apps/mesh/src/web/views/settings/ai-providers/connect-provider-dialog.tsx b/apps/mesh/src/web/views/settings/ai-providers/connect-provider-dialog.tsx index 953c0c0983..046e98f21d 100644 --- a/apps/mesh/src/web/views/settings/ai-providers/connect-provider-dialog.tsx +++ b/apps/mesh/src/web/views/settings/ai-providers/connect-provider-dialog.tsx @@ -42,8 +42,6 @@ function activeProviderId(state: DialogState): string | null { switch (state.kind) { case "form": case "oauth-pending": - case "cli-pending": - case "cli-error": case "provision-pending": case "provision-error": return state.providerId; @@ -128,48 +126,6 @@ export function ConnectProviderDialog({ }, }); - const { mutate: activateCli } = useMutation({ - mutationFn: async (providerId: string) => { - const result = (await client.callTool({ - name: "AI_PROVIDER_CLI_ACTIVATE", - arguments: { providerId }, - })) as { - structuredContent?: { activated: boolean; error?: string }; - isError?: boolean; - content?: { text?: string }[]; - }; - if (result?.isError) { - throw new Error(result.content?.[0]?.text ?? "CLI activation failed"); - } - return { providerId, ...result.structuredContent }; - }, - onSuccess: (data) => { - if (!data?.activated) { - track("ai_provider_cli_activate_failed", { - provider_id: data.providerId, - error: data?.error ?? "unknown", - }); - dispatch({ - type: "cli-error", - error: data?.error ?? 
"CLI activation failed", - }); - return; - } - track("ai_provider_cli_activated", { provider_id: data.providerId }); - invalidateKeys(); - const provider = providers.find((p) => p.id === data.providerId); - toast.success(`${provider?.name ?? "Provider"} activated`); - close(); - }, - onError: (err, providerId) => { - track("ai_provider_cli_activate_failed", { - provider_id: providerId, - error: err.message, - }); - dispatch({ type: "cli-error", error: err.message }); - }, - }); - const { mutate: provisionKey } = useMutation({ mutationFn: async (providerId: string) => { const result = (await client.callTool({ @@ -204,7 +160,6 @@ export function ConnectProviderDialog({ ? (selection.preset?.id ?? null) : null; const supportsOAuth = provider.supportedMethods.includes("oauth-pkce"); - const supportsCli = provider.supportedMethods.includes("cli-activate"); const supportsApiKey = provider.supportedMethods.includes("api-key"); const supportsProvision = provider.supportsProvision === true; @@ -218,16 +173,6 @@ export function ConnectProviderDialog({ return; } - if (supportsCli) { - track("ai_provider_connect_clicked", { - provider_id: provider.id, - method: "cli-activate", - }); - dispatch({ type: "select-cli", providerId: provider.id }); - activateCli(provider.id); - return; - } - if (supportsOAuth) { track("ai_provider_connect_clicked", { provider_id: provider.id, @@ -429,17 +374,14 @@ export function ConnectProviderDialog({
)} - {(state.kind === "cli-pending" || - state.kind === "provision-pending") && ( + {state.kind === "provision-pending" && (
-

- {state.kind === "cli-pending" ? "Checking CLI…" : "Connecting…"} -

+

Connecting…

)} - {(state.kind === "cli-error" || state.kind === "provision-error") && ( + {state.kind === "provision-error" && (

{state.error}

@@ -454,13 +396,8 @@ export function ConnectProviderDialog({ - )} + + ); + })} +
+ ); +} +``` + +- [ ] **Step 4: Run tests to confirm passing** + +```bash +bun test apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx +``` + +Expected: all 5 tests PASS. + +- [ ] **Step 5: Format and commit** + +```bash +bun run fmt +git add apps/mesh/src/web/components/chat/select-model/agent-section.tsx apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx +git commit -m "feat(chat): add AgentSection component for merged model selector + +Renders one section in the new sectioned popover — header with the +agent title (plus ' · on this laptop' suffix for CLI agents), three +tier rows with descriptions, and the existing 'On' indicator. Local +sections sit on a faint bg-success/5 band; disabled sections render +opacity-40 + pointer-events-none + a small lock icon." +``` + +--- + +### Task 4: New `AgentModelPopover` shell + +**Files:** +- Create: `apps/mesh/src/web/components/chat/agent-model-popover.tsx` +- Create: `apps/mesh/src/web/components/chat/agent-model-popover.test.tsx` + +- [ ] **Step 1: Write the failing tests** + +Create `apps/mesh/src/web/components/chat/agent-model-popover.test.tsx`: + +```tsx +import { describe, expect, test, mock } from "bun:test"; +import { render } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { AgentModelPopover } from "./agent-model-popover"; +import { getAgentSections } from "./select-model/agent-models"; + +const ALL = getAgentSections({ + hasAnyKey: true, + link: { online: true, capabilities: ["claude-code", "codex"] }, +}); + +describe("AgentModelPopover", () => { + test("renders one AgentSection per item", () => { + const { getAllByTestId } = render( + {}} + />, + ); + expect(getAllByTestId("agent-section")).toHaveLength(3); + }); + + test("when lockedAgent is set, only the matching section is enabled", () => { + const { getAllByTestId } = render( + {}} + />, + ); + const sections = getAllByTestId("agent-section"); + const disabled = sections.filter( + (s) => 
s.getAttribute("aria-disabled") === "true", + ); + expect(disabled).toHaveLength(2); + }); + + test("row click in a section calls onSelect with (kind, tier)", () => { + const onSelect = mock( + (_k: "decopilot" | "claude-code" | "codex", _t: "fast" | "smart" | "thinking") => {}, + ); + const { getByText } = render( + , + ); + getByText("Haiku").click(); + expect(onSelect).toHaveBeenCalledWith("claude-code", "fast"); + }); + + test("locked non-active section does NOT call onSelect when its rows are clicked", () => { + const onSelect = mock(() => {}); + const { getByText } = render( + , + ); + // Fast row inside the locked Decopilot section + getByText("Fast").click(); + expect(onSelect).not.toHaveBeenCalled(); + }); +}); +``` + +- [ ] **Step 2: Confirm failure** + +```bash +bun test apps/mesh/src/web/components/chat/agent-model-popover.test.tsx +``` + +Expected: FAIL — module missing. + +- [ ] **Step 3: Implement the popover** + +Create `apps/mesh/src/web/components/chat/agent-model-popover.tsx`: + +```tsx +import type { ChatTier } from "@/tools/organization/schema"; +import { AgentSection } from "./select-model/agent-section"; +import type { + AgentKind, + AgentSection as AgentSectionData, +} from "./select-model/agent-models"; + +interface Props { + sections: AgentSectionData[]; + activeAgent: AgentKind | null; + activeTier: ChatTier; + /** When non-null, only the section matching this kind is interactive; + * the others render opacity-40 + pointer-events-none. */ + lockedAgent: AgentKind | null; + onSelect: (agent: AgentKind, tier: ChatTier) => void; +} + +export function AgentModelPopover({ + sections, + activeAgent, + activeTier, + lockedAgent, + onSelect, +}: Props) { + return ( +
+ {sections.map((section) => { + const disabled = + lockedAgent !== null && lockedAgent !== section.kind; + const selectedTier = + activeAgent === section.kind ? activeTier : null; + return ( + onSelect(section.kind, tier)} + /> + ); + })} +
+ ); +} +``` + +- [ ] **Step 4: Run tests** + +```bash +bun test apps/mesh/src/web/components/chat/agent-model-popover.test.tsx +``` + +Expected: all 4 tests PASS. + +- [ ] **Step 5: Format and commit** + +```bash +bun run fmt +git add apps/mesh/src/web/components/chat/agent-model-popover.tsx apps/mesh/src/web/components/chat/agent-model-popover.test.tsx +git commit -m "feat(chat): add AgentModelPopover shell for merged selector + +Composes AgentSection rows from a getAgentSections result. Handles +lock semantics — when lockedAgent is set, only the matching section +is interactive; the others render disabled. Row click fires onSelect +with (kind, tier) and the locking is verified by tests." +``` + +--- + +### Task 5: Rewrite `AgentModelTrigger` to use the new popover (+ fix gap bug + green styling) + +**Files:** +- Modify: `apps/mesh/src/web/components/chat/agent-model-trigger.tsx` +- Create: `apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx` + +This task replaces the existing trigger entirely. The new trigger pulls its data (`hasAnyKey`, `link`) from hooks so callers don't need to thread eight props through. It receives `currentBranch` and `virtualMcpId` so it can fire the eager VM start that today lives in `ThreadPills`. 
+ +- [ ] **Step 1: Write the failing tests** + +Create `apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx`: + +```tsx +import { describe, expect, test } from "bun:test"; +import { render } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { AgentModelTriggerPure } from "./agent-model-trigger"; +import { getAgentSections } from "./select-model/agent-models"; + +const ALL = getAgentSections({ + hasAnyKey: true, + link: { online: true, capabilities: ["claude-code", "codex"] }, +}); + +describe("AgentModelTriggerPure", () => { + test("closed pill is neutral when active agent is Decopilot", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).not.toMatch(/text-success/); + expect(button?.className).not.toMatch(/bg-success\/10/); + }); + + test("closed pill gets text-success and bg-success/10 when CLI agent active", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).toMatch(/text-success/); + expect(button?.className).toMatch(/bg-success\/10/); + }); + + test("closed pill uses responsive gap so collapsed label doesn't leave phantom gap", () => { + const { container } = render( + {}} + />, + ); + const button = container.querySelector("button"); + expect(button?.className).toMatch(/\bgap-0\b/); + expect(button?.className).toMatch(/@\[496px\]\/chat-bottom:gap-1\.5/); + }); + + test("label reflects the active CLI tier model label (Opus)", () => { + const { getByText } = render( + {}} + />, + ); + expect(getByText("Opus")).toBeInTheDocument(); + }); + + test("label reflects the active Decopilot tier label (Smart)", () => { + const { getByText } = render( + {}} + />, + ); + expect(getByText("Smart")).toBeInTheDocument(); + }); +}); +``` + +- [ ] **Step 2: Confirm failure** + +```bash +bun test apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx +``` + 
+Expected: FAIL — `AgentModelTriggerPure` doesn't exist yet. + +- [ ] **Step 3: Rewrite `agent-model-trigger.tsx`** + +Replace the entire contents of `apps/mesh/src/web/components/chat/agent-model-trigger.tsx` with: + +```tsx +import { Button } from "@deco/ui/components/button.tsx"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@deco/ui/components/popover.tsx"; +import { cn } from "@deco/ui/lib/utils.ts"; +import { useState } from "react"; +import type { HarnessId } from "@/harnesses"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import type { ChatTier } from "@/tools/organization/schema"; +import { + SELF_MCP_ALIAS_ID, + useMCPClient, + useProjectContext, +} from "@decocms/mesh-sdk"; +import { track } from "@/web/lib/posthog-client"; +import { useAiProviderKeys } from "@/web/hooks/collections/use-ai-providers"; +import { useCurrentLink } from "@/web/hooks/use-current-link"; +import { useVmStart } from "@/web/components/vm/hooks/use-vm-start"; +import { useChatPrefs } from "./context"; +import { AgentModelPopover } from "./agent-model-popover"; +import { + type AgentKind, + type AgentSection, + getAgentSections, +} from "./select-model/agent-models"; + +interface Props { + agent: HarnessId | null; + sandboxKind: SandboxProviderKind | null; + tier: ChatTier; + /** Set when the user is on a branch — needed for the eager VM-start + * when the user picks a CLI agent. `null` when no branch is selected + * (no eager start). */ + currentBranch: string | null; + virtualMcpId: string; + /** Tier-only setter — kept for callers that want to swap tier without + * also potentially flipping agents (the popover handles agent + + * tier itself via `setPendingAgentOption`). */ + onSelect: (tier: ChatTier) => void; +} + +/** Maps the popover's AgentKind back to the persisted AgentOption. 
*/ +function optionForAgent(kind: AgentKind) { + switch (kind) { + case "decopilot": + return "decopilot" as const; + case "claude-code": + return "claude-code-laptop" as const; + case "codex": + return "codex-laptop" as const; + } +} + +function agentKindFromHarness( + agent: HarnessId | null, + sandboxKind: SandboxProviderKind | null, +): AgentKind | null { + if (agent === "claude-code" && sandboxKind === "remote-user") + return "claude-code"; + if (agent === "codex" && sandboxKind === "remote-user") return "codex"; + if (agent === "decopilot") return "decopilot"; + return null; +} + +/** + * Trigger pill in the chat input that opens the merged sectioned + * popover (Decopilot + Claude Code + Codex). When the active agent is + * a laptop-CLI variant the pill turns `text-success` + `bg-success/10` + * to mirror the "Desktop connected" affordance in + * `NoAiProviderEmptyState`. The popover handles agent + tier writes + * atomically. + */ +export function AgentModelTrigger({ + agent, + sandboxKind, + tier, + currentBranch, + virtualMcpId, + onSelect, +}: Props) { + const keys = useAiProviderKeys(); + const link = useCurrentLink(); + const { setPendingAgentOption } = useChatPrefs(); + const { org } = useProjectContext(); + const mcpClient = useMCPClient({ + connectionId: SELF_MCP_ALIAS_ID, + orgId: org.id, + orgSlug: org.slug, + }); + const startVm = useVmStart(mcpClient); + + const sections = getAgentSections({ + hasAnyKey: keys.length > 0, + link, + }); + + const activeAgent = agentKindFromHarness(agent, sandboxKind); + + const handleSelect = (kind: AgentKind, nextTier: ChatTier) => { + const opt = optionForAgent(kind); + setPendingAgentOption(opt); + onSelect(nextTier); + if (kind !== "decopilot" && currentBranch) { + startVm.mutate({ + virtualMcpId, + branch: currentBranch, + sandboxProviderKind: "remote-user" as const, + }); + } + track("agent_model_selected", { agent: kind, tier: nextTier }); + }; + + return ( + + ); +} + +interface PureProps { + sections: 
AgentSection[]; + activeAgent: AgentKind | null; + activeTier: ChatTier; + lockedAgent: AgentKind | null; + onSelect: (kind: AgentKind, tier: ChatTier) => void; +} + +/** + * Stateless variant for tests. Renders the closed pill + popover — + * does not touch hooks or chat prefs. Keeps `AgentModelTrigger` + * thin so test cases don't have to mock the entire chat context. + */ +export function AgentModelTriggerPure({ + sections, + activeAgent, + activeTier, + lockedAgent, + onSelect, +}: PureProps) { + const [open, setOpen] = useState(false); + + const section = + sections.find((s) => s.kind === activeAgent) ?? sections[0] ?? null; + const tierEntry = section?.tiers[activeTier]; + + const isLocalActive = section?.isLocal ?? false; + const label = tierEntry?.label ?? ""; + + // Closed pill — collapses label at narrow widths; `gap-0` on the + // outer + `@[496px]/chat-bottom:gap-1.5` keeps the icon + chevron + // flush when the label is hidden. + const baseClasses = + "gap-0 @[496px]/chat-bottom:gap-1.5 text-muted-foreground hover:text-foreground"; + const localActiveClasses = isLocalActive + ? "text-success bg-success/10 hover:text-success" + : ""; + + if (!section || !tierEntry) { + return null; + } + + return ( + + + + + + { + onSelect(kind, t); + setOpen(false); + }} + /> + + + ); +} +``` + +- [ ] **Step 4: Run tests to confirm pass** + +```bash +bun test apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx +``` + +Expected: all 5 tests PASS. + +- [ ] **Step 5: Run wider type check** + +```bash +bun run check 2>&1 | rg "agent-model-trigger" || echo "trigger clean" +``` + +Expected: `trigger clean`. 
+ +- [ ] **Step 6: Format and commit** + +```bash +bun run fmt +git add apps/mesh/src/web/components/chat/agent-model-trigger.tsx apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx +git commit -m "feat(chat): merge agent picker into AgentModelTrigger + +The trigger now opens the new sectioned AgentModelPopover (Decopilot, +Claude Code, Codex) instead of the old SimpleModeTierDropdown / CLI +tier list split. Closed pill goes text-success + bg-success/10 when +the active agent is a CLI variant, matching the 'Desktop connected' +green elsewhere. Picks both agent (via setPendingAgentOption) and +tier in a single click and fires the eager VM start for CLI agents +when a branch is set. Also fixes the phantom 6px gap that appeared +when the label collapses at narrow container widths." +``` + +--- + +### Task 6: Update the `input.tsx` call site to pass `currentBranch` and `virtualMcpId` + +**Files:** +- Modify: `apps/mesh/src/web/components/chat/input.tsx` (the existing AgentModelTrigger mount around lines 597–602) + +- [ ] **Step 1: Inspect the existing mount and surrounding props** + +Run: +```bash +rg -n "AgentModelTrigger" apps/mesh/src/web/components/chat/input.tsx +``` + +Expected: +``` +597: +``` + +If `currentBranch` is NOT in scope: +1. Find the parent that renders `` (search for `ChatInput\b` in the parent layout file). +2. Add `currentBranch: string | null` to `ChatInputProps` (or whichever interface the input uses). +3. Thread it through. +4. The same applies to `virtualMcpId` if it's not already in scope. + +- [ ] **Step 4: Type check** + +```bash +bun run check 2>&1 | rg "input.tsx" || echo "input.tsx clean" +``` + +Expected: `input.tsx clean`. 
+ +- [ ] **Step 5: Format and commit** + +```bash +bun run fmt +git add apps/mesh/src/web/components/chat/input.tsx +git commit -m "chore(chat): pass currentBranch + virtualMcpId to AgentModelTrigger + +The merged AgentModelTrigger needs both props so it can fire the eager +VM start that ThreadPills used to own when the user picks a CLI agent. +Plumbs them through the input mount without otherwise touching the +composer layout." +``` + +--- + +### Task 7: Slim `ThreadPills` — drop `AgentPill` and the VM-start coupling + +**Files:** +- Modify: `apps/mesh/src/web/components/chat/pills/thread-pills.tsx` + +- [ ] **Step 1: Replace the file with the slim version** + +Overwrite `apps/mesh/src/web/components/chat/pills/thread-pills.tsx` with: + +```tsx +import type { VmMap } from "@decocms/mesh-sdk"; +import type { HarnessId } from "@/harnesses"; +import type { SandboxProviderKind } from "@decocms/sandbox/provider"; +import { BranchPill } from "./branch-pill"; + +interface Props { + orgId: string; + orgSlug: string; + userId: string; + virtualMcpId: string; + connectionId: string; + owner: string; + repo: string; + vmMap: VmMap | undefined; + currentBranch: string | null; + onBranchChange: (branch: string) => void; + /** Kept in the signature for parity with the previous version even + * though the agent pill is gone — callers still pass them and they + * may be useful again if we revive a thread-level lock indicator. */ + threadKind: SandboxProviderKind | null; + threadHarness: HarnessId | null; +} + +export function ThreadPills({ + orgId, + orgSlug, + userId, + virtualMcpId, + connectionId, + owner, + repo, + vmMap, + currentBranch, + onBranchChange, +}: Props) { + return ( +
+ +
+ ); +} +``` + +**Note** the `locked={false}` on `BranchPill` in Step 1 is only a placeholder: before this change the prop was `locked={isActive}` (i.e. the BranchPill locked once the thread had messages). That lock is a property of *the row* — not specific to the AgentPill — so the slim version must preserve it: + +- [ ] **Step 2: Re-add the `isActive`-based BranchPill lock** + +Edit the function body: + +```tsx +import { useOptionalChatStream } from "../context"; +// ... + +export function ThreadPills({ … }: Props) { + const stream = useOptionalChatStream(); + const isActive = (stream?.messages ?? []).length > 0; + + return ( +
+ +
+ ); +} +``` + +- [ ] **Step 3: Type check** + +```bash +bun run check 2>&1 | rg "thread-pills" || echo "thread-pills clean" +``` + +Expected: `thread-pills clean`. + +- [ ] **Step 4: Format and commit** + +```bash +bun run fmt +git add apps/mesh/src/web/components/chat/pills/thread-pills.tsx +git commit -m "refactor(chat): drop AgentPill from ThreadPills + +The agent picker now lives inside the chat-input's AgentModelTrigger. +ThreadPills shrinks back to a single BranchPill (still locked when the +thread has messages). The eager VM-start logic moves into the merged +AgentModelTrigger row click." +``` + +--- + +### Task 8: Delete the obsolete `agent-pill.tsx` + +**Files:** +- Delete: `apps/mesh/src/web/components/chat/pills/agent-pill.tsx` + +- [ ] **Step 1: Confirm no remaining imports** + +Run: +```bash +rg -n "from\s+\".*agent-pill\"|from\s+'.*agent-pill'" apps/mesh/src +``` + +Expected: **empty output**. If anything matches, **STOP** — there's a caller we missed. + +- [ ] **Step 2: Delete the file** + +```bash +rm apps/mesh/src/web/components/chat/pills/agent-pill.tsx +``` + +- [ ] **Step 3: Type check** + +```bash +bun run check 2>&1 | rg "agent-pill" || echo "agent-pill deletion clean" +``` + +Expected: `agent-pill deletion clean`. + +- [ ] **Step 4: Commit** + +```bash +git add apps/mesh/src/web/components/chat/pills/agent-pill.tsx +git commit -m "chore(chat): delete obsolete AgentPill component + +Replaced by the sectioned popover inside AgentModelTrigger. No +remaining importers." +``` + +--- + +### Task 9: Final verification — `bun run check`, lint, fmt, and the full test suite + +**Files:** none (verification only). + +- [ ] **Step 1: Type check** + +```bash +bun run check +``` + +Expected: exit code 0, no errors. If errors surface, fix them and commit per-file with a `fix(chat):` prefix. + +- [ ] **Step 2: Lint** + +```bash +bun run lint +``` + +Expected: exit code 0. Fix any errors before proceeding. 
+ +- [ ] **Step 3: Format check** + +```bash +bun run fmt:check +``` + +Expected: clean. If anything's unformatted, run `bun run fmt` and amend the most recent commit (or add a "chore: format" commit). + +- [ ] **Step 4: Run the targeted test files** + +```bash +bun test \ + apps/mesh/src/web/components/chat/select-model/agent-models.test.ts \ + apps/mesh/src/web/components/chat/select-model/agent-section.test.tsx \ + apps/mesh/src/web/components/chat/agent-model-popover.test.tsx \ + apps/mesh/src/web/components/chat/agent-model-trigger.test.tsx +``` + +Expected: all green. + +- [ ] **Step 5: Run the full chat-package test suite to make sure nothing else broke** + +```bash +bun test apps/mesh/src/web/components/chat/ +``` + +Expected: all green. (If something else has its own snapshot or behavior expectation broken, fix and commit per-file.) + +- [ ] **Step 6: Smoke test in the browser** + +```bash +bun run dev +``` + +Open the app in a browser, navigate to a chat surface, and verify: +1. The chat input no longer shows the Decopilot/Claude Code/Codex pill above it. +2. Clicking the model trigger in the input opens a sectioned popover. +3. Decopilot section has Fast/Smart/Thinking with the matching glyphs. +4. CLI sections have the green band + " · on this laptop" suffix. +5. Selecting a CLI tier turns the closed trigger green (`text-success` + `bg-success/10` ring). +6. The label collapses at narrow widths without a phantom gap. +7. Sending a message then re-opening the popover greys out the non-active sections. + +- [ ] **Step 7: Final commit (if any cleanup happened)** + +```bash +git status +# If clean, nothing to do. 
Otherwise: +git add -A +git commit -m "chore(chat): clean up after merged-selector implementation" +``` + +--- + +## Self-Review + +| Spec requirement | Implemented in | +| --- | --- | +| Drop `decopilot-laptop` | Task 1 | +| New `getAgentSections` pure fn | Task 2 | +| 3 sections (Decopilot / Claude Code / Codex), 3 tiers each | Tasks 2 + 3 | +| No number shortcuts, no stars | (none added — by omission) | +| Lock semantics (non-active sections opacity-40 + pointer-events-none) | Tasks 3 + 4 | +| Green styling on local sections (`bg-success/5` + `text-success` header) | Task 3 | +| Green styling on closed trigger when CLI active (`text-success` + `bg-success/10`) | Task 5 | +| Phantom-gap fix (`gap-0 @[496px]/chat-bottom:gap-1.5`) | Task 5 | +| Hide CLI sections when laptop offline | Task 2 (via `getAgentSections`) | +| Delete AgentPill | Task 8 | +| Slim agent-options.ts (keep pins, drop computeAgentOptions etc.) | Task 1 | +| Don't delete SimpleModeTierDropdown, ModelSelectorBody, etc. | (untouched) | +| Eager VM start on CLI selection with branch | Task 5 | + +Placeholder scan: none. Type consistency: `AgentKind` is the same across Tasks 2/3/4/5; `AgentSection` is the same shape end-to-end; `ChatTier` re-used. diff --git a/docs/superpowers/specs/2026-05-21-merge-agent-and-model-selectors-design.md b/docs/superpowers/specs/2026-05-21-merge-agent-and-model-selectors-design.md new file mode 100644 index 0000000000..e870f234d0 --- /dev/null +++ b/docs/superpowers/specs/2026-05-21-merge-agent-and-model-selectors-design.md @@ -0,0 +1,168 @@ +# Merge AgentPill and AgentModelTrigger — Design + +**Status:** Draft · 2026-05-21 +**Owner:** @tlgimenes + +## Goal + +Collapse the two stacked selectors around the chat composer — the `AgentPill` (Decopilot vs Claude Code vs Codex, in `ThreadPills`) and the `AgentModelTrigger` (Fast/Smart/Thinking tier inside the chat input) — into a single sectioned popover triggered from the chat input. 
A row click picks both agent and tier in one action, and laptop-CLI sections (Claude Code, Codex) carry a green styling that matches the "Desktop connected" affordance already used in `NoAiProviderEmptyState`. + +## Motivation + +Today the user has to use two surfaces to fully reconfigure their next message: the `AgentPill` (above the composer) for the agent, and the `AgentModelTrigger` (inside the composer) for the tier. The pill also locks once the thread has messages, which makes its "header" position awkward — it stops being interactive but still takes space. Folding both into one popover behind the model trigger: + +1. Single click reconfigures both axes. +2. The chat input strip stays compact (one trigger instead of two). +3. The popover becomes the discovery surface for "what agents do I have" — it explicitly differentiates cloud (Decopilot) from local-runtime agents with the same green token the rest of the app uses for local CLI status. + +## Non-goals + +- We do not allow swapping agents mid-thread. Lock semantics match today: once a thread has messages, the agent choice freezes; only tier can change. +- We do not touch `NoAiProviderEmptyState` (the full-screen "no providers yet" page). It owns its own discovery flow upstream. +- We do not redesign the Decopilot full-model browser (the existing two-pane picker). It stays mounted on the settings page if it has any other callers; in the chat surface it goes away. + +## Decisions + +### Scope: drop `decopilot-laptop` + +Today there are four `AgentOption`s: `decopilot`, `decopilot-laptop`, `claude-code-laptop`, `codex-laptop`. The `decopilot-laptop` variant ran cloud Decopilot through the local sandbox runtime. 
We **remove it entirely** — the popover surfaces three sections (`decopilot`, `claude-code`, `codex`) and `decopilot-laptop` is dropped from the `AgentOption` type, `AGENT_OPTION_PINS`, `computeAgentOptions` callers, and its localStorage value migrates silently to `null` (existing validation already falls back when the stored string isn't in `AGENT_OPTION_PINS`). + +### What can NOT be deleted (constraints found during exploration) + +- `SimpleModeTierDropdown` — still used by `apps/mesh/src/web/views/automations/automation-detail.tsx`. Stays. +- `ModelSelectorBody` / `ModelSelectorStandaloneBody` / `LaptopCliModelSelectorBody` — still used by the `ModelSelector` wrapper in `apps/mesh/src/web/components/chat/select-model.tsx`, which is called from `apps/mesh/src/web/views/settings/ai-providers/simple-mode-section.tsx`. They stay (the chat input simply stops calling them). +- `AGENT_OPTION_PINS`, `pinsForOption`, `pinsToOption` — still used by `chat-context.tsx` and `thread-pills.tsx` to map the persisted option to `(harness, sandbox)`. Stay. + +What CAN be deleted: `AgentPill`, `computeAgentOptions`, `AGENT_OPTION_LABELS`, `AgentOptionsInput`, the `ORDER` array, and the `decopilot-laptop` entries. + +### UI surface + +**Closed trigger (in the chat input).** Same position as today (right side of bottom action row). Renders logo + current selection label. + +- Active = Decopilot: `tier glyph + "Fast"|"Smart"|"Thinking"`, neutral colors. +- Active = Claude Code or Codex: `agent logo + model label` (e.g. "Opus 4.7"), with `text-success` on label and a faint `bg-success/10` ring. This is the green signal that "your runtime is local". +- Locked thread: same content, same styling. The popover opens but non-active sections are disabled (see below). + +**Open popover (~`w-72`).** Three sections, in order: + +1. `Decopilot` — three rows: ⚡ Fast / ✦ Smart / ◆ Thinking. Each row shows description ("Quicker responses" / "Balanced quality" / "Deeper reasoning"). +2. 
`Claude Code · on this laptop` — three rows: Haiku 4.5 / Sonnet 4.6 / Opus 4.7. Same description column. +3. `Codex · on this laptop` — three rows: GPT-5.4 Mini / GPT-5.3 Codex / GPT-5.5. + +Section headers are `text-muted-foreground text-xs`. Local-CLI sections (Claude Code, Codex) sit on a faint `bg-success/5` band and their header text goes `text-success`. The current selection is marked with the existing "On" chip used by `SimpleModeTierDropdown` today. + +**Locked-thread popover.** When the thread has messages, only the section matching the persisted `threadHarness` is interactive. The other two sections render with `opacity-40 pointer-events-none` and a small lock icon on the header — the user can still see what exists, they just can't click it. + +**Offline laptop CLI.** When `link.online === false`, CLI sections are *hidden* (not shown disabled). Matches today's `computeAgentOptions` gating behavior; discovery of CLI agents is the `NoAiProviderEmptyState` card's job. + +### Gap bug fix + +`AgentModelTrigger`'s button currently uses `gap-1.5` while the label-span collapses to `max-w-0 opacity-0` at narrow widths via container queries. The parent gap survives the collapse, leaving a phantom 6px between the logo and the chevron. Fix by making the gap container-responsive: `gap-0 @[496px]/chat-bottom:gap-1.5`. Applies to all three agent variants since they share the trigger. + +### Data model + +A single `AgentSection[]` drives the popover. Built by a pure function `getAgentSections({ hasAnyKey, link })` that subsumes today's `computeAgentOptions` (which goes away with `AgentPill`). 
+ +```ts +export type AgentKind = "decopilot" | "claude-code" | "codex"; + +export interface AgentTierEntry { + modelId: string | null; // null for Decopilot — server resolves via provider key + label: string; // "Smart", "Sonnet 4.6", "GPT-5.3 Codex" + description: string; // tier description + icon: ReactNode | string; // glyph for Decopilot tiers; logo url for CLI rows +} + +export interface AgentSection { + kind: AgentKind; + title: string; // "Decopilot" | "Claude Code" | "Codex" + isLocal: boolean; // drives bg-success/5 + "on this laptop" suffix + tiers: Record; +} +``` + +Selection rules (mirror today's `computeAgentOptions`): + +- Decopilot included iff `hasAnyKey === true`. +- Claude Code included iff `link.online && link.capabilities.includes("claude-code")`. +- Codex included iff `link.online && link.capabilities.includes("codex")`. + +### Selection write path + +Row click does both writes that today live on two different components: + +```ts +function onRowSelect(section: AgentSection, tier: ChatTier) { + const opt = optionForAgent(section.kind); // "decopilot" | "claude-code-laptop" | "codex-laptop" + prefs.setPendingAgentOption(opt); + prefs.setSimpleModeTier(tier); + if (section.kind === "decopilot") { + prefs.clearModel(); + } else { + prefs.setModel({ modelId: section.tiers[tier].modelId!, keyId: undefined }); + } + if (section.isLocal && currentBranch) { + startVm.mutate({ virtualMcpId, branch: currentBranch, sandboxProviderKind: "remote-user" }); + } + track("agent_model_selected", { agent: section.kind, tier }); +} +``` + +### Component layout + +``` +apps/mesh/src/web/components/chat/ +├── agent-model-trigger.tsx UPDATED · closed pill, success styling when isLocal, gap fix +├── agent-model-popover.tsx NEW · renders sections + lock state +├── select-model/ +│ ├── agent-models.ts UPDATED · adds Decopilot, exports getAgentSections +│ ├── agent-section.tsx NEW · one section (header + 3 rows), handles isLocal styling +│ ├── decopilot.tsx UNCHANGED 
· still mounted from settings page if applicable +│ ├── laptop-cli.tsx DELETED · folded into agent-section.tsx +│ └── index.tsx UPDATED/DELETED · ModelSelectorBody no longer used in chat flow +└── pills/ + ├── agent-pill.tsx DELETED + ├── agent-options.ts UPDATED · keeps the pins helpers; gating rules move to agent-models.ts + └── thread-pills.tsx UPDATED · drops AgentPill; BranchPill unchanged +``` + +### Lock semantics wiring + +`ThreadPills` already computes `isActive = (stream?.messages ?? []).length > 0` and reads `threadHarness`/`threadKind`. These move down into `AgentModelPopover` so the popover can disable non-active sections. The closed trigger's green styling reads from `(threadHarness ?? pendingHarnessId)` — correct in both locked and pending states. + +## Edge cases + +| Scenario | Behavior | +| --- | --- | +| No cloud keys, no laptop CLI | `NoAiProviderEmptyState` still owns the screen — popover never renders. | +| No cloud keys, laptop online (Claude Code only) | Only "Claude Code" section shows. | +| Cloud keys present, laptop offline | Only "Decopilot" section shows. | +| Thread active on Claude Code | Decopilot + Codex headers muted; Claude Code rows interactive. | +| User selects Decopilot row, empty thread | `setPendingAgentOption("decopilot")` + `setSimpleModeTier` + `clearModel()`. No eager VM start (cloud). | +| User selects Claude Code row, empty thread, branch picked | Above writes + `setModel({modelId, keyId: undefined})` + eager `startVm.mutate(...)`. | +| Laptop drops offline while popover open | `useCurrentLink` polls every 15s; next tick removes CLI sections via `getAgentSections`. | +| Trigger at narrow container width | `gap-0 @[496px]/chat-bottom:gap-1.5` removes phantom gap. | + +## Testing + +1. **`getAgentSections` unit tests** — table-driven over `(hasAnyKey, link.online, capabilities)`; asserts which sections appear and which carry `isLocal: true`. +2. 
**`AgentModelPopover` component tests** — render with mocked sections + `isActive`: + - (a) only the active-thread section is interactive when locked, + - (b) row click triggers the expected two-write sequence (verify via mocked prefs), + - (c) `bg-success/5` lands on `isLocal` sections. +3. **`AgentModelTrigger` snapshot/behavior test** — assert (a) `text-success` only when resolved agent is CLI, (b) `gap-0` class present and `@[496px]/chat-bottom:gap-1.5` modifier present. + +No e2e changes needed — message routing semantics didn't change. + +## Removals + +- `apps/mesh/src/web/components/chat/pills/agent-pill.tsx` — full file deleted. +- From `apps/mesh/src/web/components/chat/pills/agent-options.ts`: remove `decopilot-laptop` from `AgentOption` union and `AGENT_OPTION_PINS`; remove `computeAgentOptions`, `AGENT_OPTION_LABELS`, `AgentOptionsInput`, and the local `ORDER` array. Keep `AGENT_OPTION_PINS`, `pinsForOption`, `pinsToOption`, the `AgentOption` type itself, and the `AgentPins` interface. +- From `apps/mesh/src/web/components/chat/pills/thread-pills.tsx`: remove the AgentPill JSX, its imports, the `setPendingAgentOption` callback, the `useAiProviderKeys`/`useCurrentLink`/`useVmStart`/`startVm.mutate` plumbing (moves into the popover's row handler), and the surrounding `·` separator. `BranchPill` and its props stay. +- From `apps/mesh/src/web/components/chat/agent-model-trigger.tsx`: the `SimpleModeTierDropdown` fallback path goes away — the trigger always renders `AgentModelPopover` now. + +What stays: + +- `SimpleModeTierDropdown` (still used by `automation-detail.tsx`). +- `ModelSelectorBody` / `ModelSelectorStandaloneBody` / `LaptopCliModelSelectorBody` (still used by the settings page via the `ModelSelector` wrapper). +- `AGENT_OPTION_PINS`, `pinsForOption`, `pinsToOption` (still used by `chat-context.tsx`). 
diff --git a/package.json b/package.json index 9a066f8fd0..d91c507eea 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "scripts": { "dev": "bun run --env-file=.env scripts/dev.ts", "dev:worktree": "bun run scripts/dev-worktree.ts", - "dev:conductor": "WORKTREE_SLUG=$CONDUCTOR_WORKSPACE_NAME bun run dev:worktree", + "dev:conductor": "WORKTREE_SLUG=$CONDUCTOR_WORKSPACE_NAME bun run dev:worktree -- --local-sandbox-provider", "fmt": "biome format --write", "lint": "oxlint", "check": "bun run --workspaces check", diff --git a/packages/mesh-sdk/src/index.ts b/packages/mesh-sdk/src/index.ts index ebec292d76..68c2a37b42 100644 --- a/packages/mesh-sdk/src/index.ts +++ b/packages/mesh-sdk/src/index.ts @@ -123,6 +123,9 @@ export { type RuntimeMetadata, type RuntimeEnvEntry, ENV_VAR_KEY_RE, + parseVmMapEntry, + parseBranchMap, + normalizeVmMap, type GithubRepo, // Decopilot event types THREAD_STATUSES, diff --git a/packages/mesh-sdk/src/types/index.ts b/packages/mesh-sdk/src/types/index.ts index ec03a6d94e..d7c2b8abdb 100644 --- a/packages/mesh-sdk/src/types/index.ts +++ b/packages/mesh-sdk/src/types/index.ts @@ -35,6 +35,9 @@ export { type RuntimeMetadata, type RuntimeEnvEntry, ENV_VAR_KEY_RE, + parseVmMapEntry, + parseBranchMap, + normalizeVmMap, } from "./virtual-mcp"; export { diff --git a/packages/mesh-sdk/src/types/virtual-mcp.test.ts b/packages/mesh-sdk/src/types/virtual-mcp.test.ts index 66ecba4026..85908931d3 100644 --- a/packages/mesh-sdk/src/types/virtual-mcp.test.ts +++ b/packages/mesh-sdk/src/types/virtual-mcp.test.ts @@ -4,6 +4,8 @@ import { VirtualMcpUILayoutSchema, VirtualMCPUpdateDataSchema, VmMapEntrySchema, + parseVmMapEntry, + parseBranchMap, } from "./virtual-mcp"; describe("VirtualMcpUILayoutSchema tabs", () => { @@ -117,3 +119,82 @@ test("VmMapEntry.startedWith is optional with nullable packageManager/port/path" expect(c.startedWith?.port).toBeNull(); expect(c.startedWith?.path).toBeNull(); }); + +describe("parseBranchMap tolerant reader", 
() => { + test("parses 3-level (kind-keyed) map", () => { + const result = parseBranchMap({ + docker: { vmId: "v1", previewUrl: null, sandboxProviderKind: "docker" }, + "remote-user": { + vmId: "v2", + previewUrl: null, + sandboxProviderKind: "remote-user", + }, + }); + expect(result.docker?.vmId).toBe("v1"); + expect(result["remote-user"]?.vmId).toBe("v2"); + }); + + test("wraps 2-level legacy entry under its sandboxProviderKind", () => { + const result = parseBranchMap({ + vmId: "v-legacy", + previewUrl: null, + sandboxProviderKind: "remote-user", + }); + expect(result["remote-user"]?.vmId).toBe("v-legacy"); + expect(result.docker).toBeUndefined(); + }); + + test("coalesces a legacy freestyle entry to docker on read", () => { + const result = parseBranchMap({ + vmId: "v-very-legacy", + previewUrl: null, + runnerKind: "freestyle", + }); + // freestyle runner no longer exists; legacy rows fall back to docker. + expect(result.docker?.vmId).toBe("v-very-legacy"); + }); + + test("returns empty object for null/undefined/arrays", () => { + expect(parseBranchMap(null)).toEqual({}); + expect(parseBranchMap(undefined)).toEqual({}); + expect(parseBranchMap([])).toEqual({}); + }); + + test("legacy entry without sandboxProviderKind defaults to 'docker'", () => { + const result = parseBranchMap({ vmId: "v-orphan", previewUrl: null }); + expect(result.docker?.vmId).toBe("v-orphan"); + }); +}); + +describe("parseVmMapEntry tolerant reader", () => { + test("accepts new sandboxProviderKind field", () => { + const result = parseVmMapEntry({ + vmId: "v1", + previewUrl: null, + sandboxProviderKind: "docker", + }); + expect(result.sandboxProviderKind).toBe("docker"); + }); + + test("normalizes legacy runnerKind into sandboxProviderKind", () => { + const result = parseVmMapEntry({ + vmId: "v1", + previewUrl: null, + runnerKind: "remote-user", + }); + expect(result.sandboxProviderKind).toBe("remote-user"); + expect( + (result as unknown as { runnerKind?: unknown }).runnerKind, + 
).toBeUndefined(); + }); + + test("prefers explicit sandboxProviderKind when both keys present", () => { + const result = parseVmMapEntry({ + vmId: "v1", + previewUrl: null, + runnerKind: "docker", + sandboxProviderKind: "remote-user", + }); + expect(result.sandboxProviderKind).toBe("remote-user"); + }); +}); diff --git a/packages/mesh-sdk/src/types/virtual-mcp.ts b/packages/mesh-sdk/src/types/virtual-mcp.ts index 3e065c9ab1..376b7cfab9 100644 --- a/packages/mesh-sdk/src/types/virtual-mcp.ts +++ b/packages/mesh-sdk/src/types/virtual-mcp.ts @@ -213,7 +213,7 @@ export type GithubRepo = z.infer; /** * A single vm entry in vmMap — the vmId plus the preview URL the UI renders. * - * `runnerKind` lets the UI construct daemon URLs correctly: + * `sandboxProviderKind` lets the UI construct daemon URLs correctly: * - docker: daemon is reached via the mesh proxy at `/api/sandbox//_daemon/*` * - agent-sandbox: daemon is reached via the mesh proxy (same transport as docker); * preview URL is the per-claim HTTPRoute host (in-cluster) or a local port-forward (kind dev). @@ -230,7 +230,19 @@ export const VmMapEntrySchema = z.object({ .describe( "URL where the VM's iframe-proxied UI is served, or null when the sandbox has no dev server (blank / tool sandboxes).", ), - runnerKind: z.enum(["host", "docker", "agent-sandbox"]).optional(), + sandboxUrl: z + .string() + .nullable() + .optional() + .describe( + "Daemon's public URL — what cluster→daemon RPCs target. Equal to previewUrl for remote-user; null/absent for runners that route through cluster ingress (docker, agent-sandbox).", + ), + sandboxProviderKind: z + // Legacy values ("freestyle", "host") are tolerated on read for + // pre-removal vmMap entries; writers use one of the active kinds. + // The tolerant readers (parseVmMapEntry, parseBranchMap) normalize. 
+ .enum(["docker", "agent-sandbox", "remote-user", "freestyle", "host"]) + .optional(), createdAt: z .number() .optional() @@ -264,17 +276,137 @@ export const VmMapEntrySchema = z.object({ export type VmMapEntry = z.infer; /** - * Maps a user to their vm entries per branch. - * Lookup: vmMap[userId][branch] -> { vmId, previewUrl } - * Multiple threads with the same (userId, branch) share one vm. + * Tolerant reader: pre-rename rows persisted the field as `runnerKind`. Until + * a full re-write touches every entry, this function normalizes the legacy key + * into `sandboxProviderKind`. Writers always use the new key. + * + * Use this function wherever raw JSON from the database is parsed into a + * `VmMapEntry` — never cast unknown JSON directly as `VmMapEntry`. + * + * TODO(2026-06-20): drop this tolerant reader once migration 080 has run + * everywhere and a write has touched every vmMap entry. See spec + * docs/superpowers/specs/2026-05-20-vm-as-runtime-identity-design.md. + */ +export function parseVmMapEntry(raw: unknown): VmMapEntry { + let normalized = raw; + if (raw && typeof raw === "object" && !Array.isArray(raw)) { + const obj = raw as Record; + if (obj.runnerKind !== undefined && obj.sandboxProviderKind === undefined) { + const { runnerKind, ...rest } = obj; + normalized = { ...rest, sandboxProviderKind: runnerKind }; + } + } + return VmMapEntrySchema.parse(normalized); +} + +/** The active sandbox provider kinds (excludes legacy "freestyle", "host"). */ +type SandboxProviderKind = "docker" | "agent-sandbox" | "remote-user"; + +/** + * Tolerant reader at the branch-map level. + * + * In v2, a branch's value is itself a map of `sandboxProviderKind → VmMapEntry` + * (so cloud + local can coexist on the same branch). Legacy v1 rows stored a + * single `VmMapEntry` directly at the branch level. This function accepts + * either shape and returns a normalized v2 partial record. 
+ * + * TODO(2026-06-20): drop the 2-level wrap path once migration 081 has run + * everywhere and writers have touched every entry. + */ +export function parseBranchMap( + raw: unknown, +): Partial> { + if (!raw || typeof raw !== "object" || Array.isArray(raw)) return {}; + const obj = raw as Record; + + // Legacy 2-level: the value at this level is itself a VmMapEntry (has vmId). + if (typeof obj.vmId === "string") { + const entry = parseVmMapEntry(obj); + // Coalesce legacy "freestyle"/"host" values to "docker" since those + // runners no longer exist; rows from before the removal still parse. + const raw = entry.sandboxProviderKind; + const kind: SandboxProviderKind = + raw === "docker" || raw === "agent-sandbox" || raw === "remote-user" + ? raw + : "docker"; + return { [kind]: entry }; + } + + // New 3-level: kind → entry. + const out: Partial> = {}; + for (const [k, v] of Object.entries(obj)) { + if (!v || typeof v !== "object") continue; + try { + out[k as SandboxProviderKind] = parseVmMapEntry(v); + } catch { + // Skip malformed entries rather than throw — readers are tolerant by design. + } + } + return out; +} + +/** + * Maps a user to their vm entries per (branch, sandboxProviderKind). + * Lookup: vmMap[userId][branch][sandboxProviderKind] -> VmMapEntry + * + * Multiple threads on the same (userId, branch, kind) share one VM. + * Cloud and local VMs can coexist on the same branch as siblings. + * + * The schema is strict v2. Reads of legacy v1 data MUST be normalized via + * `normalizeVmMap` (this file) BEFORE Zod validation — strict input/output + * types here are load-bearing for `useForm<…>(zodResolver(…))` callers, + * whose generic depends on `z.input` being identical to `z.output`. A + * `z.preprocess` here widens `z.input` to `unknown` and breaks the form. 
*/ export const VmMapSchema = z.record( z.string().describe("userId"), - z.record(z.string().describe("branch"), VmMapEntrySchema), + z.record( + z.string().describe("branch"), + z.record(z.string().describe("sandboxProviderKind"), VmMapEntrySchema), + ), ); export type VmMap = z.infer; +/** + * Normalize a raw `metadata.vmMap` value into v2 shape on read. Use this in + * storage adapters BEFORE returning data that will be Zod-validated against + * `VirtualMCPEntitySchema` (or any schema embedding `VmMapSchema`). + * + * Tolerates two legacy on-disk shapes from rows written before migration + * 082 actually rewrote them: + * 1. v1 2-level layout: vmMap[user][branch] = VmMapEntry + * 2. `runnerKind` field on entries instead of `sandboxProviderKind` + * + * Returns `{}` for missing / malformed input rather than throwing — readers + * should never crash on bad on-disk data; the strict schema catches any + * residual issues at validation time. + */ +export function normalizeVmMap(raw: unknown): VmMap { + if (!raw || typeof raw !== "object" || Array.isArray(raw)) return {}; + const out: VmMap = {}; + for (const [userId, userVal] of Object.entries( + raw as Record, + )) { + if (!userVal || typeof userVal !== "object" || Array.isArray(userVal)) { + continue; + } + const userOut: VmMap[string] = {}; + for (const [branch, branchVal] of Object.entries( + userVal as Record, + )) { + const normalized = parseBranchMap(branchVal); + if (Object.keys(normalized).length > 0) { + userOut[branch] = normalized as VmMap[string][string]; + } + } + if (Object.keys(userOut).length > 0) { + out[userId] = userOut; + } + } + return out; +} + /** * Virtual MCP entity schema - single source of truth * Compliant with collections binding pattern diff --git a/packages/sandbox/README.md b/packages/sandbox/README.md index 61904de36c..2b87df9c00 100644 --- a/packages/sandbox/README.md +++ b/packages/sandbox/README.md @@ -4,24 +4,18 @@ Isolated per-user sandboxes for MCP tool execution. 
One sandbox per `(userId, projectRef)`: a container (or VM) holding a checked-out repo plus an in-pod daemon that proxies exec, file ops, and the dev server. -Callers go through a single `SandboxRunner` interface; the runner decides how +Callers go through a single `SandboxProvider` interface; the runner decides how the sandbox is provisioned and reached. ## Runners -Three runner backends live behind the common `SandboxRunner` interface -(`server/runner/types.ts`): +Three runner backends live behind the common `SandboxProvider` interface +(`server/provider/types.ts`): -- **`host`** — local dev / single-tenant self-host. Spawns the same Bun-based - daemon as the Docker runner but as a host child process, with a per-branch - full git clone in `${DATA_DIR}/sandboxes//`. The local - `*.localhost:7070` ingress routes browser traffic to the per-branch daemon's - host TCP port. No container; no hardening (the daemon runs in the user's - trust boundary). -- **Docker** (`./runner`) — containerized sandboxes. Spawns containers via the +- **Docker** (`./provider`) — containerized sandboxes. Spawns containers via the local Docker CLI and routes browser traffic through an in-process ingress bound on `SANDBOX_INGRESS_PORT`. -- **agent-sandbox** (`./runner/agent-sandbox`) — one `SandboxClaim` per sandbox +- **agent-sandbox** (`./provider/agent-sandbox`) — one `SandboxClaim` per sandbox against the [kubernetes-sigs/agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox) operator. Studio talks to pods via apiserver port-forward in dev; in prod, `previewUrlPattern` switches the preview URL to real ingress and skips the @@ -29,13 +23,21 @@ Three runner backends live behind the common `SandboxRunner` interface ### Selection -The host app calls `resolveRunnerKindFromEnv()` to pick the runner. Single rule: +The host app calls `resolveSandboxProviderKindFromEnv()` to pick the runner. Single rule: -1. 
`STUDIO_SANDBOX_RUNNER` is honored if set (one of `host`, `docker`, - `agent-sandbox`). -2. Otherwise the runner defaults to `host`. +1. `STUDIO_SANDBOX_RUNNER` is honored if set (one of `docker`, + `agent-sandbox`, `remote-user`). +2. Otherwise the runner defaults to `remote-user` (the laptop-side + `deco link` daemon — auto-spawned by `bun run dev --local-sandbox-provider` + in local dev, and the supported topology for single-machine self-hosts + running the link side-by-side). -`agent-sandbox` is opt-in only — never auto-selected. +Preconditions: + +- `agent-sandbox` is opt-in only — never auto-selected. +- The retired `host` runner kind is rejected. Local dev now exercises + `remote-user` against the auto-spawned link binary, matching the + production code path. ## URL shape @@ -64,14 +66,15 @@ for this, you can remove them — they're no longer needed. ## Environment -- `STUDIO_SANDBOX_RUNNER` — pin the runner: `host` (default), `docker`, - or `agent-sandbox`. Setting it explicitly is required for any non-host - runner. Auto-detection of Docker has been removed. +- `STUDIO_SANDBOX_RUNNER` — pin the runner: `docker`, + `agent-sandbox`, or `remote-user`. Defaults to `remote-user`. Setting + it explicitly is required for production deploys; auto-detection of + Docker has been removed. - `STUDIO_SANDBOX_IMAGE` — override the Docker runner image (default `studio-sandbox:local`, built from `image/Dockerfile`). - `SANDBOX_INGRESS_PORT` (default `7070`) — local ingress bind port for the - host/docker runners. Set to `0` to skip binding entirely (use this if a - real reverse proxy fronts `*.localhost` traffic instead). + Docker runner. Set to `0` to skip binding entirely (use this if a real + reverse proxy fronts `*.localhost` traffic instead). - `SANDBOX_ROOT_URL` — production template for the pod URL. Either a bare base (`https://sandboxes.example.com` → handle becomes leading subdomain) or a `{handle}` template (`https://{handle}.sandboxes.example.com`). 
diff --git a/packages/sandbox/daemon/auth.test.ts b/packages/sandbox/daemon/auth.test.ts new file mode 100644 index 0000000000..0c51daea20 --- /dev/null +++ b/packages/sandbox/daemon/auth.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from "bun:test"; +import { signRequest } from "../../../apps/mesh/src/links/protocol/hmac"; +import { requireHmacOrToken } from "./auth"; + +const LINK_SECRET = "test-link-secret-32-chars-min-aaaa"; +const DAEMON_TOKEN = "test-daemon-token-32-chars-min-aaaa"; + +function makeRequest( + method: string, + path: string, + init?: { headers?: Record; body?: string }, +): Request { + return new Request(`http://daemon${path}`, { + method, + headers: init?.headers, + body: init?.body, + }); +} + +describe("requireHmacOrToken", () => { + it("accepts a valid HMAC-signed request", () => { + const sig = signRequest({ + secret: LINK_SECRET, + method: "POST", + path: "/_decopilot_vm/exec", + body: "", + }); + const req = makeRequest("POST", "/_decopilot_vm/exec", { headers: sig }); + expect( + requireHmacOrToken(req, "/_decopilot_vm/exec", "", { + linkSecret: LINK_SECRET, + daemonToken: DAEMON_TOKEN, + seenNonce: () => false, + }), + ).toBeNull(); + }); + + it("rejects an HMAC request signed with the wrong secret", () => { + const sig = signRequest({ + secret: "wrong-secret-32-chars-min-aaaaaaaa", + method: "POST", + path: "/_decopilot_vm/exec", + body: "", + }); + const req = makeRequest("POST", "/_decopilot_vm/exec", { headers: sig }); + const result = requireHmacOrToken(req, "/_decopilot_vm/exec", "", { + linkSecret: LINK_SECRET, + daemonToken: DAEMON_TOKEN, + seenNonce: () => false, + }); + expect(result?.status).toBe(401); + }); + + it("accepts a valid bearer-token request when HMAC headers absent", async () => { + const req = makeRequest("POST", "/_decopilot_vm/exec", { + headers: { authorization: `Bearer ${DAEMON_TOKEN}` }, + }); + expect( + requireHmacOrToken(req, "/_decopilot_vm/exec", "", { + linkSecret: LINK_SECRET, + 
daemonToken: DAEMON_TOKEN, + seenNonce: () => false, + }), + ).toBeNull(); + }); + + it("rejects a request with neither HMAC nor bearer", () => { + const req = makeRequest("POST", "/_decopilot_vm/exec"); + const result = requireHmacOrToken(req, "/_decopilot_vm/exec", "", { + linkSecret: LINK_SECRET, + daemonToken: DAEMON_TOKEN, + seenNonce: () => false, + }); + expect(result?.status).toBe(401); + }); + + it("rejects when HMAC headers are present but malformed, even with a valid bearer", async () => { + const req = makeRequest("POST", "/_decopilot_vm/exec", { + headers: { + "X-Mesh-Signature": "garbage", + authorization: `Bearer ${DAEMON_TOKEN}`, + }, + }); + // Malformed HMAC is a hard reject — don't silently downgrade. + const result = requireHmacOrToken(req, "/_decopilot_vm/exec", "", { + linkSecret: LINK_SECRET, + daemonToken: DAEMON_TOKEN, + seenNonce: () => false, + }); + expect(result?.status).toBe(401); + }); +}); diff --git a/packages/sandbox/daemon/auth.ts b/packages/sandbox/daemon/auth.ts index 0008ae467d..9a23688ff8 100644 --- a/packages/sandbox/daemon/auth.ts +++ b/packages/sandbox/daemon/auth.ts @@ -1,9 +1,10 @@ +import { + SIG_HEADER, + verifyRequest, +} from "../../../apps/mesh/src/links/protocol/hmac"; import { jsonResponse } from "./routes/body-parser"; -export function requireToken( - req: Request, - expectedToken: string, -): Response | null { +function requireToken(req: Request, expectedToken: string): Response | null { const header = req.headers.get("authorization") ?? ""; const prefix = "Bearer "; if (!header.startsWith(prefix)) { @@ -16,6 +17,63 @@ export function requireToken( return null; } +export interface HmacOrTokenDeps { + // HMAC signing key — equals the cluster-side `LinkEntry.linkSecret`. + // May be empty for daemons that aren't running under a link (legacy docker). + linkSecret: string; + // Bearer token — the legacy auth path. Both env-spawned daemons set this. + daemonToken: string; + // Nonce-replay guard. 
The caller owns sizing/TTL — verifier just queries. + seenNonce: (nonce: string) => boolean; +} + +// Accepts EITHER an HMAC-signed request (cluster → daemon over the link's +// tunnel) OR a bearer-token request (in-cluster decopilot built-in tools +// spawning docker). HMAC is checked first when its headers are present — +// a malformed HMAC is a hard reject (no silent downgrade to bearer). When +// HMAC headers are absent, falls through to bearer. +// +// Why no silent downgrade: a signature-present-but-invalid request is an +// attack signal, not a hint to retry with another scheme. Allowing bearer +// to succeed when HMAC fails would let an attacker probing the link's +// secret discover that bearer is also accepted, leaking the per-endpoint +// capability set to an unauthenticated probe. Treat the signature-present +// branch as committed: it succeeds or rejects on its own merit. +// +// Returns null on success; an unauthorized Response on failure. +export function requireHmacOrToken( + req: Request, + path: string, + body: string, + deps: HmacOrTokenDeps, +): Response | null { + const headers = Object.fromEntries(req.headers); + const hasSignature = !!( + headers[SIG_HEADER] ?? 
headers[SIG_HEADER.toLowerCase()] + ); + if (hasSignature) { + if (!deps.linkSecret) { + return jsonResponse({ error: "unauthorized", reason: "no_link" }, 401); + } + const result = verifyRequest({ + secret: deps.linkSecret, + method: req.method, + path, + body, + headers, + seenNonce: deps.seenNonce, + }); + if (!result.valid) { + return jsonResponse( + { error: "unauthorized", reason: result.reason }, + 401, + ); + } + return null; + } + return requireToken(req, deps.daemonToken); +} + function constantTimeEqual(a: string, b: string): boolean { if (a.length !== b.length) return false; let diff = 0; diff --git a/packages/sandbox/daemon/constants.ts b/packages/sandbox/daemon/constants.ts index c9c3aa16ff..d89378dbc5 100644 --- a/packages/sandbox/daemon/constants.ts +++ b/packages/sandbox/daemon/constants.ts @@ -1,6 +1,14 @@ import { IFRAME_BOOTSTRAP_SCRIPT } from "../shared"; -export const MAX_SSE_CLIENTS = 10; +// Per-daemon SSE subscriber cap. Each browser tab opens one +// /api/.../vm-events SSE upstream to the daemon's /_decopilot_vm/events, +// and any failed reconnect quickly stacks: with 10 the cluster's retry +// storm on cold-start (auto-resume vm-events SSE) blew the budget and +// the daemon started returning 429, which the cluster surfaced as +// `Upstream daemon SSE failed (429)` and the UI got stuck on "Starting +// sandbox…". 100 absorbs a typical dev session's reconnect churn +// (3–5 tabs × a few retries each) with headroom. +export const MAX_SSE_CLIENTS = 100; // Per-source ring buffer cap. Real install logs (clone + npm/bun install on a // nontrivial repo) are easily 50–200 KB; with the prior 4 KB cap, late SSE // joiners only saw the last few package-manager lines. 
256 KB covers a diff --git a/packages/sandbox/daemon/entry.ts b/packages/sandbox/daemon/entry.ts index 3d808f714b..5e44ab45cb 100644 --- a/packages/sandbox/daemon/entry.ts +++ b/packages/sandbox/daemon/entry.ts @@ -2,7 +2,7 @@ import { randomUUID } from "node:crypto"; import { mkdirSync } from "node:fs"; import { join } from "node:path"; import { bumpActivity } from "./activity"; -import { requireToken } from "./auth"; +import { requireHmacOrToken } from "./auth"; import { TenantConfigStore } from "./config-store"; import { REPLAY_BYTES } from "./constants"; import { Broadcaster } from "./events/broadcast"; @@ -23,6 +23,19 @@ import { makeConfigReadHandler, makeConfigUpdateHandler, } from "./routes/config"; +import { handleCancelRequest, handleDispatchRequest } from "./routes/dispatch"; +// Import CLI factories from their subpaths (rather than the barrel +// `apps/mesh/src/harnesses/index.ts`) to avoid pulling in the cluster-only +// `decopilotHarnessFactory` and its dependency tree (which references +// `@/...` path aliases that only exist under apps/mesh). +import { claudeCodeHarnessFactory } from "../../../apps/mesh/src/harnesses/claude-code"; +import { codexHarnessFactory } from "../../../apps/mesh/src/harnesses/codex"; +import type { + HarnessContext, + HarnessFactory, + HarnessStreamInput, +} from "../../../apps/mesh/src/harnesses/types"; +import { metrics, trace } from "@opentelemetry/api"; import { makeEventsHandler } from "./routes/events-stream"; import { makeExecHandler } from "./routes/exec"; import { @@ -72,6 +85,15 @@ const bootConfig = { appRoot: APP_ROOT, repoDir: join(APP_ROOT, "repo"), proxyPort: parseInt(resolvedDaemonPort, 10), + // HMAC secret used to authenticate the cluster's harness-dispatch + // calls (POST /_decopilot_vm/dispatch + DELETE /_decopilot_vm/runs/:id). + // Equals the link's `linkSecret` from the cluster's `LinkRegistry`. 
+ // Spawned by the `deco link` daemon for the laptop case; absent on legacy + // cluster-side runner daemons that don't accept remote harness + // dispatch — in that case the dispatch routes refuse all requests + // (HMAC verification with an empty secret never matches a valid + // signature). + linkSecret: process.env.DAEMON_LINK_SECRET ?? "", }; // Ensure repoDir exists so bash commands with the default cwd don't fail with // ENOENT when no repo has been cloned yet (tool-only sandboxes, no-repo agents). @@ -332,6 +354,67 @@ const eventsH = makeEventsHandler({ const idleH = makeIdleHandler(); const proxyH = makeProxyHandler({ broadcaster, getDevPort }); + +// ─── Remote harness dispatch ─────────────────────────────────────────── +// Authenticated by HMAC against `bootConfig.linkSecret` (see comment on +// the field above). The cluster's `remoteDispatch` posts a signed +// HarnessStreamInput here; the daemon spawns the named factory's CLI +// in-process and streams `UIMessageChunk` back as SSE. +// +// Only the CLI factories live in the daemon — decopilot pulls in +// cluster-only modules (RunRegistry, run-stream internals) and is never +// invoked over the wire. +const dispatchHarnessRegistry: Map = new Map([ + ["claude-code", claudeCodeHarnessFactory], + ["codex", codexHarnessFactory], +]); +const dispatchTracer = trace.getTracer("link-daemon"); +const dispatchMeter = metrics.getMeter("link-daemon"); +// Per-process replay-protection cache. 100k nonces ≈ a few MB; the +// cluster's signing layer rotates nonces per request so the set fills +// slowly under legitimate traffic. Shared across dispatch/cancel AND +// the dual-auth (HMAC-or-bearer) guard on the rest of /_decopilot_vm/* +// — collision is computationally infeasible since the nonce is part of +// the signed string. 
+// +// Note: `verifyRequest` consults `seenNonce` BEFORE the HMAC comparison +// (see apps/mesh/src/links/protocol/hmac), so an unauthenticated attacker +// reaching the daemon can flood the cache with chosen nonces. The +// bounded Set + FIFO eviction means the attacker pays the same per- +// request cost as legitimate clients — irritating, not exploitable — +// but the surface is wider than just "compromised-secret traffic." +const vmNonces = new Set(); +const seenVmNonce = (nonce: string): boolean => { + if (vmNonces.has(nonce)) return true; + if (vmNonces.size >= 100_000) { + // Drop the iterator's first entry (insertion order — oldest). + const first = vmNonces.values().next().value; + if (first) vmNonces.delete(first); + } + vmNonces.add(nonce); + return false; +}; +const lookupDispatchHarness = (id: string, input: unknown) => { + const factory = dispatchHarnessRegistry.get(id); + if (!factory) throw new Error(`unknown harness: ${id}`); + // Build a minimal HarnessContext. CLI harnesses don't read storage, + // db, vault, or aiProviders — they only need tracer/meter for OTel + // and metadata for span attributes. The cluster's richer MeshContext + // is structurally compatible with this shape (see + // `apps/mesh/src/core/harness-context.ts`). 
+ const harnessInput = input as HarnessStreamInput; + const ctx: HarnessContext = { + tracer: dispatchTracer, + meter: dispatchMeter, + metadata: { + threadId: harnessInput.threadId, + orgId: harnessInput.organizationId, + userId: harnessInput.user?.id, + }, + }; + const harness = factory.create(ctx); + return { stream: () => harness.stream(harnessInput) }; +}; const wsProxy = makeWsUpgrader(getDevPort, { onClientMessage: bumpActivity }); const configReadH = makeConfigReadHandler({ @@ -448,26 +531,58 @@ const CORS_HEADERS = { "Content-Type, Accept, Cache-Control, Authorization", }; -function vmRouteH( +async function vmRouteH( req: Request, method: string, vmPath: string, -): Response | Promise { +): Promise { if (method === "GET" && vmPath === "/idle") return idleH(); if (method === "GET" && vmPath === "/events") return eventsH(); if (method === "GET" && vmPath === "/scripts") return scriptsHandler(); if (method === "OPTIONS") return new Response(null, { status: 204, headers: CORS_HEADERS }); - const denied = requireToken(req, bootConfig.daemonToken); + // Harness dispatch + cancel are HMAC-only and read their own body / + // signed path (the cluster signs against the pre-strip path forwarded + // via X-Forwarded-Path). Keep them inline so we don't double-read the + // body or short-circuit their own signature checks. + if (method === "POST" && vmPath === "/dispatch") { + return handleDispatchRequest(req, { + bearerSecret: bootConfig.linkSecret, + lookupHarness: lookupDispatchHarness, + seenNonce: seenVmNonce, + }); + } + if (method === "DELETE" && vmPath.startsWith("/runs/")) { + return handleCancelRequest(req, { + bearerSecret: bootConfig.linkSecret, + seenNonce: seenVmNonce, + }); + } + + // Buffer body once so HMAC can sign over it; re-inject as a fresh + // Request for downstream handlers that re-read. + const hasBody = method !== "GET" && method !== "HEAD" && method !== "DELETE"; + const body = hasBody ? 
await req.text() : ""; + const path = `/_decopilot_vm${vmPath}`; + + const denied = requireHmacOrToken(req, path, body, { + linkSecret: bootConfig.linkSecret, + daemonToken: bootConfig.daemonToken, + seenNonce: seenVmNonce, + }); if (denied) return denied; - if (vmPath === "/config") return configH(req); - if (vmPath.startsWith("/tasks")) return tasksRouteH(req, method, vmPath); + const rebuilt = hasBody + ? new Request(req.url, { method, headers: req.headers, body }) + : req; + + if (vmPath === "/config") return configH(rebuilt); + if (vmPath.startsWith("/tasks")) return tasksRouteH(rebuilt, method, vmPath); if (method === "POST" && vmPath in setupH) return setupH[vmPath](); - if (method === "POST" && vmPath in fsH) return fsH[vmPath](req); + if (method === "POST" && vmPath in fsH) return fsH[vmPath](rebuilt); if (method === "POST" && vmPath.startsWith("/exec/")) - return execRouteH(req, vmPath); + return execRouteH(rebuilt, vmPath); return jsonResponse({ error: `Not found: /_decopilot_vm${vmPath}` }, 404); } diff --git a/packages/sandbox/daemon/routes/dispatch.test.ts b/packages/sandbox/daemon/routes/dispatch.test.ts new file mode 100644 index 0000000000..49c84ade19 --- /dev/null +++ b/packages/sandbox/daemon/routes/dispatch.test.ts @@ -0,0 +1,173 @@ +import { describe, expect, it } from "bun:test"; +import { + fixtures, + signRequest, +} from "../../../../apps/mesh/src/links/protocol"; +import { handleCancelRequest, handleDispatchRequest } from "./dispatch"; + +const SECRET = "test-secret-32-bytes-padding-padding-padding"; + +function makeFakeHarness() { + return { + async *stream() { + yield { type: "start", id: "m1" } as const; + yield { type: "text-delta", id: "m1", delta: "hello" } as const; + yield { type: "finish", finishReason: "stop" } as const; + }, + }; +} + +function makeDeps( + overrides: Partial[1]> = {}, +) { + return { + bearerSecret: SECRET, + lookupHarness: () => makeFakeHarness(), + seenNonce: () => false, + ...overrides, + }; +} + +function 
signedDispatch(body: string) { + const sig = signRequest({ + secret: SECRET, + method: "POST", + path: "/_decopilot_vm/dispatch", + body, + }); + return new Request("http://localhost/_decopilot_vm/dispatch", { + method: "POST", + body, + headers: { ...sig, "Content-Type": "application/json" }, + }); +} + +function signedCancel(runId: string) { + const path = `/_decopilot_vm/runs/${runId}`; + const sig = signRequest({ secret: SECRET, method: "DELETE", path, body: "" }); + return new Request(`http://localhost${path}`, { + method: "DELETE", + headers: { ...sig }, + }); +} + +async function readSSE(res: Response): Promise { + const text = await res.text(); + return text + .split("\n\n") + .filter((s) => s.startsWith("data: ")) + .map((s) => s.slice("data: ".length)); +} + +describe("POST /_decopilot_vm/dispatch", () => { + it("emits the harness's UIMessageChunks as SSE", async () => { + const body = JSON.stringify({ + harnessId: "fake", + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-dispatch-1" }, + }); + const res = await handleDispatchRequest(signedDispatch(body), makeDeps()); + expect(res.status).toBe(200); + expect(res.headers.get("content-type")).toContain("text/event-stream"); + + const events = await readSSE(res); + expect(events.length).toBeGreaterThan(0); + expect(events.some((e) => e.includes('"type":"ui-message-chunk"'))).toBe( + true, + ); + expect(events.at(-1)).toBe('{"type":"done"}'); + }); + + it("rejects an unsigned request", async () => { + const req = new Request("http://x/_decopilot_vm/dispatch", { + method: "POST", + body: "{}", + }); + const res = await handleDispatchRequest(req, makeDeps()); + expect(res.status).toBe(401); + }); + + it("rejects a bad signature", async () => { + const body = JSON.stringify({ harnessId: "fake", input: {} }); + const sig = signRequest({ + secret: "wrong-secret-32-bytes-paddingpadding", + method: "POST", + path: "/_decopilot_vm/dispatch", + body, + }); + const req = new 
Request("http://localhost/_decopilot_vm/dispatch", { + method: "POST", + body, + headers: { ...sig, "Content-Type": "application/json" }, + }); + const res = await handleDispatchRequest(req, makeDeps()); + expect(res.status).toBe(401); + }); + + it("returns 400 on invalid input shape", async () => { + const body = JSON.stringify({ harnessId: "fake", input: { bogus: true } }); + const res = await handleDispatchRequest(signedDispatch(body), makeDeps()); + expect(res.status).toBe(400); + }); + + it("returns 410 Gone for a tombstoned runId (cancel-before-dispatch)", async () => { + // Cancel first — this writes a tombstone. + const runId = "run-tombstone-1"; + const cancelRes = await handleCancelRequest(signedCancel(runId), { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(cancelRes.status).toBe(204); + + // Subsequent dispatch with the same runId should be rejected. + const body = JSON.stringify({ + harnessId: "fake", + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId }, + }); + const res = await handleDispatchRequest(signedDispatch(body), makeDeps()); + expect(res.status).toBe(410); + }); + + it("wraps harness errors as an error SSE event followed by done", async () => { + const harnessId = "throws"; + const body = JSON.stringify({ + harnessId, + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-error-1" }, + }); + const deps = makeDeps({ + lookupHarness: () => ({ + async *stream() { + throw new Error("boom"); + // biome-ignore lint/correctness/useYield: unreachable + yield 0 as never; + }, + }), + }); + const res = await handleDispatchRequest(signedDispatch(body), deps); + expect(res.status).toBe(200); + const events = await readSSE(res); + const errorEvent = events.find((e) => e.includes('"type":"error"')); + expect(errorEvent).toBeDefined(); + expect(errorEvent).toContain("boom"); + expect(events.at(-1)).toBe('{"type":"done"}'); + }); +}); + +describe("DELETE /_decopilot_vm/runs/:runId", () => { + it("returns 204 even for an unknown runId 
(idempotent)", async () => { + const res = await handleCancelRequest(signedCancel("run-unknown-1"), { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(res.status).toBe(204); + }); + + it("rejects an unsigned cancel", async () => { + const path = "/_decopilot_vm/runs/run-x"; + const req = new Request(`http://x${path}`, { method: "DELETE" }); + const res = await handleCancelRequest(req, { + bearerSecret: SECRET, + seenNonce: () => false, + }); + expect(res.status).toBe(401); + }); +}); diff --git a/packages/sandbox/daemon/routes/dispatch.ts b/packages/sandbox/daemon/routes/dispatch.ts new file mode 100644 index 0000000000..8b878b2b04 --- /dev/null +++ b/packages/sandbox/daemon/routes/dispatch.ts @@ -0,0 +1,232 @@ +/** + * `POST /_decopilot_vm/dispatch` + `DELETE /_decopilot_vm/runs/:runId`. + * + * Authenticated by HMAC against the daemon's `linkSecret` (the same + * value the cluster computed for the registered link entry — see + * `apps/mesh/src/links/protocol`'s `signRequest` / `verifyRequest`). The + * cluster-side caller is `remoteDispatch` in + * `apps/mesh/src/harnesses/remote-dispatch.ts`; both sides parse SSE + * events from `dispatchSSEEventSchema`. + * + * Dispatch flow: + * 1. Verify HMAC. + * 2. Decode + Zod-validate the body. + * 3. Refuse if the runId is currently tombstoned (cancel-before-dispatch). + * 4. Register an AbortController keyed by runId so a later DELETE + * aborts the in-flight harness loop. + * 5. Stream `UIMessageChunk` from the harness as SSE + * `data: {"type":"ui-message-chunk","chunk":...}\n\n` events. + * 6. Wrap any harness throw as `{type:"error", code, message}` then + * always emit `{type:"done"}` and close the stream. + * + * Cancel is idempotent (204 for unknown ids) and writes a 60s tombstone + * so a dispatch racing in after the cancel still resolves to 410 Gone. 
+ */ + +import { + dispatchSSEEventSchema, + harnessStreamInputSchema, + verifyRequest, + type DispatchSSEEvent, +} from "../../../../apps/mesh/src/links/protocol"; + +/** Minimal harness shape the dispatch route needs. Decoupled from the + * harness factories in `apps/mesh/src/harnesses` so the route file (and + * its tests) don't need to import a full harness factory. The daemon's + * `lookupHarness` injection adapts a real factory to this shape. */ +export interface DispatchHarness { + stream: () => AsyncIterable; +} + +export interface DispatchDeps { + /** Daemon-side bearer secret. Equals the link's `linkSecret` from the + * cluster's `LinkRegistry` — the cluster signs with it, the daemon + * verifies with it. */ + bearerSecret: string; + /** Look up a harness factory by id and instantiate it for this run. + * Throws if the id is unknown. */ + lookupHarness: (id: string, input: unknown) => DispatchHarness; + /** Nonce-replay guard. Returning `true` means the nonce was already + * used and should be rejected. The caller owns the cache (sized, + * TTL'd, etc.) — the route itself doesn't decide policy. */ + seenNonce: (nonce: string) => boolean; +} + +export interface CancelDeps { + bearerSecret: string; + seenNonce: (nonce: string) => boolean; +} + +const TOMBSTONE_MS = 60_000; + +/** AbortController per active dispatch, keyed by runId. The cancel route + * signals through this so the harness's `for await` loop breaks. */ +const activeRuns = new Map(); + +/** Recently-cancelled runIds. A dispatch that arrives within + * `TOMBSTONE_MS` of a cancel is rejected with 410 Gone — this resolves + * the cancel-before-dispatch race that otherwise leaves an orphan + * harness running on the laptop. */ +const tombstones = new Map(); + +/** Test-visible hook to reset module state between tests. The harness + * tests in this file share the module-scoped maps; resetting prevents + * cross-test pollution. 
*/ +export function resetDispatchStateForTests(): void { + activeRuns.clear(); + tombstones.clear(); +} + +export async function handleDispatchRequest( + req: Request, + deps: DispatchDeps, +): Promise { + const body = await req.text(); + const url = new URL(req.url); + + // When fronted by the link's reverse proxy, the cluster signed the + // request against the PRE-strip path (e.g. /_sandbox//_decopilot_vm/dispatch), + // not the post-strip path the daemon sees. The proxy forwards the + // original via X-Forwarded-Path; fall back to url.pathname for direct + // (non-proxied) callers like loopback tests. + const signedPath = req.headers.get("x-forwarded-path") ?? url.pathname; + const verification = verifyRequest({ + secret: deps.bearerSecret, + method: req.method, + path: signedPath, + body, + headers: Object.fromEntries(req.headers), + seenNonce: deps.seenNonce, + }); + if (!verification.valid) { + return new Response(JSON.stringify({ error: verification.reason }), { + status: 401, + headers: { "content-type": "application/json" }, + }); + } + + let parsed: { harnessId: unknown; input: unknown }; + try { + parsed = JSON.parse(body) as { harnessId: unknown; input: unknown }; + } catch { + return new Response(JSON.stringify({ error: "bad_json" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } + if (typeof parsed.harnessId !== "string") { + return new Response(JSON.stringify({ error: "missing_harness_id" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } + const inputParse = harnessStreamInputSchema.safeParse(parsed.input); + if (!inputParse.success) { + return new Response( + JSON.stringify({ error: "bad_input", detail: inputParse.error.message }), + { status: 400, headers: { "content-type": "application/json" } }, + ); + } + const input = inputParse.data; + + // Tombstone check — a cancel landed before this dispatch did. 
Decline + // and let the cluster surface a clear cancellation instead of starting + // a CLI process that will be immediately torn down. + const tombstoneExpiry = tombstones.get(input.runId); + if (tombstoneExpiry && tombstoneExpiry > Date.now()) { + return new Response(JSON.stringify({ error: "tombstoned" }), { + status: 410, + headers: { "content-type": "application/json" }, + }); + } else if (tombstoneExpiry) { + // Expired entry — clean up opportunistically. + tombstones.delete(input.runId); + } + + const ctrl = new AbortController(); + activeRuns.set(input.runId, ctrl); + + let harness: DispatchHarness; + try { + harness = deps.lookupHarness(parsed.harnessId, input); + } catch (err) { + activeRuns.delete(input.runId); + return new Response( + JSON.stringify({ + error: "unknown_harness", + detail: err instanceof Error ? err.message : String(err), + }), + { status: 400, headers: { "content-type": "application/json" } }, + ); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + async start(controller) { + const write = (event: DispatchSSEEvent) => { + controller.enqueue( + encoder.encode(`data: ${JSON.stringify(event)}\n\n`), + ); + }; + try { + for await (const chunk of harness.stream()) { + if (ctrl.signal.aborted) break; + write({ type: "ui-message-chunk", chunk }); + } + } catch (err) { + write({ + type: "error", + code: "harness_crashed", + message: err instanceof Error ? 
err.message : String(err), + }); + } finally { + write({ type: "done" }); + activeRuns.delete(input.runId); + controller.close(); + } + }, + }); + + return new Response(sseStream, { + status: 200, + headers: { + "content-type": "text/event-stream", + "cache-control": "no-store", + connection: "keep-alive", + }, + }); +} + +export async function handleCancelRequest( + req: Request, + deps: CancelDeps, +): Promise { + const url = new URL(req.url); + const match = url.pathname.match(/\/runs\/([^/]+)$/); + if (!match) return new Response(null, { status: 404 }); + const runId = match[1]!; + + const signedPath = req.headers.get("x-forwarded-path") ?? url.pathname; + const v = verifyRequest({ + secret: deps.bearerSecret, + method: req.method, + path: signedPath, + body: "", + headers: Object.fromEntries(req.headers), + seenNonce: deps.seenNonce, + }); + if (!v.valid) return new Response(null, { status: 401 }); + + const ctrl = activeRuns.get(runId); + if (ctrl) ctrl.abort(); + tombstones.set(runId, Date.now() + TOMBSTONE_MS); + + // Idempotent: 204 whether or not the runId was active. This mirrors + // the cluster's `remoteDispatch` cancel — it fires DELETE on consumer + // abort regardless of whether the daemon ever saw the runId. + return new Response(null, { status: 204 }); +} + +// Schema re-exports so the daemon's main handler can validate without +// reaching back into `apps/mesh/src/links/protocol` directly. 
+export { dispatchSSEEventSchema, harnessStreamInputSchema }; diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json index a679d232a6..6514c8ae86 100644 --- a/packages/sandbox/package.json +++ b/packages/sandbox/package.json @@ -11,8 +11,12 @@ }, "exports": { "./shared": "./shared.ts", - "./runner": "./server/runner/index.ts", - "./runner/agent-sandbox": "./server/runner/agent-sandbox/index.ts" + "./provider": "./server/provider/index.ts", + "./provider/agent-sandbox": "./server/provider/agent-sandbox/index.ts", + "./daemon-spawn": "./server/daemon-spawn.ts", + "./provider/remote-user": "./server/provider/remote-user/index.ts", + "./daemon-client": "./server/daemon-client.ts", + "./daemon/routes/dispatch": "./daemon/routes/dispatch.ts" }, "dependencies": { "@kubernetes/client-node": "^1.4.0", diff --git a/packages/sandbox/server/runner/host/daemon-asset.ts b/packages/sandbox/server/daemon-asset.ts similarity index 58% rename from packages/sandbox/server/runner/host/daemon-asset.ts rename to packages/sandbox/server/daemon-asset.ts index 7b197abe3d..395651d5ee 100644 --- a/packages/sandbox/server/runner/host/daemon-asset.ts +++ b/packages/sandbox/server/daemon-asset.ts @@ -1,21 +1,20 @@ /** * Embeds the prebuilt sandbox daemon bundle as a string at build time. * - * Isolated in its own file so `host/runner.ts` can `await import()` it + * Isolated in its own file so `daemon-spawn.ts` can `await import()` it * lazily — that way tests using the `_spawn` test seam never trigger the * text-import resolution and don't require `daemon/dist/daemon.js` to * exist on disk. * * In production (bundled `server.js`), `bun build` inlines the daemon - * bytes here so no asset has to ship alongside the bundle. The host - * runner writes these bytes to disk on first spawn and points - * `bun run` at the materialized file — see `host/runner.ts`. + * bytes here so no asset has to ship alongside the bundle. 
The + * `materializeDaemonBundle` helper writes these bytes to disk on first + * spawn and points `bun run` at the materialized file — see + * `daemon-spawn.ts`. */ // @ts-expect-error - Bun-specific text loader attribute; TS resolves the // underlying .js file and doesn't model `with { type: "text" }`. -import _daemonBundle from "../../../daemon/dist/daemon.js" with { - type: "text", -}; +import _daemonBundle from "../daemon/dist/daemon.js" with { type: "text" }; export const DAEMON_BUNDLE: string = _daemonBundle; diff --git a/packages/sandbox/server/daemon-client.ts b/packages/sandbox/server/daemon-client.ts index 134f525b6e..d88ccf28dd 100644 --- a/packages/sandbox/server/daemon-client.ts +++ b/packages/sandbox/server/daemon-client.ts @@ -6,7 +6,7 @@ import type { ConfigPatch } from "../daemon/config-store/types"; import type { TenantConfig } from "../daemon/types"; import { sleep } from "../shared"; -import type { ExecInput, ExecOutput } from "./runner/types"; +import type { ExecInput, ExecOutput } from "./provider/types"; export type { ConfigPatch }; diff --git a/packages/sandbox/server/daemon-spawn.ts b/packages/sandbox/server/daemon-spawn.ts new file mode 100644 index 0000000000..081319ea9c --- /dev/null +++ b/packages/sandbox/server/daemon-spawn.ts @@ -0,0 +1,120 @@ +/** + * Shared daemon spawn / executable resolution. + * + * Used by the `deco link` daemon (laptop-side, where the link binary + * fronts the sandbox daemon for the cluster's remote-harness dispatcher). + * + * In dev (source tree present), spawn `bun run ` so the + * daemon code reloads on file change without a build step. + * + * In production (the link binary, or `bunx decocms@latest`), the source + * TS path resolves to a nonexistent `/.../daemon/entry.ts` — + * we materialize the embedded bundle (loaded lazily from + * `daemon-asset.ts`) into `/.deco/cache/sandbox-daemon-.js` + * and spawn that. + * + * `node-pty` is a runtime dep of the daemon. 
Its install location lives + * inside the parent's node_modules tree, but the materialized bundle + * sits in DATA_DIR — bun won't find node-pty by walking up from there. + * `resolveNodePtyNodeModulesDir` returns the directory to expose via + * NODE_PATH so the spawned daemon can `import "node-pty"`. + */ + +import { createHash } from "node:crypto"; +import { existsSync } from "node:fs"; +import { mkdir, rename, writeFile } from "node:fs/promises"; +import { join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +export interface DaemonProcess { + pid: number; + kill: (signal?: NodeJS.Signals | number) => boolean; + /** Bun's Subprocess.exited — resolves with the exit code when the + * process terminates. */ + exited: Promise; +} + +export interface SpawnDaemonInput { + workdir: string; + env: Record; + /** Port the daemon should bind to (PROXY_PORT). */ + daemonPort: number; +} + +export type SpawnDaemonFn = (input: SpawnDaemonInput) => Promise; + +export function resolveSourceDaemonPath(): string { + return resolve(fileURLToPath(new URL("../daemon/entry.ts", import.meta.url))); +} + +export function resolveNodePtyNodeModulesDir(): string { + const ptyEntry = Bun.resolveSync("node-pty", import.meta.dir); + const marker = "/node_modules/"; + const idx = ptyEntry.lastIndexOf(marker); + if (idx < 0) { + throw new Error( + `could not derive node_modules path from node-pty resolution: ${ptyEntry}`, + ); + } + return ptyEntry.slice(0, idx + marker.length - 1); +} + +export async function materializeDaemonBundle( + homeDir: string, +): Promise { + const { DAEMON_BUNDLE } = await import("./daemon-asset"); + const hash = createHash("sha256") + .update(DAEMON_BUNDLE) + .digest("hex") + .slice(0, 16); + const cacheDir = join(homeDir, ".deco", "cache"); + const cachePath = join(cacheDir, `sandbox-daemon-${hash}.js`); + if (existsSync(cachePath)) return cachePath; + await mkdir(cacheDir, { recursive: true }); + // Write atomically — concurrent spawns 
racing to materialize the same + // hashed file are tolerated because `rename` is atomic on POSIX. + const tmpPath = `${cachePath}.${process.pid}.tmp`; + await writeFile(tmpPath, DAEMON_BUNDLE); + await rename(tmpPath, cachePath); + return cachePath; +} + +export async function resolveDaemonExec(homeDir: string): Promise { + const sourceTs = resolveSourceDaemonPath(); + if (existsSync(sourceTs)) return sourceTs; + return materializeDaemonBundle(homeDir); +} + +/** + * Default Bun.spawn-based daemon launcher. `homeDir` is the DATA_DIR + * root used to materialize the daemon bundle when running from a bundle. + */ +export function createDefaultDaemonSpawn(homeDir: string): SpawnDaemonFn { + return async (args) => { + const daemonExec = await resolveDaemonExec(homeDir); + const ptyNodeModulesDir = resolveNodePtyNodeModulesDir(); + const existingNodePath = process.env.NODE_PATH; + const nodePath = existingNodePath + ? `${ptyNodeModulesDir}:${existingNodePath}` + : ptyNodeModulesDir; + const proc = Bun.spawn({ + cmd: ["bun", "run", daemonExec], + env: { + ...process.env, + NODE_PATH: nodePath, + ...args.env, + }, + stdout: "inherit", + stderr: "inherit", + stdin: "ignore", + }); + return { + pid: proc.pid, + kill: (sig) => { + proc.kill(sig as NodeJS.Signals | number | undefined); + return true; + }, + exited: proc.exited, + }; + }; +} diff --git a/packages/sandbox/server/docker-cli.ts b/packages/sandbox/server/docker-cli.ts index db9a90db34..040def8442 100644 --- a/packages/sandbox/server/docker-cli.ts +++ b/packages/sandbox/server/docker-cli.ts @@ -65,7 +65,7 @@ export interface StartContainerOptions { timeoutMs?: number; /** Short label used in error messages. */ label: string; - /** Override for test-mode `exec` injection from DockerSandboxRunner. */ + /** Override for test-mode `exec` injection from DockerSandboxProvider. 
*/ exec?: DockerExecFn; } diff --git a/packages/sandbox/server/runner/agent-sandbox/client.test.ts b/packages/sandbox/server/provider/agent-sandbox/client.test.ts similarity index 100% rename from packages/sandbox/server/runner/agent-sandbox/client.test.ts rename to packages/sandbox/server/provider/agent-sandbox/client.test.ts diff --git a/packages/sandbox/server/runner/agent-sandbox/client.ts b/packages/sandbox/server/provider/agent-sandbox/client.ts similarity index 100% rename from packages/sandbox/server/runner/agent-sandbox/client.ts rename to packages/sandbox/server/provider/agent-sandbox/client.ts diff --git a/packages/sandbox/server/runner/agent-sandbox/constants.ts b/packages/sandbox/server/provider/agent-sandbox/constants.ts similarity index 100% rename from packages/sandbox/server/runner/agent-sandbox/constants.ts rename to packages/sandbox/server/provider/agent-sandbox/constants.ts diff --git a/packages/sandbox/server/runner/agent-sandbox/index.ts b/packages/sandbox/server/provider/agent-sandbox/index.ts similarity index 88% rename from packages/sandbox/server/runner/agent-sandbox/index.ts rename to packages/sandbox/server/provider/agent-sandbox/index.ts index 6b15e084a2..d221774cfa 100644 --- a/packages/sandbox/server/runner/agent-sandbox/index.ts +++ b/packages/sandbox/server/provider/agent-sandbox/index.ts @@ -19,8 +19,8 @@ export type { SandboxCondition, SandboxResource, } from "./client"; -export { AgentSandboxRunner } from "./runner"; -export type { AgentSandboxRunnerOptions } from "./runner"; +export { AgentSandboxProvider } from "./runner"; +export type { AgentSandboxProviderOptions } from "./runner"; // Lifecycle types live in their own module (no K8s deps) so type-only // consumers — notably the studio web bundle — can import them safely. 
export type { ClaimFailureReason, ClaimPhase } from "./lifecycle-types"; diff --git a/packages/sandbox/server/runner/agent-sandbox/lifecycle-types.ts b/packages/sandbox/server/provider/agent-sandbox/lifecycle-types.ts similarity index 68% rename from packages/sandbox/server/runner/agent-sandbox/lifecycle-types.ts rename to packages/sandbox/server/provider/agent-sandbox/lifecycle-types.ts index c22eb42865..d59c5b38af 100644 --- a/packages/sandbox/server/runner/agent-sandbox/lifecycle-types.ts +++ b/packages/sandbox/server/provider/agent-sandbox/lifecycle-types.ts @@ -1,8 +1,8 @@ /** * Re-export from the runner-root lifecycle types module. Kept for back-compat - * with consumers that import via `@decocms/sandbox/runner/agent-sandbox` + * with consumers that import via `@decocms/sandbox/provider/agent-sandbox` * (notably the studio web bundle's vm-events context). New code should import - * from `@decocms/sandbox/runner`. + * from `@decocms/sandbox/provider`. */ export type { ClaimFailureReason, ClaimPhase } from "../lifecycle-types"; diff --git a/packages/sandbox/server/runner/agent-sandbox/lifecycle-watcher.test.ts b/packages/sandbox/server/provider/agent-sandbox/lifecycle-watcher.test.ts similarity index 100% rename from packages/sandbox/server/runner/agent-sandbox/lifecycle-watcher.test.ts rename to packages/sandbox/server/provider/agent-sandbox/lifecycle-watcher.test.ts diff --git a/packages/sandbox/server/runner/agent-sandbox/lifecycle-watcher.ts b/packages/sandbox/server/provider/agent-sandbox/lifecycle-watcher.ts similarity index 100% rename from packages/sandbox/server/runner/agent-sandbox/lifecycle-watcher.ts rename to packages/sandbox/server/provider/agent-sandbox/lifecycle-watcher.ts diff --git a/packages/sandbox/server/runner/agent-sandbox/runner.ts b/packages/sandbox/server/provider/agent-sandbox/runner.ts similarity index 99% rename from packages/sandbox/server/runner/agent-sandbox/runner.ts rename to 
packages/sandbox/server/provider/agent-sandbox/runner.ts index 2b55988426..de412cb3e5 100644 --- a/packages/sandbox/server/runner/agent-sandbox/runner.ts +++ b/packages/sandbox/server/provider/agent-sandbox/runner.ts @@ -59,7 +59,7 @@ import type { ProxyRequestInit, Sandbox, SandboxId, - SandboxRunner, + SandboxProvider, Workload, } from "../types"; import { @@ -87,7 +87,7 @@ import { watchClaimLifecycle } from "./lifecycle-watcher"; import type { ClaimPhase } from "../lifecycle-types"; const RUNNER_KIND = "agent-sandbox" as const; -const LOG_LABEL = "AgentSandboxRunner"; +const LOG_LABEL = "AgentSandboxProvider"; // Shared-namespace topology for MVP; tenancy enforced by unguessable claim // names (sha256(userId:projectRef)). Per-org namespaces are deferred. @@ -259,7 +259,7 @@ interface PersistedK8sState { [k: string]: unknown; } -export interface AgentSandboxRunnerOptions { +export interface AgentSandboxProviderOptions { stateStore?: RunnerStateStore; previewUrlPattern?: string; /** Defaults to `new KubeConfig().loadFromDefault()`. Tests pass a stub. */ @@ -341,7 +341,7 @@ export interface AgentSandboxRunnerOptions { }; } -export class AgentSandboxRunner implements SandboxRunner { +export class AgentSandboxProvider implements SandboxProvider { readonly kind = RUNNER_KIND; private readonly records = new Map(); @@ -369,7 +369,7 @@ export class AgentSandboxRunner implements SandboxRunner { */ private readonly previewGateway: { name: string; namespace: string } | null; /** - * Non-null = warm-pool mode (see `AgentSandboxRunnerOptions.sentinelToken`). + * Non-null = warm-pool mode (see `AgentSandboxProviderOptions.sentinelToken`). * Treated as the bearer token for the *first* daemon contact only; * mesh rotates to a per-claim token via `auth.rotateToken` immediately * after, and persists the new token. 
Empty/whitespace strings are @@ -379,7 +379,7 @@ export class AgentSandboxRunner implements SandboxRunner { private readonly sentinelToken: string | null; private closed = false; - constructor(opts: AgentSandboxRunnerOptions = {}) { + constructor(opts: AgentSandboxProviderOptions = {}) { this.stateStore = opts.stateStore ?? null; this.previewUrlPattern = opts.previewUrlPattern ?? null; this.kubeConfig = opts.kubeConfig ?? loadDefaultKubeConfig(); @@ -404,7 +404,7 @@ export class AgentSandboxRunner implements SandboxRunner { this.sentinelToken = trimmedSentinel.length > 0 ? trimmedSentinel : null; } - // ---- SandboxRunner surface ------------------------------------------------ + // ---- SandboxProvider surface ------------------------------------------------ async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { // Branch is the slug source; absent when caller didn't pass `repo` @@ -1796,7 +1796,7 @@ function normalizeEnvName(raw: string | undefined): string | null { if (trimmed === "") return null; if (!ENV_NAME_RE.test(trimmed)) { throw new Error( - `AgentSandboxRunner: envName=${JSON.stringify(trimmed)} is not a valid DNS-label-safe environment name (lowercase alphanumeric or '-', starts with a letter, ends alphanumeric, ≤32 chars). Mesh sets this from STUDIO_ENV; check the studio chart's configMap.`, + `AgentSandboxProvider: envName=${JSON.stringify(trimmed)} is not a valid DNS-label-safe environment name (lowercase alphanumeric or '-', starts with a letter, ends alphanumeric, ≤32 chars). 
Mesh sets this from STUDIO_ENV; check the studio chart's configMap.`, ); } return trimmed; diff --git a/packages/sandbox/server/runner/docker/index.ts b/packages/sandbox/server/provider/docker/index.ts similarity index 78% rename from packages/sandbox/server/runner/docker/index.ts rename to packages/sandbox/server/provider/docker/index.ts index 02025e8727..f31acb1795 100644 --- a/packages/sandbox/server/runner/docker/index.ts +++ b/packages/sandbox/server/provider/docker/index.ts @@ -1,7 +1,7 @@ -export { DockerSandboxRunner } from "./runner"; +export { DockerSandboxProvider } from "./runner"; export type { DockerExec, - DockerRunnerOptions, + DockerProviderOptions, ExecResult, } from "./runner"; export { startLocalSandboxIngress } from "./local-ingress"; diff --git a/packages/sandbox/server/runner/docker/local-ingress.test.ts b/packages/sandbox/server/provider/docker/local-ingress.test.ts similarity index 97% rename from packages/sandbox/server/runner/docker/local-ingress.test.ts rename to packages/sandbox/server/provider/docker/local-ingress.test.ts index 55dff9244f..67d99c4b79 100644 --- a/packages/sandbox/server/runner/docker/local-ingress.test.ts +++ b/packages/sandbox/server/provider/docker/local-ingress.test.ts @@ -1,7 +1,7 @@ import { afterEach, describe, expect, it } from "bun:test"; import * as net from "node:net"; import type { AddressInfo } from "node:net"; -import type { DockerSandboxRunner } from "./runner"; +import type { DockerSandboxProvider } from "./runner"; import { startLocalSandboxIngress } from "./local-ingress"; // local-ingress is a raw TCP proxy (not fetch-based). Testing it end-to-end @@ -105,15 +105,15 @@ function sendRaw(port: number, bytes: string): Promise { function runnerFor( map: Record, -): DockerSandboxRunner { +): DockerSandboxProvider { return { resolveDevPort: async (h: string) => map[h]?.dev ?? null, resolveDaemonPort: async (h: string) => map[h]?.daemon ?? 
null, - } as unknown as DockerSandboxRunner; + } as unknown as DockerSandboxProvider; } async function startIngress( - getRunner: () => DockerSandboxRunner | null, + getRunner: () => DockerSandboxProvider | null, ): Promise<{ servers: net.Server[]; port: number }> { // port 0 → OS picks a free port; dodges EADDRINUSE + the retry loop. const servers = startLocalSandboxIngress(getRunner, 0); @@ -290,7 +290,7 @@ describe("startLocalSandboxIngress", () => { calls.push(`daemon:${h}`); return null; }, - } as unknown as DockerSandboxRunner; + } as unknown as DockerSandboxProvider; const { servers, port } = await startIngress(() => runner); currentServers = servers; diff --git a/packages/sandbox/server/runner/docker/local-ingress.ts b/packages/sandbox/server/provider/docker/local-ingress.ts similarity index 98% rename from packages/sandbox/server/runner/docker/local-ingress.ts rename to packages/sandbox/server/provider/docker/local-ingress.ts index 538e218acc..d9961e4c1b 100644 --- a/packages/sandbox/server/runner/docker/local-ingress.ts +++ b/packages/sandbox/server/provider/docker/local-ingress.ts @@ -14,7 +14,8 @@ const HEADERS_TERMINATOR = Buffer.from("\r\n\r\n"); /** * Structural view: any runner that can map a handle to a host-side daemon - * TCP port. Both DockerSandboxRunner and HostSandboxRunner implement this. + * TCP port. DockerSandboxProvider implements this; the retired + * HostSandboxProvider used to as well. 
*/ export interface DaemonPortResolver { resolveDaemonPort(handle: string): Promise; diff --git a/packages/sandbox/server/runner/docker/runner.test.ts b/packages/sandbox/server/provider/docker/runner.test.ts similarity index 94% rename from packages/sandbox/server/runner/docker/runner.test.ts rename to packages/sandbox/server/provider/docker/runner.test.ts index 8e1fa780d5..8771b3c456 100644 --- a/packages/sandbox/server/runner/docker/runner.test.ts +++ b/packages/sandbox/server/provider/docker/runner.test.ts @@ -1,6 +1,6 @@ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; import type { DockerExecFn, DockerResult } from "../../docker-cli"; -import { DockerSandboxRunner } from "./runner"; +import { DockerSandboxProvider } from "./runner"; import type { RunnerStateRecord, RunnerStateRecordWithId, @@ -193,13 +193,13 @@ const ID: SandboxId = { userId: "u_1", projectRef: "agent:o:v:main" }; // Tests // ----------------------------------------------------------------------------- -describe("DockerSandboxRunner.ensure() — fresh provision", () => { +describe("DockerSandboxProvider.ensure() — fresh provision", () => { it("runs container with hardening flags, reads ports, probes health, persists", async () => { const { exec, calls } = makeExec(defaultResponder); const store = makeStore(); installFetch(() => healthOkResponse()); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, stateStore: store, @@ -278,7 +278,7 @@ describe("DockerSandboxRunner.ensure() — fresh provision", () => { const { exec, calls } = makeExec(defaultResponder); const store = makeStore(); installFetch(() => healthOkResponse()); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, stateStore: store, @@ -296,7 +296,7 @@ describe("DockerSandboxRunner.ensure() — fresh provision", () => { }); }); -describe("DockerSandboxRunner.ensure() — adopt 
by label", () => { +describe("DockerSandboxProvider.ensure() — adopt by label", () => { it("adopts an existing labeled container by name without calling docker run", async () => { let runCount = 0; const expectedHandle = computeHandle(ID); @@ -329,7 +329,7 @@ describe("DockerSandboxRunner.ensure() — adopt by label", () => { const store = makeStore(); installFetch(() => healthOkResponse()); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, stateStore: store, @@ -345,7 +345,7 @@ describe("DockerSandboxRunner.ensure() — adopt by label", () => { }); }); -describe("DockerSandboxRunner.ensure() — --name collision recovery", () => { +describe("DockerSandboxProvider.ensure() — --name collision recovery", () => { it("removes a colliding orphan container and retries the run", async () => { let runCalls = 0; let rmCalls = 0; @@ -374,7 +374,7 @@ describe("DockerSandboxRunner.ensure() — --name collision recovery", () => { const store = makeStore(); installFetch(() => healthOkResponse()); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, stateStore: store, @@ -388,7 +388,7 @@ describe("DockerSandboxRunner.ensure() — --name collision recovery", () => { }); }); -describe("DockerSandboxRunner.ensure() — in-process dedupe", () => { +describe("DockerSandboxProvider.ensure() — in-process dedupe", () => { it("two concurrent ensure() calls share one docker run", async () => { let runCount = 0; const { exec, calls } = makeExec((args) => { @@ -408,7 +408,7 @@ describe("DockerSandboxRunner.ensure() — in-process dedupe", () => { // the runner spawns a real `docker build` (not honoring the injected exec) // and the test times out on the cold cache. Matches the pattern used by the // tests above. 
- const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, }); @@ -421,7 +421,7 @@ describe("DockerSandboxRunner.ensure() — in-process dedupe", () => { }); }); -describe("DockerSandboxRunner.ensure() — resume from persisted state", () => { +describe("DockerSandboxProvider.ensure() — resume from persisted state", () => { it("uses persisted token/workdir, no docker run, alive + health ok", async () => { let runCount = 0; const { exec, calls } = makeExec((args) => { @@ -448,7 +448,7 @@ describe("DockerSandboxRunner.ensure() — resume from persisted state", () => { installFetch(() => healthOkResponse()); // /health ok - const runner = new DockerSandboxRunner({ exec, stateStore: store }); + const runner = new DockerSandboxProvider({ exec, stateStore: store }); const sandbox = await runner.ensure(ID); expect(sandbox.handle).toBe(persistedHandle); @@ -458,7 +458,7 @@ describe("DockerSandboxRunner.ensure() — resume from persisted state", () => { }); }); -describe("DockerSandboxRunner.ensure() — config bootstrap contract", () => { +describe("DockerSandboxProvider.ensure() — config bootstrap contract", () => { it("plumbs only daemon-identity env into the container, then POSTs repo + workload via /_decopilot_vm/config", async () => { const { exec, calls } = makeExec(defaultResponder); const fetchCalls: FetchCall[] = []; @@ -482,7 +482,7 @@ describe("DockerSandboxRunner.ensure() — config bootstrap contract", () => { return new Response("", { status: 204 }); }) as unknown as typeof fetch; - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, }); @@ -563,7 +563,7 @@ describe("DockerSandboxRunner.ensure() — config bootstrap contract", () => { return new Response("", { status: 204 }); }) as unknown as typeof fetch; - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, }); @@ -586,7 
+586,7 @@ describe("DockerSandboxRunner.ensure() — config bootstrap contract", () => { }); }); -describe("DockerSandboxRunner.sweepOrphans()", () => { +describe("DockerSandboxProvider.sweepOrphans()", () => { it("stops every container returned by the ps filter", async () => { const stopCalls: string[] = []; const { exec } = makeExec((args) => { @@ -600,7 +600,7 @@ describe("DockerSandboxRunner.sweepOrphans()", () => { return defaultResponder(args); }); const store = makeStore(); - const runner = new DockerSandboxRunner({ exec, stateStore: store }); + const runner = new DockerSandboxProvider({ exec, stateStore: store }); const n = await runner.sweepOrphans(); @@ -628,14 +628,14 @@ describe("DockerSandboxRunner.sweepOrphans()", () => { } return defaultResponder(args); }); - const runner = new DockerSandboxRunner({ exec }); + const runner = new DockerSandboxProvider({ exec }); const n = await runner.sweepOrphans(); expect(n).toBe(3); expect(stopCalls.sort()).toEqual(["a", "b", "c"]); }); }); -describe("DockerSandboxRunner.delete()", () => { +describe("DockerSandboxProvider.delete()", () => { it("container stop and store.delete with record (no graceful dev-stop)", async () => { const stopCalls: string[] = []; const { exec } = makeExec((args) => { @@ -653,7 +653,7 @@ describe("DockerSandboxRunner.delete()", () => { : new Response("", { status: 204 }), ); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, stateStore: store, @@ -685,7 +685,7 @@ describe("DockerSandboxRunner.delete()", () => { return defaultResponder(args); }); const store = makeStore(); - const runner = new DockerSandboxRunner({ exec, stateStore: store }); + const runner = new DockerSandboxProvider({ exec, stateStore: store }); installFetch(() => new Response("", { status: 204 })); const unknownHandle = "unknownhandle1234567890abcdef"; @@ -700,12 +700,12 @@ describe("DockerSandboxRunner.delete()", () => { }); }); 
-describe("DockerSandboxRunner — sanity: preview URL & port resolvers", () => { +describe("DockerSandboxProvider — sanity: preview URL & port resolvers", () => { it("composePreviewUrl uses pattern when workload provided; resolvers return ports", async () => { const { exec } = makeExec(defaultResponder); installFetch(() => healthOkResponse()); - const runner = new DockerSandboxRunner({ + const runner = new DockerSandboxProvider({ image: "test-image:latest", exec, previewUrlPattern: "https://preview.example.com/{handle}", diff --git a/packages/sandbox/server/runner/docker/runner.ts b/packages/sandbox/server/provider/docker/runner.ts similarity index 98% rename from packages/sandbox/server/runner/docker/runner.ts rename to packages/sandbox/server/provider/docker/runner.ts index dad3b4e15b..a7814fddff 100644 --- a/packages/sandbox/server/runner/docker/runner.ts +++ b/packages/sandbox/server/provider/docker/runner.ts @@ -40,7 +40,7 @@ import type { ProxyRequestInit, Sandbox, SandboxId, - SandboxRunner, + SandboxProvider, Workload, } from "../types"; import type { ClaimPhase } from "../lifecycle-types"; @@ -51,7 +51,7 @@ const LABEL_ID = "studio-sandbox.id"; const DEFAULT_DEV_PORT = 3000; const PORT_READBACK_ATTEMPTS = 15; const PORT_READBACK_INTERVAL_MS = 200; -const LOG_LABEL = "DockerSandboxRunner"; +const LOG_LABEL = "DockerSandboxProvider"; type PhaseLog = (msg: string, fields?: Record) => void; @@ -70,7 +70,7 @@ function makePhaseLog(scope: string): PhaseLog { export type ExecResult = DockerResult; export type DockerExec = DockerExecFn; -export interface DockerRunnerOptions { +export interface DockerProviderOptions { image?: string; exec?: DockerExecFn; stateStore?: RunnerStateStore; @@ -110,7 +110,7 @@ interface PersistedDockerState { [k: string]: unknown; } -export class DockerSandboxRunner implements SandboxRunner { +export class DockerSandboxProvider implements SandboxProvider { readonly kind = RUNNER_KIND; private readonly records = new Map(); @@ -121,7 +121,7 
@@ export class DockerSandboxRunner implements SandboxRunner { private readonly stateStore: RunnerStateStore | null; private readonly previewUrlPattern: string | null; - constructor(opts: DockerRunnerOptions = {}) { + constructor(opts: DockerProviderOptions = {}) { this.defaultImage = opts.image ?? process.env.STUDIO_SANDBOX_IMAGE ?? DEFAULT_IMAGE; this.exec_ = opts.exec ?? dockerExec; @@ -130,7 +130,7 @@ export class DockerSandboxRunner implements SandboxRunner { this.previewUrlPattern = opts.previewUrlPattern ?? null; } - // ---- SandboxRunner surface ------------------------------------------------ + // ---- SandboxProvider surface ------------------------------------------------ async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { const labelId = hashSandboxId(id, 16); diff --git a/packages/sandbox/server/runner/docker/sweep.ts b/packages/sandbox/server/provider/docker/sweep.ts similarity index 87% rename from packages/sandbox/server/runner/docker/sweep.ts rename to packages/sandbox/server/provider/docker/sweep.ts index d7e644127c..6f12122c2d 100644 --- a/packages/sandbox/server/runner/docker/sweep.ts +++ b/packages/sandbox/server/provider/docker/sweep.ts @@ -1,15 +1,15 @@ /** * Docker-only sweeps. Other runners' sandboxes outlive mesh by design — a * polymorphic sweep would nuke user VMs on K8s rolling restart. So this - * lives on `DockerSandboxRunner`, not on the `SandboxRunner` interface. + * lives on `DockerSandboxProvider`, not on the `SandboxProvider` interface. 
*/ -import { DockerSandboxRunner, type DockerRunnerOptions } from "./runner"; +import { DockerSandboxProvider, type DockerProviderOptions } from "./runner"; const BOOT_SWEEP_KEY = Symbol.for("mesh.sandbox.bootSweepDone"); export type SweepDockerOrphansOnBootOptions = Pick< - DockerRunnerOptions, + DockerProviderOptions, "labelPrefix" | "exec" >; @@ -27,7 +27,7 @@ export async function sweepDockerOrphansOnBoot( if (g[BOOT_SWEEP_KEY]) return; g[BOOT_SWEEP_KEY] = true; try { - const runner = new DockerSandboxRunner(opts); + const runner = new DockerSandboxProvider(opts); const n = await runner.sweepOrphans(); if (n > 0) { console.log(`[sandbox] Boot sweep: stopped ${n} stale container(s).`); @@ -46,7 +46,7 @@ export async function sweepDockerOrphansOnBoot( * single-pod-per-host (the only sane docker deployment shape today). */ export async function sweepDockerOrphansOnShutdown( - runner: DockerSandboxRunner | null, + runner: DockerSandboxProvider | null, ): Promise { if (!runner) return; console.log("[shutdown] Sweeping docker sandbox containers..."); diff --git a/packages/sandbox/server/provider/index.test.ts b/packages/sandbox/server/provider/index.test.ts new file mode 100644 index 0000000000..a832c20f91 --- /dev/null +++ b/packages/sandbox/server/provider/index.test.ts @@ -0,0 +1,40 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resolveSandboxProviderKindFromEnv } from "./index"; + +describe("resolveSandboxProviderKindFromEnv", () => { + const ORIG = { ...process.env }; + beforeEach(() => { + delete process.env.STUDIO_SANDBOX_RUNNER; + }); + afterEach(() => { + process.env = { ...ORIG }; + }); + + it("defaults to 'remote-user' when nothing is configured", () => { + expect(resolveSandboxProviderKindFromEnv()).toBe("remote-user"); + }); + + it("honors explicit STUDIO_SANDBOX_RUNNER=docker", () => { + process.env.STUDIO_SANDBOX_RUNNER = "docker"; + expect(resolveSandboxProviderKindFromEnv()).toBe("docker"); + }); + + it("honors 
explicit STUDIO_SANDBOX_RUNNER=agent-sandbox", () => { + process.env.STUDIO_SANDBOX_RUNNER = "agent-sandbox"; + expect(resolveSandboxProviderKindFromEnv()).toBe("agent-sandbox"); + }); + + it("throws on unknown STUDIO_SANDBOX_RUNNER value", () => { + process.env.STUDIO_SANDBOX_RUNNER = "nonsense"; + expect(() => resolveSandboxProviderKindFromEnv()).toThrow( + /Unknown STUDIO_SANDBOX_RUNNER/, + ); + }); + + it("rejects the retired 'host' runner kind", () => { + process.env.STUDIO_SANDBOX_RUNNER = "host"; + expect(() => resolveSandboxProviderKindFromEnv()).toThrow( + /Unknown STUDIO_SANDBOX_RUNNER/, + ); + }); +}); diff --git a/packages/sandbox/server/provider/index.ts b/packages/sandbox/server/provider/index.ts new file mode 100644 index 0000000000..934abb5523 --- /dev/null +++ b/packages/sandbox/server/provider/index.ts @@ -0,0 +1,95 @@ +/** + * Public surface. Ships `DockerSandboxProvider` only via the default entry; + * agent-sandbox sits behind its own subpath export (./provider/agent-sandbox) + * because its SDK is heavy and not every deploy needs it. `remote-user` is + * constructed per-run from the acting user's link entry. + */ + +import { DockerSandboxProvider, type DockerProviderOptions } from "./docker"; +import type { RunnerStateStore } from "./state-store"; +import type { SandboxProviderKind, SandboxProvider } from "./types"; + +export type { + EnsureOptions, + ExecInput, + ExecOutput, + ProxyRequestInit, + SandboxProviderKind, + Sandbox, + SandboxId, + SandboxProvider, + Workload, +} from "./types"; +export type { ClaimFailureReason, ClaimPhase } from "./lifecycle-types"; +export { sandboxIdKey } from "./types"; +export { DockerSandboxProvider } from "./docker"; +export type { DockerExec, DockerProviderOptions, ExecResult } from "./docker"; +// Needed by mesh callers (decopilot dispatch-run) that compute handles +// directly. Re-exported here so consumers don't dig into shared/. 
+export { computeHandle } from "./shared"; +export { ensureSandboxImage } from "../image-build"; +export type { EnsureImageOptions } from "../image-build"; +export { startLocalSandboxIngress } from "./docker"; +export { + sweepDockerOrphansOnBoot, + sweepDockerOrphansOnShutdown, +} from "./docker"; +export type { SweepDockerOrphansOnBootOptions } from "./docker"; +export type { + RunnerStateRecord, + RunnerStateRecordWithId, + RunnerStatePut, + RunnerStateStore, + RunnerStateStoreOps, +} from "./state-store"; +export { + composeSandboxRef, + type AgentSandboxRefInput, + type SandboxRefInput, + type ThreadSandboxRefInput, +} from "./sandbox-ref"; + +export interface CreateDockerProviderOptions { + stateStore?: RunnerStateStore; + docker?: Omit; +} + +/** Convenience for host apps wiring only the in-package provider. */ +export function createDockerProvider( + opts: CreateDockerProviderOptions = {}, +): SandboxProvider { + return new DockerSandboxProvider({ + ...opts.docker, + stateStore: opts.stateStore, + }); +} + +const RUNNER_KINDS: ReadonlySet = new Set([ + "docker", + "agent-sandbox", + "remote-user", +]); + +/** + * Single resolution rule: + * - explicit STUDIO_SANDBOX_RUNNER wins (validated against the kind set); + * - otherwise default to "remote-user" (the laptop-side link daemon — + * auto-spawned by `bun run dev` in local dev, and the supported + * topology for single-machine self-hosts running the link side-by-side). + * + * Production deploys MUST set STUDIO_SANDBOX_RUNNER explicitly to + * "docker" or "agent-sandbox" — the default is only meaningful when paired + * with a co-located link binary. + */ +export function resolveSandboxProviderKindFromEnv(): SandboxProviderKind { + const raw = process.env.STUDIO_SANDBOX_RUNNER; + const kind = ( + raw && raw.length > 0 ? 
raw : "remote-user" + ) as SandboxProviderKind; + if (!RUNNER_KINDS.has(kind)) { + throw new Error( + `Unknown STUDIO_SANDBOX_RUNNER="${raw}" — expected "docker", "agent-sandbox", or "remote-user".`, + ); + } + return kind; +} diff --git a/packages/sandbox/server/runner/lifecycle-types.ts b/packages/sandbox/server/provider/lifecycle-types.ts similarity index 95% rename from packages/sandbox/server/runner/lifecycle-types.ts rename to packages/sandbox/server/provider/lifecycle-types.ts index dd26801497..6cde3447f1 100644 --- a/packages/sandbox/server/runner/lifecycle-types.ts +++ b/packages/sandbox/server/provider/lifecycle-types.ts @@ -1,5 +1,5 @@ /** - * Lifecycle phase types for `SandboxRunner.watchClaimLifecycle`. + * Lifecycle phase types for `SandboxProvider.watchClaimLifecycle`. * * Lives at the runner package root (rather than under `agent-sandbox/`) so the * runner abstraction can reference these without depending on a concrete impl. diff --git a/packages/sandbox/server/provider/remote-user/index.ts b/packages/sandbox/server/provider/remote-user/index.ts new file mode 100644 index 0000000000..31f6099878 --- /dev/null +++ b/packages/sandbox/server/provider/remote-user/index.ts @@ -0,0 +1,8 @@ +export { + RemoteUserSandboxProvider, + createRemoteUserProvider, +} from "./runner"; +export type { + RemoteUserLinkRef, + RemoteUserProviderOptions, +} from "./runner"; diff --git a/packages/sandbox/server/provider/remote-user/runner.test.ts b/packages/sandbox/server/provider/remote-user/runner.test.ts new file mode 100644 index 0000000000..8767fbbe81 --- /dev/null +++ b/packages/sandbox/server/provider/remote-user/runner.test.ts @@ -0,0 +1,523 @@ +import { describe, expect, it } from "bun:test"; +import { + NONCE_HEADER, + SIG_HEADER, + TS_HEADER, + verifyRequest, +} from "../../../../../apps/mesh/src/links/protocol/hmac"; +import type { RunnerStateStoreOps } from "../state-store"; +import { RemoteUserSandboxProvider } from "./runner"; + +const TUNNEL = 
"https://link-x.deco.host"; +const SECRET = "this-is-the-stored-hash-acting-as-the-signing-key"; + +interface FakeFetchCall { + url: string; + method: string; + headers: Headers; + body: string; +} + +function makeFakeFetch( + responder: (call: FakeFetchCall) => Response | Promise, +): { fetch: typeof fetch; calls: FakeFetchCall[] } { + const calls: FakeFetchCall[] = []; + const fakeFetch: typeof fetch = async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : (input as Request).url; + const method = (init?.method ?? "GET").toUpperCase(); + const headers = new Headers(init?.headers ?? {}); + const rawBody = init?.body; + let body = ""; + if (typeof rawBody === "string") body = rawBody; + else if (rawBody instanceof ArrayBuffer) + body = Buffer.from(rawBody).toString("utf8"); + else if (rawBody && ArrayBuffer.isView(rawBody)) { + body = Buffer.from( + rawBody.buffer, + rawBody.byteOffset, + rawBody.byteLength, + ).toString("utf8"); + } else if (rawBody != null) { + body = await new Response(rawBody as BodyInit).text(); + } + const call: FakeFetchCall = { url, method, headers, body }; + calls.push(call); + return responder(call); + }; + return { fetch: fakeFetch, calls }; +} + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { "content-type": "application/json" }, + }); +} + +function verifyCallSignature(call: FakeFetchCall): void { + const url = new URL(call.url); + const result = verifyRequest({ + secret: SECRET, + method: call.method, + path: url.pathname, + body: call.body, + headers: { + [SIG_HEADER]: call.headers.get(SIG_HEADER) ?? undefined, + [TS_HEADER]: call.headers.get(TS_HEADER) ?? undefined, + [NONCE_HEADER]: call.headers.get(NONCE_HEADER) ?? 
undefined, + }, + seenNonce: () => false, + }); + expect(result.valid).toBe(true); +} + +function makeFakeStateStore(): RunnerStateStoreOps & { + rows: Map< + string, + { + id: { userId: string; projectRef: string }; + state: Record; + } + >; +} { + const rows = new Map< + string, + { + id: { userId: string; projectRef: string }; + state: Record; + } + >(); + return { + rows, + async get(id, _kind) { + const k = `${id.userId}/${id.projectRef}`; + const row = rows.get(k); + return row + ? { + handle: (row.state.handle as string) ?? "", + state: row.state, + updatedAt: new Date(), + } + : null; + }, + async getByHandle(_kind, handle) { + for (const [, row] of rows) { + if ((row.state.handle as string) === handle) { + return { + id: row.id, + handle, + state: row.state, + updatedAt: new Date(), + }; + } + } + return null; + }, + async put(id, _kind, entry) { + const k = `${id.userId}/${id.projectRef}`; + rows.set(k, { id, state: entry.state }); + }, + async delete(id, _kind) { + const k = `${id.userId}/${id.projectRef}`; + rows.delete(k); + }, + async deleteByHandle(_kind, handle) { + for (const [k, row] of rows) { + if ((row.state.handle as string) === handle) rows.delete(k); + } + }, + }; +} + +describe("RemoteUserSandboxProvider.ensure", () => { + it("POSTs /api/sandboxes with HMAC headers and returns a Sandbox", async () => { + const { fetch, calls } = makeFakeFetch(() => + jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/handle-abc` }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + + const sandbox = await provider.ensure( + { userId: "user-1", projectRef: "proj-1" }, + { + repo: { + cloneUrl: "https://github.com/a/b.git", + userName: "n", + userEmail: "e@example.com", + branch: "main", + }, + }, + ); + + expect(calls).toHaveLength(1); + expect(calls[0].url).toBe(`${TUNNEL}/api/sandboxes`); + expect(calls[0].method).toBe("POST"); + const reqBody = JSON.parse(calls[0].body); + 
expect(reqBody.repo.branch).toBe("main"); + verifyCallSignature(calls[0]); + + expect(sandbox.handle).toBeTruthy(); + // Handle should be the deterministic computeHandle output, NOT the + // server-returned `handle-abc` — that string is just the + // sandboxUrl's path tail, not necessarily the same as the cluster's handle. + expect(sandbox.workdir).toBe(`${TUNNEL}/_sandbox/handle-abc`); + // previewUrl is now the same daemon URL — there's no separate + // /preview// rewrite in the per-daemon-tunnel world. + expect(sandbox.previewUrl).toBe(`${TUNNEL}/_sandbox/handle-abc`); + }); + + it("returns the same Sandbox on re-ensure without a second POST", async () => { + const { fetch, calls } = makeFakeFetch(() => + jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/handle-abc` }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + + const id = { userId: "u", projectRef: "p" }; + const first = await provider.ensure(id); + const second = await provider.ensure(id); + + expect(calls).toHaveLength(1); + expect(second.handle).toBe(first.handle); + expect(second.workdir).toBe(first.workdir); + }); + + it("throws when the link responds non-2xx", async () => { + const { fetch } = makeFakeFetch( + () => new Response("nope", { status: 500 }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + expect(provider.ensure({ userId: "u", projectRef: "p" })).rejects.toThrow( + /remote-user ensure failed: 500/, + ); + }); +}); + +describe("RemoteUserSandboxProvider.exec", () => { + it("proxies to /_decopilot_vm/exec with HMAC headers", async () => { + const { fetch, calls } = makeFakeFetch((call) => { + if (call.url.endsWith("/api/sandboxes")) { + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/h-1` }); + } + return jsonResponse({ + stdout: "ok", + stderr: "", + exitCode: 0, + timedOut: false, + }); + }); + const provider = 
new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + const out = await provider.exec(sandbox.handle, { command: "echo hi" }); + + expect(out.exitCode).toBe(0); + expect(out.stdout).toBe("ok"); + const execCall = calls[1]; + expect(execCall.url).toBe(`${TUNNEL}/_sandbox/h-1/_decopilot_vm/exec`); + expect(execCall.method).toBe("POST"); + verifyCallSignature(execCall); + }); + + it("throws on unknown handle", async () => { + const { fetch } = makeFakeFetch(() => jsonResponse({})); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + expect(provider.exec("nope", { command: "x" })).rejects.toThrow( + /unknown handle/, + ); + }); +}); + +describe("RemoteUserSandboxProvider.delete", () => { + it("calls DELETE /api/sandboxes/", async () => { + const { fetch, calls } = makeFakeFetch((call) => { + if (call.method === "POST") + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/h-1` }); + return new Response(null, { status: 204 }); + }); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + await provider.delete(sandbox.handle); + + const delCall = calls[1]; + expect(delCall.method).toBe("DELETE"); + expect(delCall.url).toBe( + `${TUNNEL}/api/sandboxes/${encodeURIComponent(sandbox.handle)}`, + ); + verifyCallSignature(delCall); + + // alive() with no record + no state store → false. 
+ const { fetch: deadFetch } = makeFakeFetch( + () => new Response(null, { status: 404 }), + ); + const probeOnly = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: deadFetch, + }); + expect(await probeOnly.alive(sandbox.handle)).toBe(false); + }); + + it("tolerates 404 from the link (already gone)", async () => { + const { fetch } = makeFakeFetch((call) => { + if (call.method === "POST") + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/h-1` }); + return new Response("not found", { status: 404 }); + }); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + // Must not throw. + await provider.delete(sandbox.handle); + }); +}); + +describe("RemoteUserSandboxProvider.alive", () => { + it("returns true after ensure() populated the in-process records cache", async () => { + // ensure() populates `records`, so alive() reuses it without hitting + // the state store. Probe URL is `/health`, unsigned. 
+ const { fetch, calls } = makeFakeFetch((call) => { + if (call.url.endsWith("/api/sandboxes")) + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/alive-1` }); + return new Response("", { status: 200 }); + }); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + const ok = await provider.alive(sandbox.handle); + expect(ok).toBe(true); + expect(calls[1].url).toBe(`${TUNNEL}/_sandbox/alive-1/health`); + expect(calls[1].method).toBe("GET"); + }); + + it("returns false when there is no cached record and no state store", async () => { + const { fetch } = makeFakeFetch(() => new Response(null, { status: 404 })); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + expect(await provider.alive("nope")).toBe(false); + }); +}); + +describe("RemoteUserSandboxProvider.proxyDaemonRequest", () => { + it("forwards to with HMAC", async () => { + const { fetch, calls } = makeFakeFetch((call) => { + if (call.url.endsWith("/api/sandboxes")) + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/h-1` }); + return new Response('{"ok":1}', { + status: 200, + headers: { "content-type": "application/json" }, + }); + }); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + const res = await provider.proxyDaemonRequest( + sandbox.handle, + "/_decopilot_vm/status", + { + method: "POST", + headers: new Headers({ "content-type": "application/json" }), + body: JSON.stringify({ ping: true }), + }, + ); + expect(res.status).toBe(200); + const proxyCall = calls[1]; + expect(proxyCall.url).toBe(`${TUNNEL}/_sandbox/h-1/_decopilot_vm/status`); + expect(JSON.parse(proxyCall.body)).toEqual({ ping: true }); + 
verifyCallSignature(proxyCall); + }); + + it("returns 404 when no record is known and no state store is configured", async () => { + // With no in-memory record and no state store hydration, the provider + // can't know where to forward — surface 404 to the caller instead of + // guessing a URL. + const { fetch, calls } = makeFakeFetch( + () => new Response("", { status: 200 }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const res = await provider.proxyDaemonRequest( + "any-handle", + "/_decopilot_vm/status", + { + method: "GET", + headers: new Headers(), + body: null, + }, + ); + expect(res.status).toBe(404); + expect(calls).toHaveLength(0); + }); +}); + +describe("RemoteUserSandboxProvider misc surface", () => { + it("localWorkdir + getPreviewUrl return null when no record is known", async () => { + const { fetch } = makeFakeFetch(() => jsonResponse({})); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + expect(await provider.localWorkdir("anything")).toBeNull(); + expect(await provider.getPreviewUrl("anything")).toBeNull(); + }); + + it("watchClaimLifecycle yields a single ready phase", async () => { + const { fetch } = makeFakeFetch(() => jsonResponse({})); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + }); + const phases: unknown[] = []; + for await (const phase of provider.watchClaimLifecycle("h")) { + phases.push(phase); + } + expect(phases).toEqual([{ kind: "ready" }]); + }); + + it("rejects construction without tunnelUrl or linkSecret", () => { + expect( + () => + new RemoteUserSandboxProvider({ + // @ts-expect-error - intentional + link: { tunnelUrl: "", linkSecret: "x" }, + }), + ).toThrow(); + expect( + () => + new RemoteUserSandboxProvider({ + // @ts-expect-error - intentional + link: { tunnelUrl: TUNNEL, 
linkSecret: "" }, + }), + ).toThrow(); + }); +}); + +describe("RemoteUserSandboxProvider + state store", () => { + it("persists {handle, sandboxUrl} on ensure", async () => { + const { fetch } = makeFakeFetch(() => + jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/persisted` }), + ); + const stateStore = makeFakeStateStore(); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + stateStore, + }); + + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + const row = await stateStore.getByHandle("remote-user", sandbox.handle); + expect(row).not.toBeNull(); + expect(row?.state.sandboxUrl).toBe(`${TUNNEL}/_sandbox/persisted`); + }); + + it("alive() probes sandboxUrl from state store on cache miss", async () => { + const stateStore = makeFakeStateStore(); + await stateStore.put({ userId: "u", projectRef: "p" }, "remote-user", { + handle: "fresh-handle", + state: { + handle: "fresh-handle", + sandboxUrl: `${TUNNEL}/_sandbox/fresh`, + }, + }); + const { fetch, calls } = makeFakeFetch( + () => new Response("", { status: 200 }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + stateStore, + }); + const ok = await provider.alive("fresh-handle"); + expect(ok).toBe(true); + expect(calls[0].url).toBe(`${TUNNEL}/_sandbox/fresh/health`); + }); + + it("proxyDaemonRequest forwards to sandboxUrl from state store on cache miss", async () => { + const stateStore = makeFakeStateStore(); + await stateStore.put({ userId: "u", projectRef: "p" }, "remote-user", { + handle: "proxy-handle", + state: { + handle: "proxy-handle", + sandboxUrl: `${TUNNEL}/_sandbox/proxy`, + }, + }); + const { fetch, calls } = makeFakeFetch( + () => new Response("{}", { status: 200 }), + ); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + stateStore, + }); + const res = 
await provider.proxyDaemonRequest( + "proxy-handle", + "/_decopilot_vm/exec", + { + method: "POST", + headers: new Headers(), + body: JSON.stringify({ cmd: "x" }), + }, + ); + expect(res.status).toBe(200); + expect(calls[0].url).toBe(`${TUNNEL}/_sandbox/proxy/_decopilot_vm/exec`); + }); + + it("alive() returns false when state store has no row", async () => { + const stateStore = makeFakeStateStore(); + const { fetch } = makeFakeFetch(() => new Response("", { status: 200 })); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + stateStore, + }); + expect(await provider.alive("never-seen")).toBe(false); + }); + + it("delete() clears the state store row", async () => { + const stateStore = makeFakeStateStore(); + const { fetch } = makeFakeFetch((call) => { + if (call.method === "POST") + return jsonResponse({ sandboxUrl: `${TUNNEL}/_sandbox/del` }); + return new Response(null, { status: 204 }); + }); + const provider = new RemoteUserSandboxProvider({ + link: { tunnelUrl: TUNNEL, linkSecret: SECRET }, + fetchImpl: fetch, + stateStore, + }); + const sandbox = await provider.ensure({ userId: "u", projectRef: "p" }); + expect(stateStore.rows.size).toBe(1); + await provider.delete(sandbox.handle); + expect(stateStore.rows.size).toBe(0); + }); +}); diff --git a/packages/sandbox/server/provider/remote-user/runner.ts b/packages/sandbox/server/provider/remote-user/runner.ts new file mode 100644 index 0000000000..66f946842f --- /dev/null +++ b/packages/sandbox/server/provider/remote-user/runner.ts @@ -0,0 +1,390 @@ +/** + * remote-user sandbox provider — cluster-side stub that forwards every + * `SandboxProvider` call to a per-user `link` binary running on the + * developer's laptop. 
The link exposes: + * + * - `/api/sandboxes` (POST: ensure, DELETE/: tear down) + * - `/_decopilot_vm/*` (exec + daemon proxy passthrough) + * - `/health` (alive probe) + * + * The control plane is authenticated with the link-protocol HMAC scheme. The + * per-daemon `sandboxUrl` (returned by the link's `POST /api/sandboxes`) + * is itself a daemon-authenticated URL — the daemon accepts the same HMAC + * against `DAEMON_LINK_SECRET` (set up by Task 1). HMAC requires symmetric + * key material; v2 will encrypt at rest with a cluster KMS key. + * + * The cluster builds a fresh `RemoteUserSandboxProvider` per request, so the + * in-memory `records` map is almost always empty. To remain functional across + * cluster pod boundaries we mirror docker's pattern: take a `stateStore` in + * the constructor, persist `{handle, sandboxUrl}` on ensure, hydrate on cache + * miss. The `records` map becomes an advisory in-process cache; the state + * store is the canonical lookup. + * + * `localWorkdir` returns null — the workdir lives on the laptop and is never + * referenced by cluster code. `watchClaimLifecycle` emits a single synthetic + * `ready` phase, matching host/docker semantics — by the time `ensure` + * resolves the link has already brought the daemon up. + */ + +import { signRequest } from "../../../../../apps/mesh/src/links/protocol/hmac"; +import { computeHandle } from "../shared"; +import type { ClaimPhase } from "../lifecycle-types"; +import type { RunnerStateStoreOps } from "../state-store"; +import type { + EnsureOptions, + ExecInput, + ExecOutput, + ProxyRequestInit, + Sandbox, + SandboxId, + SandboxProvider, +} from "../types"; + +const RUNNER_KIND = "remote-user" as const; + +/** + * Subset of `LinkEntry` the provider actually needs. The dispatch path passes + * the full `LinkEntry` it pulled from the registry; we accept anything + * structurally compatible so tests can fake it without inventing a + * `createdAt` timestamp. 
+ */ +export interface RemoteUserLinkRef { + tunnelUrl: string; + /** + * HMAC signing key — the raw bearer secret stored in `LinkEntry`. Both + * the cluster and the link sign with this same value (symmetric signing). + */ + linkSecret: string; +} + +export interface RemoteUserProviderOptions { + link: RemoteUserLinkRef; + /** @internal test seam */ + fetchImpl?: typeof fetch; + /** + * Persistent handle → URL store. Optional for compatibility with + * in-process tests that don't need cross-instance hydration; the + * cluster MUST pass one (KyselySandboxProviderStateStore) so a + * fresh provider per request can still find a previously-ensured + * sandbox. Same dependency the docker provider takes. + */ + stateStore?: RunnerStateStoreOps; +} + +interface RemoteRecord { + handle: string; + /** Daemon's public URL — `https://.deco.host` or `http://127.0.0.1:`. */ + sandboxUrl: string; +} + +export class RemoteUserSandboxProvider implements SandboxProvider { + readonly kind = RUNNER_KIND; + + private readonly link: RemoteUserLinkRef; + private readonly fetcher: typeof fetch; + private readonly stateStore: RunnerStateStoreOps | null; + private readonly records = new Map(); + + constructor(opts: RemoteUserProviderOptions) { + if (!opts.link?.tunnelUrl) { + throw new Error("RemoteUserSandboxProvider requires link.tunnelUrl"); + } + if (!opts.link?.linkSecret) { + throw new Error("RemoteUserSandboxProvider requires link.linkSecret"); + } + this.link = opts.link; + this.fetcher = opts.fetchImpl ?? fetch; + this.stateStore = opts.stateStore ?? 
null; + } + + async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { + const handle = computeHandle(id, opts.repo?.branch); + + const cached = this.records.get(handle); + if (cached) return this.toSandbox(cached); + + if (this.stateStore) { + const row = await this.stateStore.getByHandle(RUNNER_KIND, handle); + const sandboxUrl = (row?.state as { sandboxUrl?: string } | undefined) + ?.sandboxUrl; + if (sandboxUrl) { + const rec: RemoteRecord = { handle, sandboxUrl }; + this.records.set(handle, rec); + return this.toSandbox(rec); + } + } + + const res = await this.signedFetch("POST", "/api/sandboxes", { + handle, + repo: opts.repo, + branch: opts.repo?.branch, + }); + if (!res.ok) { + const detail = await safeReadText(res); + throw new Error( + `remote-user ensure failed: ${res.status}${detail ? ` ${detail}` : ""}`, + ); + } + const body = (await res.json()) as { sandboxUrl?: unknown }; + if (typeof body.sandboxUrl !== "string") { + throw new Error( + "remote-user ensure: link did not return a sandboxUrl string", + ); + } + const rec: RemoteRecord = { handle, sandboxUrl: body.sandboxUrl }; + this.records.set(handle, rec); + if (this.stateStore) { + await this.stateStore.put(id, RUNNER_KIND, { + handle, + state: { handle, sandboxUrl: body.sandboxUrl }, + }); + } + return this.toSandbox(rec); + } + + async exec(handle: string, input: ExecInput): Promise { + const rec = await this.resolveRecord(handle); + if (!rec) { + throw new Error( + `remote-user provider: unknown handle "${handle}" — was ensure() called?`, + ); + } + const bodyString = JSON.stringify(input); + const targetUrl = `${rec.sandboxUrl}/_decopilot_vm/exec`; + const sig = signRequest({ + secret: this.link.linkSecret, + method: "POST", + path: new URL(targetUrl).pathname, + body: bodyString, + }); + const res = await this.fetcher(targetUrl, { + method: "POST", + headers: { ...sig, "content-type": "application/json" }, + body: bodyString, + }); + if (!res.ok) { + const detail = await 
safeReadText(res); + throw new Error( + `remote-user exec failed: ${res.status}${detail ? ` ${detail}` : ""}`, + ); + } + return (await res.json()) as ExecOutput; + } + + async proxyDaemonRequest( + handle: string, + path: string, + init: ProxyRequestInit, + ): Promise { + const rec = await this.resolveRecord(handle); + if (!rec) { + return new Response(JSON.stringify({ error: "sandbox not found" }), { + status: 404, + headers: { "content-type": "application/json" }, + }); + } + const fullPath = path.startsWith("/") ? path : `/${path}`; + const targetUrl = `${rec.sandboxUrl}${fullPath}`; + const body = await normalizeBodyForSigning(init.body); + const headers = new Headers(init.headers); + // Strip hop-by-hop / cookie headers; HMAC headers replace any client-set + // signature header. + for (const h of [ + "host", + "cookie", + "connection", + "keep-alive", + "transfer-encoding", + "upgrade", + "x-mesh-signature", + "x-mesh-timestamp", + "x-mesh-nonce", + "authorization", + ]) { + headers.delete(h); + } + const sig = signRequest({ + secret: this.link.linkSecret, + method: init.method, + path: new URL(targetUrl).pathname, + body, + }); + for (const [k, v] of Object.entries(sig)) headers.set(k, v); + return this.fetcher(targetUrl, { + method: init.method, + headers, + body: body.length > 0 ? body : null, + signal: init.signal, + }); + } + + async alive(handle: string): Promise { + // The daemon's /health endpoint is unauthenticated; probe it directly. + // Hydrate the {handle → sandboxUrl} mapping from the state store on + // cache miss so a fresh provider in a different pod can still reach + // the daemon. 
+ const rec = await this.resolveRecord(handle); + if (!rec) return false; + try { + const res = await this.fetcher(`${rec.sandboxUrl}/health`); + return res.ok; + } catch { + return false; + } + } + + async delete(handle: string): Promise { + const rec = this.records.get(handle); + this.records.delete(handle); + if (this.stateStore) { + await this.stateStore.deleteByHandle(RUNNER_KIND, handle).catch(() => { + // best-effort — state-store row may already be gone + }); + } + // Always tell the link to tear down, even if we lost our cached record — + // the link is authoritative for sandbox lifecycle and may still hold the + // daemon process. + const res = await this.signedFetch( + "DELETE", + `/api/sandboxes/${encodeURIComponent(handle)}`, + ); + if (!res.ok && res.status !== 404) { + const detail = await safeReadText(res); + throw new Error( + `remote-user delete failed: ${res.status}${detail ? ` ${detail}` : ""}`, + ); + } + // Hint to dead-code: rec read only for symmetry / future logging. + void rec; + } + + async getPreviewUrl(handle: string): Promise { + const rec = await this.resolveRecord(handle); + return rec?.sandboxUrl ?? null; + } + + /** + * Workdir lives on the laptop; cluster code never references it. Returning + * null lets dispatch-run fall through to its default (`process.cwd()`), + * which is fine because cluster-side dispatch for `remote-user` is the + * decopilot Code Sandbox tool path — the harness itself runs on the + * laptop, where `localWorkdir` IS meaningful (different provider). + */ + async localWorkdir(_handle: string): Promise { + return null; + } + + // Same shape as host/docker/freestyle: a single synthetic `ready` is the + // only honest answer here. By the time `ensure` resolves the link has + // already brought the daemon up — there is no separately-observable + // pre-Ready window worth surfacing back to the UI. 
+ // eslint-disable-next-line require-yield + async *watchClaimLifecycle( + _handle: string, + _signal?: AbortSignal, + ): AsyncGenerator { + yield { kind: "ready" }; + } + + // ---- Internal helpers ----------------------------------------------------- + + private toSandbox(rec: RemoteRecord): Sandbox { + return { + handle: rec.handle, + // workdir is opaque to the cluster — surface the sandboxUrl so debug + // logs that include `sandbox.workdir` show something meaningful, but + // anything that tries to fs.stat() this string will (correctly) fail. + workdir: rec.sandboxUrl, + previewUrl: rec.sandboxUrl, + }; + } + + /** + * Cache → state-store hydration. The cluster builds a fresh provider per + * request, so the in-process `records` map is almost always empty even + * when the link previously ensured this handle. Falling back to the state + * store is what keeps alive/proxy/exec working across pod boundaries. + */ + private async resolveRecord(handle: string): Promise { + const cached = this.records.get(handle); + if (cached) return cached; + if (!this.stateStore) return null; + const row = await this.stateStore.getByHandle(RUNNER_KIND, handle); + const sandboxUrl = (row?.state as { sandboxUrl?: string } | undefined) + ?.sandboxUrl; + if (!sandboxUrl) return null; + const rec: RemoteRecord = { handle, sandboxUrl }; + this.records.set(handle, rec); + return rec; + } + + private async signedFetch( + method: string, + path: string, + body?: unknown, + ): Promise { + const bodyString = body === undefined ? "" : JSON.stringify(body); + const sig = signRequest({ + secret: this.link.linkSecret, + method, + path, + body: bodyString, + }); + const headers: Record = { + ...sig, + }; + if (bodyString.length > 0) headers["content-type"] = "application/json"; + return this.fetcher(`${this.link.tunnelUrl}${path}`, { + method, + headers, + body: bodyString.length > 0 ? 
bodyString : undefined, + }); + } +} + +/** + * Backwards-compatible factory matching the shape Phase 5 was originally + * sketched against in the plan. Construct via `new` is the canonical form; + * this exists so callers don't have to update if they're already importing + * `createRemoteUserProvider`. + */ +export function createRemoteUserProvider( + opts: RemoteUserProviderOptions, +): RemoteUserSandboxProvider { + return new RemoteUserSandboxProvider(opts); +} + +// ---- Module-private helpers -------------------------------------------------- + +async function safeReadText(res: Response): Promise { + try { + const t = await res.text(); + return t.length > 200 ? `${t.slice(0, 200)}…` : t; + } catch { + return ""; + } +} + +/** + * Reduce a `ProxyRequestInit.body` (BodyInit | null) to the string form HMAC + * signing expects. Buffers and ArrayBuffers are decoded as UTF-8 because the + * daemon control plane uses JSON exclusively; streams aren't supported here + * (decopilot doesn't proxy file uploads through the daemon today). If a + * future caller needs binary body support we'll have to switch the signing + * scheme to a content-hash header — punted to a follow-up. + */ +async function normalizeBodyForSigning(body: BodyInit | null): Promise { + if (body == null) return ""; + if (typeof body === "string") return body; + if (body instanceof ArrayBuffer) return Buffer.from(body).toString("utf8"); + if (ArrayBuffer.isView(body)) { + return Buffer.from(body.buffer, body.byteOffset, body.byteLength).toString( + "utf8", + ); + } + if (body instanceof URLSearchParams) return body.toString(); + // Fallback: drive it through Response.text() so Blob/FormData callers at + // least get a deterministic serialization. This is best-effort — callers + // sending FormData through proxyDaemonRequest are out-of-band. 
+ return await new Response(body).text(); +} diff --git a/packages/sandbox/server/runner/sandbox-ref.test.ts b/packages/sandbox/server/provider/sandbox-ref.test.ts similarity index 100% rename from packages/sandbox/server/runner/sandbox-ref.test.ts rename to packages/sandbox/server/provider/sandbox-ref.test.ts diff --git a/packages/sandbox/server/runner/sandbox-ref.ts b/packages/sandbox/server/provider/sandbox-ref.ts similarity index 100% rename from packages/sandbox/server/runner/sandbox-ref.ts rename to packages/sandbox/server/provider/sandbox-ref.ts diff --git a/packages/sandbox/server/runner/shared/build-config-payload.ts b/packages/sandbox/server/provider/shared/build-config-payload.ts similarity index 100% rename from packages/sandbox/server/runner/shared/build-config-payload.ts rename to packages/sandbox/server/provider/shared/build-config-payload.ts diff --git a/packages/sandbox/server/runner/shared/handle.test.ts b/packages/sandbox/server/provider/shared/handle.test.ts similarity index 100% rename from packages/sandbox/server/runner/shared/handle.test.ts rename to packages/sandbox/server/provider/shared/handle.test.ts diff --git a/packages/sandbox/server/runner/shared/handle.ts b/packages/sandbox/server/provider/shared/handle.ts similarity index 100% rename from packages/sandbox/server/runner/shared/handle.ts rename to packages/sandbox/server/provider/shared/handle.ts diff --git a/packages/sandbox/server/runner/shared/index.ts b/packages/sandbox/server/provider/shared/index.ts similarity index 100% rename from packages/sandbox/server/runner/shared/index.ts rename to packages/sandbox/server/provider/shared/index.ts diff --git a/packages/sandbox/server/runner/shared/inflight.ts b/packages/sandbox/server/provider/shared/inflight.ts similarity index 100% rename from packages/sandbox/server/runner/shared/inflight.ts rename to packages/sandbox/server/provider/shared/inflight.ts diff --git a/packages/sandbox/server/runner/shared/lock.ts 
b/packages/sandbox/server/provider/shared/lock.ts similarity index 88% rename from packages/sandbox/server/runner/shared/lock.ts rename to packages/sandbox/server/provider/shared/lock.ts index 9f3ad39ffe..7d596c7b83 100644 --- a/packages/sandbox/server/runner/shared/lock.ts +++ b/packages/sandbox/server/provider/shared/lock.ts @@ -8,12 +8,12 @@ * starve the main pool during long provisioning. */ import type { RunnerStateStore, RunnerStateStoreOps } from "../state-store"; -import type { RunnerKind, SandboxId } from "../types"; +import type { SandboxProviderKind, SandboxId } from "../types"; export function withSandboxLock( store: RunnerStateStore | null, id: SandboxId, - kind: RunnerKind, + kind: SandboxProviderKind, fn: (ops: RunnerStateStoreOps | null) => Promise, ): Promise { if (!store) return fn(null); diff --git a/packages/sandbox/server/runner/shared/preview-url.ts b/packages/sandbox/server/provider/shared/preview-url.ts similarity index 100% rename from packages/sandbox/server/runner/shared/preview-url.ts rename to packages/sandbox/server/provider/shared/preview-url.ts diff --git a/packages/sandbox/server/runner/state-store.ts b/packages/sandbox/server/provider/state-store.ts similarity index 100% rename from packages/sandbox/server/runner/state-store.ts rename to packages/sandbox/server/provider/state-store.ts diff --git a/packages/sandbox/server/runner/types.ts b/packages/sandbox/server/provider/types.ts similarity index 94% rename from packages/sandbox/server/runner/types.ts rename to packages/sandbox/server/provider/types.ts index e2835b14ab..f2eb4c8fc9 100644 --- a/packages/sandbox/server/runner/types.ts +++ b/packages/sandbox/server/provider/types.ts @@ -99,13 +99,13 @@ export interface ProxyRequestInit { } /** - * Persisted on `vmMap` and `sandbox_runner_state.runner_kind`. When widening, - * keep `VmMapEntry.runnerKind` in sync. + * Persisted on `vmMap` and `sandbox_runner_state.sandbox_provider_kind`. 
+ * When widening, keep `VmMapEntry.sandboxProviderKind` in sync. */ -export type RunnerKind = "host" | "docker" | "agent-sandbox"; +export type SandboxProviderKind = "docker" | "agent-sandbox" | "remote-user"; -export interface SandboxRunner { - readonly kind: RunnerKind; +export interface SandboxProvider { + readonly kind: SandboxProviderKind; ensure(id: SandboxId, opts?: EnsureOptions): Promise; exec(handle: string, input: ExecInput): Promise; diff --git a/packages/sandbox/server/runner/host/index.ts b/packages/sandbox/server/runner/host/index.ts deleted file mode 100644 index 7904c69988..0000000000 --- a/packages/sandbox/server/runner/host/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { HostSandboxRunner } from "./runner"; -export type { HostRunnerOptions } from "./runner"; diff --git a/packages/sandbox/server/runner/host/runner.test.ts b/packages/sandbox/server/runner/host/runner.test.ts deleted file mode 100644 index 46f47cf4dd..0000000000 --- a/packages/sandbox/server/runner/host/runner.test.ts +++ /dev/null @@ -1,434 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; -import { mkdtemp, rm } from "node:fs/promises"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { HostSandboxRunner } from "./runner"; -import type { RunnerStateStore } from "../state-store"; - -function makeStore(): RunnerStateStore { - const byKey = new Map(); - const byHandle = new Map(); - return { - async get(id, kind) { - return ( - (byKey.get(`${kind}:${id.userId}:${id.projectRef}`) as - | { handle: string; state: Record; updatedAt: Date } - | undefined) ?? null - ); - }, - async getByHandle(kind, handle) { - // biome-ignore lint/suspicious/noExplicitAny: test mock - return (byHandle.get(`${kind}:${handle}`) as any) ?? 
null; - }, - async put(id, kind, entry) { - const key = `${kind}:${id.userId}:${id.projectRef}`; - const rec = { - handle: entry.handle, - state: entry.state, - updatedAt: new Date(), - id, - }; - byKey.set(key, rec); - byHandle.set(`${kind}:${entry.handle}`, rec); - }, - async delete(id, kind) { - const key = `${kind}:${id.userId}:${id.projectRef}`; - const rec = byKey.get(key) as { handle: string } | undefined; - byKey.delete(key); - if (rec) byHandle.delete(`${kind}:${rec.handle}`); - }, - async deleteByHandle(kind, handle) { - byHandle.delete(`${kind}:${handle}`); - }, - async withLock(_id, _kind, fn) { - return fn(this); - }, - }; -} - -describe("HostSandboxRunner.ensure provisioning", () => { - let homeDir: string; - beforeEach(async () => { - homeDir = await mkdtemp(join(tmpdir(), "host-runner-")); - }); - afterEach(async () => { - await rm(homeDir, { recursive: true, force: true }); - }); - - it("spawns the daemon, probes /health, POSTs config, and persists", async () => { - let probeCount = 0; - const fakeSpawn = mock( - async (_args: { - workdir: string; - env: Record; - daemonPort: number; - }) => ({ - pid: 4242, - kill: () => true, - }), - ); - const fakeProbe = mock(async (_url: string) => { - probeCount++; - if (probeCount === 1) return null; - return { - ready: true, - bootId: "boot-from-daemon", - configured: false, - setup: { running: false, done: true }, - }; - }); - const fakePostConfig = mock( - async (_url: string, _token: string, _payload: unknown) => ({ - bootId: "boot-from-daemon", - transition: "first-bootstrap", - // biome-ignore lint/suspicious/noExplicitAny: test mock - config: _payload as any, - }), - ); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: makeStore(), - _spawn: fakeSpawn, - _probe: fakeProbe, - _postConfig: fakePostConfig, - }); - - const sandbox = await runner.ensure( - { userId: "u1", projectRef: "vmcp:1:branch:main" }, - { - repo: { - cloneUrl: "https://example.com/x.git", - userName: "u", - 
userEmail: "u@x", - branch: "main", - }, - }, - ); - - expect(sandbox.handle).toMatch(/^[a-z0-9-]+$/); - expect(sandbox.workdir).toBe(join(homeDir, "sandboxes", sandbox.handle)); - expect(sandbox.previewUrl).toMatch(/^http:\/\/[a-z0-9-]+\.localhost:\d+\//); - expect(fakeSpawn).toHaveBeenCalledTimes(1); - expect(probeCount).toBeGreaterThanOrEqual(2); - - const spawnArgs = fakeSpawn.mock.calls[0][0]; - expect(spawnArgs.env.DAEMON_TOKEN).toMatch(/^[0-9a-f]{48}$/); - expect(spawnArgs.env.DAEMON_BOOT_ID).toBeTruthy(); - expect(spawnArgs.env.APP_ROOT).toBe(sandbox.workdir); - expect(spawnArgs.env.PROXY_PORT).toBe(String(spawnArgs.daemonPort)); - // Config now lives at /config.json; no separate DAEMON_CONFIG_DIR. - expect(spawnArgs.env.DAEMON_CONFIG_DIR).toBeUndefined(); - expect(spawnArgs.env.CLONE_URL).toBeUndefined(); - expect(spawnArgs.env.BRANCH).toBeUndefined(); - expect(spawnArgs.env.RUNTIME).toBeUndefined(); - expect(spawnArgs.env.PORT).toMatch(/^\d+$/); - expect(Number(spawnArgs.env.PORT)).toBeGreaterThan(0); - expect(spawnArgs.env.SANDBOX_INGRESS_PORT).toMatch(/^\d+$/); - - // config was POSTed with the new TenantConfig shape. 
- expect(fakePostConfig).toHaveBeenCalledTimes(1); - const callArgs = fakePostConfig.mock.calls[0] as [ - string, - string, - { - git?: { - repository: { cloneUrl: string; branch?: string }; - identity: { userName: string; userEmail: string }; - }; - application?: { - runtime: string; - packageManager: { name: string }; - intent: string; - }; - }, - ]; - const [configUrl, _configToken, configPayload] = callArgs; - expect(configUrl).toBe(`http://127.0.0.1:${spawnArgs.daemonPort}`); - expect(configPayload.git?.repository?.cloneUrl).toBe( - "https://example.com/x.git", - ); - expect(configPayload.git?.repository?.branch).toBe("main"); - expect(configPayload.git?.identity?.userName).toBe("u"); - }); - - it("returns the cached sandbox on a second ensure() call", async () => { - const fakeSpawn = mock(async () => ({ pid: 5000, kill: () => true })); - const fakeProbe = mock(async () => ({ - ready: true, - bootId: "b", - configured: true, - setup: { running: false, done: true }, - })); - const fakePostConfig = mock(async () => ({ - bootId: "b", - transition: "first-bootstrap", - config: {} as never, - })); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: makeStore(), - _spawn: fakeSpawn, - _probe: fakeProbe, - _postConfig: fakePostConfig, - _isAlive: (pid) => pid === 5000, - }); - - const id = { userId: "u2", projectRef: "vmcp:2:branch:dev" }; - const opts = { - repo: { - cloneUrl: "https://example.com/y.git", - userName: "u", - userEmail: "u@x", - branch: "dev", - }, - }; - - const a = await runner.ensure(id, opts); - const b = await runner.ensure(id, opts); - - expect(a.handle).toBe(b.handle); - expect(fakeSpawn).toHaveBeenCalledTimes(1); - }); -}); - -describe("HostSandboxRunner.ensure rehydration", () => { - let homeDir: string; - beforeEach(async () => { - homeDir = await mkdtemp(join(tmpdir(), "host-runner-rehydrate-")); - }); - afterEach(async () => { - await rm(homeDir, { recursive: true, force: true }); - }); - - it("returns the 
previously-provisioned record when /health still answers", async () => { - const store = makeStore(); - const id = { userId: "u1", projectRef: "vmcp:1:branch:main" }; - - const handle = "deadbe-abcde"; - await store.put(id, "host", { - handle, - state: { - pid: process.pid, - daemonPort: 12345, - daemonUrl: "http://127.0.0.1:12345", - workdir: join(homeDir, "sandboxes", handle), - token: "t".repeat(48), - bootId: "old-boot", - }, - }); - - const fakeProbe = mock(async () => ({ - ready: true, - bootId: "old-boot", - configured: true, - setup: { running: false, done: true }, - })); - const fakeSpawn = mock(async () => { - throw new Error("should not be called on rehydrate"); - }); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: store, - _spawn: fakeSpawn, - _probe: fakeProbe, - _isAlive: (pid) => pid === process.pid, - }); - - const port = await runner.resolveDaemonPort(handle); - expect(port).toBe(12345); - expect(fakeProbe).toHaveBeenCalled(); - expect(fakeSpawn).not.toHaveBeenCalled(); - }); - - it("returns null and purges state when the persisted PID is dead", async () => { - const store = makeStore(); - const id = { userId: "u1", projectRef: "vmcp:1:branch:dead" }; - const handle = "deadpid-abcde"; - - await store.put(id, "host", { - handle, - state: { - pid: 999_999_999, - daemonPort: 12345, - daemonUrl: "http://127.0.0.1:12345", - workdir: join(homeDir, "sandboxes", handle), - token: "t".repeat(48), - bootId: "old-boot", - }, - }); - - const fakeProbe = mock(async () => ({ - ready: true, - bootId: "x", - configured: true, - setup: { running: false, done: true }, - })); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: store, - _spawn: mock(async () => ({ pid: 1234, kill: () => true })), - _probe: fakeProbe, - _isAlive: () => false, - }); - - const port = await runner.resolveDaemonPort(handle); - expect(port).toBeNull(); - expect(fakeProbe).not.toHaveBeenCalled(); - }); - - it("returns null when /health does not respond", 
async () => { - const store = makeStore(); - const id = { userId: "u1", projectRef: "vmcp:1:branch:nohealth" }; - const handle = "noheal-abcde"; - - await store.put(id, "host", { - handle, - state: { - pid: process.pid, - daemonPort: 12345, - daemonUrl: "http://127.0.0.1:12345", - workdir: join(homeDir, "sandboxes", handle), - token: "t".repeat(48), - bootId: "old-boot", - }, - }); - - const fakeProbe = mock(async () => null); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: store, - _spawn: mock(async () => ({ pid: 1234, kill: () => true })), - _probe: fakeProbe, - _isAlive: (pid) => pid === process.pid, - }); - - const port = await runner.resolveDaemonPort(handle); - expect(port).toBeNull(); - expect(fakeProbe).toHaveBeenCalled(); - }); -}); - -describe("HostSandboxRunner.delete", () => { - let homeDir: string; - beforeEach(async () => { - homeDir = await mkdtemp(join(tmpdir(), "host-runner-delete-")); - }); - afterEach(async () => { - await rm(homeDir, { recursive: true, force: true }); - }); - - it("kills the daemon, removes the workdir, and clears state-store entry", async () => { - const store = makeStore(); - const id = { userId: "u1", projectRef: "vmcp:1:branch:main" }; - - const killed: { signal: NodeJS.Signals }[] = []; - let aliveCount = 0; - const fakeSpawn = mock(async () => ({ pid: 99999, kill: () => true })); - const fakeProbe = mock(async () => ({ - ready: true, - bootId: "boot", - configured: true, - setup: { running: false, done: true }, - })); - const fakePostConfig = mock(async () => ({ - bootId: "boot", - transition: "first-bootstrap", - config: {} as never, - })); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: store, - _spawn: fakeSpawn, - _probe: fakeProbe, - _postConfig: fakePostConfig, - _kill: (_pid, signal) => killed.push({ signal }), - _isAlive: () => { - aliveCount++; - return aliveCount === 1; - }, - }); - - const sandbox = await runner.ensure(id, { - repo: { - cloneUrl: 
"https://example.com/x.git", - userName: "u", - userEmail: "u@x", - branch: "main", - }, - }); - - const { existsSync } = await import("node:fs"); - expect(existsSync(join(homeDir, "sandboxes"))).toBe(true); - - await runner.delete(sandbox.handle); - - expect(killed.length).toBeGreaterThanOrEqual(1); - expect(killed[0].signal).toBe("SIGTERM"); - expect(existsSync(sandbox.workdir)).toBe(false); - expect(await store.getByHandle("host", sandbox.handle)).toBeNull(); - }); - - it("escalates to SIGKILL when the daemon ignores SIGTERM", async () => { - const store = makeStore(); - const id = { userId: "u1", projectRef: "vmcp:1:branch:zombie" }; - - const killed: NodeJS.Signals[] = []; - const fakeSpawn = mock(async () => ({ pid: 88888, kill: () => true })); - const fakeProbe = mock(async () => ({ - ready: true, - bootId: "b", - configured: true, - setup: { running: false, done: true }, - })); - const fakePostConfig = mock(async () => ({ - bootId: "b", - transition: "first-bootstrap", - config: {} as never, - })); - - const runner = new HostSandboxRunner({ - homeDir, - stateStore: store, - _spawn: fakeSpawn, - _probe: fakeProbe, - _postConfig: fakePostConfig, - _kill: (_pid, signal) => killed.push(signal), - _isAlive: () => true, - }); - - const sandbox = await runner.ensure(id, { - repo: { - cloneUrl: "https://example.com/y.git", - userName: "u", - userEmail: "u@x", - branch: "zombie", - }, - }); - - await runner.delete(sandbox.handle); - - expect(killed).toContain("SIGTERM"); - expect(killed).toContain("SIGKILL"); - }); - - it("is a no-op for an unknown handle (no throw, no work)", async () => { - const runner = new HostSandboxRunner({ - homeDir, - stateStore: makeStore(), - _spawn: mock(async () => ({ pid: 0, kill: () => true })), - _probe: mock(async () => null), - _kill: () => { - throw new Error("should not be called"); - }, - _isAlive: () => false, - }); - - await runner.delete("does-not-exist"); - }); -}); diff --git a/packages/sandbox/server/runner/host/runner.ts 
b/packages/sandbox/server/runner/host/runner.ts deleted file mode 100644 index 463f94c65f..0000000000 --- a/packages/sandbox/server/runner/host/runner.ts +++ /dev/null @@ -1,599 +0,0 @@ -/** - * Host sandbox runner — local dev / single-tenant self-host. - * - * Spawns the same Bun-based daemon as Docker but as a host child process, - * with the workdir at `${homeDir}/sandboxes//`. When `opts.repo` is - * set, the daemon clones cloneUrl@branch into that workdir during setup; - * otherwise the workdir stays empty and the daemon skips clone/install/ - * autostart. The local ingress (`startLocalSandboxIngress`) routes - * `.localhost:7070` to the daemon's host-side TCP port. - * - * Hardening (read-only rootfs, dropped caps, memory limits) is intentionally - * absent — the daemon runs in the user's trust boundary. - */ - -import { createHash, randomBytes, randomUUID } from "node:crypto"; -import { existsSync } from "node:fs"; -import { mkdir, rename, rm, writeFile } from "node:fs/promises"; -import { createServer } from "node:net"; -import { join, resolve } from "node:path"; -import { fileURLToPath } from "node:url"; -import { - postConfig, - probeDaemonHealth, - proxyDaemonRequest, - daemonBash, -} from "../../daemon-client"; -import type { - ConfigPatch, - ConfigResponse, - DaemonHealth, -} from "../../daemon-client"; -import { - applyPreviewPattern, - buildConfigPayload, - computeHandle, -} from "../shared"; -import type { RunnerStateStore } from "../state-store"; -import type { - EnsureOptions, - ExecInput, - ExecOutput, - ProxyRequestInit, - Sandbox, - SandboxId, - SandboxRunner, -} from "../types"; -import type { ClaimPhase } from "../lifecycle-types"; - -const RUNNER_KIND = "host" as const; -const READY_TIMEOUT_MS = 30_000; -const READY_INTERVAL_MS = 250; -const STOP_GRACE_MS = 2_000; - -type DaemonProcess = { - pid: number; - kill: (signal?: NodeJS.Signals | number) => boolean; -}; -type SpawnFn = (args: { - workdir: string; - env: Record; - daemonPort: number; 
-}) => Promise; -type HealthProbeFn = (daemonUrl: string) => Promise; -type PostConfigFn = ( - daemonUrl: string, - token: string, - payload: ConfigPatch, -) => Promise; -type KillFn = (pid: number, signal: NodeJS.Signals) => void; -type IsAliveFn = (pid: number) => boolean; - -export interface HostRunnerOptions { - /** Root data directory; usually `settings.home` (i.e. DATA_DIR). */ - homeDir: string; - stateStore?: RunnerStateStore; - /** Override preview URL pattern (matches DockerRunnerOptions semantics). */ - previewUrlPattern?: string; - /** @internal test seam */ - _spawn?: SpawnFn; - /** @internal test seam */ - _probe?: HealthProbeFn; - /** @internal test seam */ - _postConfig?: PostConfigFn; - /** @internal test seam */ - _kill?: KillFn; - /** @internal test seam */ - _isAlive?: IsAliveFn; -} - -interface HostRecord { - id: SandboxId; - handle: string; - pid: number; - daemonPort: number; - daemonUrl: string; - workdir: string; - token: string; - bootId: string; -} - -interface PersistedHostState { - pid: number; - daemonPort: number; - daemonUrl: string; - workdir: string; - token: string; - bootId: string; -} - -export class HostSandboxRunner implements SandboxRunner { - readonly kind = RUNNER_KIND; - - private readonly records = new Map(); - private readonly homeDir: string; - private readonly stateStore: RunnerStateStore | null; - private readonly previewUrlPattern: string | null; - private readonly spawnFn: SpawnFn; - private readonly probeFn: HealthProbeFn; - private readonly postConfigFn: PostConfigFn; - private readonly killFn: KillFn; - private readonly isAliveFn: IsAliveFn; - - constructor(opts: HostRunnerOptions) { - if (!opts.homeDir) { - throw new Error("HostSandboxRunner requires a homeDir (DATA_DIR)"); - } - this.homeDir = opts.homeDir; - this.stateStore = opts.stateStore ?? null; - this.previewUrlPattern = opts.previewUrlPattern ?? null; - this.spawnFn = opts._spawn ?? createDefaultSpawn(this.homeDir); - this.probeFn = opts._probe ?? 
probeDaemonHealth; - this.postConfigFn = opts._postConfig ?? postConfig; - this.killFn = opts._kill ?? ((pid, sig) => process.kill(pid, sig)); - this.isAliveFn = opts._isAlive ?? isPidAlive; - } - - // ---- SandboxRunner surface ------------------------------------------------ - - async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { - const handle = computeHandle(id, opts.repo?.branch); - - // 1. In-memory cache hit? - const cached = this.records.get(handle); - if (cached && this.isAliveFn(cached.pid)) return this.toSandbox(cached); - - // 2. State-store resume. - if (this.stateStore) { - const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); - if (persisted) { - const rec = await this.rehydrate(persisted.id, persisted); - if (rec) { - this.records.set(handle, rec); - return this.toSandbox(rec); - } - await this.stateStore - .deleteByHandle(RUNNER_KIND, handle) - .catch(() => undefined); - } - } - - // 3. Fresh provision. - const workdir = this.workdirFor(handle); - // Pre-create the workspace root so the daemon (and bash routes) have - // a valid cwd before clone runs. The daemon clones into `/app`, - // not `` itself, so a pre-created workspace dir doesn't trip - // git's "destination already exists" check. - await mkdir(workdir, { recursive: true }); - - const token = randomBytes(24).toString("hex"); - const bootId = randomUUID(); - const daemonPort = await preallocatePort(); - const daemonUrl = `http://127.0.0.1:${daemonPort}`; - const devPort = await preallocatePort(); - const ingressPort = await preallocatePort(); - - const env = buildDaemonEnv({ - token, - bootId, - workdir, - daemonPort, - devPort, - ingressPort, - extraEnv: opts.env, - }); - const configPayload = buildConfigPayload({ - runtime: opts.workload?.runtime ?? "bun", - packageManager: opts.workload?.packageManager - ? { - name: opts.workload.packageManager, - ...(opts.workload.packageManagerPath - ? 
{ path: opts.workload.packageManagerPath } - : {}), - } - : null, - repo: opts.repo ?? null, - port: opts.workload?.devPort ?? devPort, - }); - - const proc = await this.spawnFn({ workdir, env, daemonPort }); - try { - await this.waitForDaemon(daemonUrl); - if (configPayload) { - await this.postConfigFn(daemonUrl, token, configPayload); - } - } catch (err) { - // Daemon never came up (or rejected the bootstrap) — kill it so we don't - // leak the child process or pin daemonPort. The deterministic workdir is - // left in place; a retry will reuse it. - try { - proc.kill("SIGKILL"); - } catch { - /* already gone */ - } - throw err; - } - - const rec: HostRecord = { - id, - handle, - pid: proc.pid, - daemonPort, - daemonUrl, - workdir, - token, - bootId, - }; - this.records.set(handle, rec); - - if (this.stateStore) { - const state = { - pid: rec.pid, - daemonPort: rec.daemonPort, - daemonUrl: rec.daemonUrl, - workdir: rec.workdir, - token: rec.token, - bootId: rec.bootId, - } as PersistedHostState as unknown as Record; - await this.stateStore.put(id, RUNNER_KIND, { handle, state }); - } - return this.toSandbox(rec); - } - - /** - * Match docker's `waitForDaemonReady` semantics: return as soon as `/health` - * responds with a valid shape, even if `health.ready === false`. The prior - * code waited for `ready === true`, which only flips after the daemon's - * upstream probe finds the user's dev server listening — i.e. clone + - * install + autoStartDev all complete. That gating blocked VM_START until - * the dev server was up, kept the SSE proxy from connecting in the - * meantime, and made the frontend look frozen for the entire setup window - * before flushing a flood of replayed logs. Dev-server-ready is still - * observable via the daemon's `status` SSE events. - * - * Inlined (vs. calling `waitForDaemonReady` directly) so `_probe` test - * seam still drives the loop. 
- */ - private async waitForDaemon(daemonUrl: string): Promise { - const deadline = Date.now() + READY_TIMEOUT_MS; - while (Date.now() < deadline) { - const health = await this.probeFn(daemonUrl); - if (health) return; - await new Promise((r) => setTimeout(r, READY_INTERVAL_MS)); - } - throw new Error(`daemon at ${daemonUrl} never reported healthy`); - } - - async exec(handle: string, input: ExecInput): Promise { - const rec = await this.requireRecord(handle); - return daemonBash(rec.daemonUrl, rec.token, input); - } - - async delete(handle: string): Promise { - const rec = await this.getRecord(handle); - this.records.delete(handle); - - if (rec) { - if (this.isAliveFn(rec.pid)) { - try { - this.killFn(rec.pid, "SIGTERM"); - } catch { - /* already gone */ - } - const deadline = Date.now() + STOP_GRACE_MS; - while (Date.now() < deadline) { - if (!this.isAliveFn(rec.pid)) break; - await new Promise((r) => setTimeout(r, 50)); - } - if (this.isAliveFn(rec.pid)) { - try { - this.killFn(rec.pid, "SIGKILL"); - } catch { - /* ignore */ - } - } - } - await rm(rec.workdir, { recursive: true, force: true }).catch((err) => - console.warn( - `[HostSandboxRunner] rm workdir(${handle}) failed:`, - err instanceof Error ? err.message : String(err), - ), - ); - } - - if (this.stateStore) { - if (rec) await this.stateStore.delete(rec.id, RUNNER_KIND); - else await this.stateStore.deleteByHandle(RUNNER_KIND, handle); - } - } - - async alive(handle: string): Promise { - // Use getRecord (which rehydrates from the state-store on cold mesh - // boot) so the answer is honest regardless of in-memory cache state. - // Without this, a fresh mesh process with a still-running daemon would - // report alive=false and the SSE's stale-handle probe would emit a - // spurious `gone` event before VM_START got a chance to rehydrate. 
- const rec = await this.getRecord(handle); - if (!rec) return false; - return this.isAliveFn(rec.pid); - } - - // No pre-Ready window worth surfacing: VM_START's `runner.ensure` blocks - // until the daemon's HTTP server is up (typically <1s on host). Yield a - // single `ready` and let the caller proceed straight to the daemon SSE. - // Generator returns immediately even if `signal` aborts later — there's - // nothing to clean up on the host side. - async *watchClaimLifecycle( - _handle: string, - _signal?: AbortSignal, - ): AsyncGenerator { - yield { kind: "ready" }; - } - - async getPreviewUrl(handle: string): Promise { - const rec = await this.getRecord(handle); - return rec ? this.composePreviewUrl(rec) : null; - } - - async proxyDaemonRequest( - handle: string, - path: string, - init: ProxyRequestInit, - ): Promise { - const rec = await this.getRecord(handle); - if (!rec) { - return new Response(JSON.stringify({ error: "sandbox not found" }), { - status: 404, - headers: { "content-type": "application/json" }, - }); - } - return proxyDaemonRequest(rec.daemonUrl, rec.token, path, init); - } - - // ---- Public host-only surface --------------------------------------------- - - /** Used by the local ingress to map handle → daemon TCP port. */ - async resolveDaemonPort(handle: string): Promise { - const rec = await this.getRecord(handle); - return rec?.daemonPort ?? null; - } - - /** - * Host-side absolute path of the per-branch clone. Used by dispatch-run to - * set `cwd` on the Claude Code adapter so it edits the right files. Null - * for unknown handles — caller falls back to `process.cwd()`. - */ - async localWorkdir(handle: string): Promise { - const rec = await this.getRecord(handle); - return rec?.workdir ?? 
null; - } - - // ---- Internal helpers ------------------------------------------------------ - - private workdirFor(handle: string): string { - return join(this.homeDir, "sandboxes", handle); - } - - private composePreviewUrl(rec: HostRecord): string { - if (this.previewUrlPattern) { - return applyPreviewPattern(this.previewUrlPattern, rec.handle); - } - const envRoot = process.env.SANDBOX_ROOT_URL; - if (envRoot) return applyPreviewPattern(envRoot, rec.handle); - const ingressPort = Number(process.env.SANDBOX_INGRESS_PORT ?? 7070); - return `http://${rec.handle}.localhost:${ingressPort}/`; - } - - private toSandbox(rec: HostRecord): Sandbox { - return { - handle: rec.handle, - workdir: rec.workdir, - previewUrl: this.composePreviewUrl(rec), - }; - } - - private async getRecord(handle: string): Promise { - const cached = this.records.get(handle); - if (cached) return cached; - if (!this.stateStore) return null; - const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); - if (!persisted) return null; - const rec = await this.rehydrate(persisted.id, persisted); - if (rec) this.records.set(handle, rec); - return rec; - } - - private async requireRecord(handle: string): Promise { - const rec = await this.getRecord(handle); - if (!rec) throw new Error(`unknown sandbox handle ${handle}`); - return rec; - } - - private async rehydrate( - id: SandboxId, - persisted: { handle: string; state: Record }, - ): Promise { - const state = persisted.state as Partial; - if ( - typeof state.pid !== "number" || - typeof state.daemonPort !== "number" || - typeof state.daemonUrl !== "string" || - typeof state.workdir !== "string" || - typeof state.token !== "string" || - typeof state.bootId !== "string" - ) { - return null; - } - if (!this.isAliveFn(state.pid)) return null; - const health = await this.probeFn(state.daemonUrl); - if (!health) return null; - return { - id, - handle: persisted.handle, - pid: state.pid, - daemonPort: state.daemonPort, - daemonUrl: 
state.daemonUrl, - workdir: state.workdir, - token: state.token, - bootId: health.bootId, - }; - } -} - -// ---- Module-private helpers (used from later tasks) -------------------------- - -function isPidAlive(pid: number): boolean { - if (pid <= 0) return false; - try { - process.kill(pid, 0); - return true; - } catch { - return false; - } -} - -/** - * Pre-allocate a host-side TCP port. The daemon binds to it on startup. - * Race window is non-zero — the kernel may hand the port to another process - * between close() and the daemon's bind() — in which case the daemon fails - * to come up, `waitForDaemon` times out, and `ensure()` rejects. There is - * no automatic retry; the caller (e.g. VM_START) surfaces the error. In - * practice this never fires on a developer machine. - */ -function preallocatePort(): Promise { - return new Promise((resolve_, reject) => { - const srv = createServer(); - srv.unref(); - srv.on("error", reject); - srv.listen(0, "127.0.0.1", () => { - const addr = srv.address(); - if (addr && typeof addr === "object") { - const { port } = addr; - srv.close(() => resolve_(port)); - } else { - srv.close(() => reject(new Error("could not allocate port"))); - } - }); - }); -} - -function buildDaemonEnv(args: { - token: string; - bootId: string; - workdir: string; - daemonPort: number; - devPort: number; - ingressPort: number; - extraEnv: Record | undefined; -}): Record { - return { - DAEMON_TOKEN: args.token, - DAEMON_BOOT_ID: args.bootId, - APP_ROOT: args.workdir, - PROXY_PORT: String(args.daemonPort), - // Inherited by every child the daemon spawns. extraEnv is spread last - // so the caller can override (rare — passing PORT/SANDBOX_INGRESS_PORT/ - // VITE_PORT through opts.env defeats the collision-avoidance, but the - // escape hatch stays). - PORT: String(args.devPort), - SANDBOX_INGRESS_PORT: String(args.ingressPort), - ...(args.extraEnv ?? 
{}), - }; -} - -// ---- Daemon executable resolution ------------------------------------------ -// -// In dev (source tree present), spawn `bun run ` so the -// daemon code reloads on file change without a build step. -// -// In production (`bunx decocms@latest`), `runner.ts` has been inlined into -// `dist/server/server.js`, so the source TS path resolves to the -// nonexistent `/node_modules/daemon/entry.ts`. Materialize the -// embedded bundle (loaded lazily from `daemon-asset.ts`) into -// `${homeDir}/.deco/cache/sandbox-daemon-.js` and spawn that. -// -// `node-pty` is a runtime dep of the daemon. Its install location lives -// inside the parent's `node_modules` tree, but the materialized bundle -// sits in DATA_DIR — bun won't find `node-pty` by walking up from there. -// Resolve the parent's node_modules dir at the call site and pass it via -// `NODE_PATH` so the spawned daemon can `import "node-pty"`. - -function resolveSourceDaemonPath(): string { - return resolve( - fileURLToPath(new URL("../../../daemon/entry.ts", import.meta.url)), - ); -} - -function resolveNodePtyNodeModulesDir(): string { - // node-pty is a peer of the parent process (decocms ships it as a direct - // dep; in dev it lives in packages/sandbox/node_modules). We resolve from - // this module's location and walk back to the enclosing node_modules - // root. - const ptyEntry = Bun.resolveSync("node-pty", import.meta.dir); - const marker = "/node_modules/"; - const idx = ptyEntry.lastIndexOf(marker); - if (idx < 0) { - throw new Error( - `[HostSandboxRunner] could not derive node_modules path from node-pty resolution: ${ptyEntry}`, - ); - } - return ptyEntry.slice(0, idx + marker.length - 1); -} - -async function materializeDaemonBundle(homeDir: string): Promise { - // Lazy-imported so tests using the `_spawn` test seam don't trigger the - // text-import resolution (which would require `daemon/dist/daemon.js` to - // exist on disk before the bundle has been built). 
- const { DAEMON_BUNDLE } = await import("./daemon-asset"); - const hash = createHash("sha256") - .update(DAEMON_BUNDLE) - .digest("hex") - .slice(0, 16); - const cacheDir = join(homeDir, ".deco", "cache"); - const cachePath = join(cacheDir, `sandbox-daemon-${hash}.js`); - if (existsSync(cachePath)) return cachePath; - await mkdir(cacheDir, { recursive: true }); - // Write atomically — concurrent spawns racing to materialize the same - // hashed file are tolerated because `rename` is atomic on POSIX. - const tmpPath = `${cachePath}.${process.pid}.tmp`; - await writeFile(tmpPath, DAEMON_BUNDLE); - await rename(tmpPath, cachePath); - return cachePath; -} - -async function resolveDaemonExec(homeDir: string): Promise { - const sourceTs = resolveSourceDaemonPath(); - if (existsSync(sourceTs)) return sourceTs; - return materializeDaemonBundle(homeDir); -} - -function createDefaultSpawn(homeDir: string): SpawnFn { - return async (args) => { - const daemonExec = await resolveDaemonExec(homeDir); - const ptyNodeModulesDir = resolveNodePtyNodeModulesDir(); - const existingNodePath = process.env.NODE_PATH; - const nodePath = existingNodePath - ? `${ptyNodeModulesDir}:${existingNodePath}` - : ptyNodeModulesDir; - const proc = Bun.spawn({ - cmd: ["bun", "run", daemonExec], - // cwd is intentionally inherited from the parent — daemon resolves - // its own paths relative to the entry file. 
- env: { - ...process.env, - NODE_PATH: nodePath, - ...args.env, - }, - stdout: "inherit", - stderr: "inherit", - stdin: "ignore", - }); - return { - pid: proc.pid, - kill: (sig) => { - proc.kill(sig as NodeJS.Signals | number | undefined); - return true; - }, - }; - }; -} diff --git a/packages/sandbox/server/runner/index.test.ts b/packages/sandbox/server/runner/index.test.ts deleted file mode 100644 index 3c5d09af81..0000000000 --- a/packages/sandbox/server/runner/index.test.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; -import { resolveRunnerKindFromEnv } from "./index"; - -describe("resolveRunnerKindFromEnv", () => { - const ORIG = { ...process.env }; - beforeEach(() => { - delete process.env.STUDIO_SANDBOX_RUNNER; - }); - afterEach(() => { - process.env = { ...ORIG }; - }); - - it("defaults to 'host' when nothing is configured", () => { - expect(resolveRunnerKindFromEnv()).toBe("host"); - }); - - it("honors explicit STUDIO_SANDBOX_RUNNER=docker", () => { - process.env.STUDIO_SANDBOX_RUNNER = "docker"; - expect(resolveRunnerKindFromEnv()).toBe("docker"); - }); - - it("honors explicit STUDIO_SANDBOX_RUNNER=agent-sandbox", () => { - process.env.STUDIO_SANDBOX_RUNNER = "agent-sandbox"; - expect(resolveRunnerKindFromEnv()).toBe("agent-sandbox"); - }); - - it("throws on unknown STUDIO_SANDBOX_RUNNER value", () => { - process.env.STUDIO_SANDBOX_RUNNER = "nonsense"; - expect(() => resolveRunnerKindFromEnv()).toThrow( - /Unknown STUDIO_SANDBOX_RUNNER/, - ); - }); -}); diff --git a/packages/sandbox/server/runner/index.ts b/packages/sandbox/server/runner/index.ts deleted file mode 100644 index 4922f1b57d..0000000000 --- a/packages/sandbox/server/runner/index.ts +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Public surface. Ships `DockerSandboxRunner` only via the default entry; - * agent-sandbox sits behind its own subpath export (./runner/agent-sandbox) - * because its SDK is heavy and not every deploy needs it. 
- */ - -import { DockerSandboxRunner, type DockerRunnerOptions } from "./docker"; -import type { RunnerStateStore } from "./state-store"; -import type { RunnerKind, SandboxRunner } from "./types"; - -export type { - EnsureOptions, - ExecInput, - ExecOutput, - ProxyRequestInit, - RunnerKind, - Sandbox, - SandboxId, - SandboxRunner, - Workload, -} from "./types"; -export type { ClaimFailureReason, ClaimPhase } from "./lifecycle-types"; -export { sandboxIdKey } from "./types"; -export { DockerSandboxRunner } from "./docker"; -export type { DockerExec, DockerRunnerOptions, ExecResult } from "./docker"; -export { HostSandboxRunner } from "./host"; -export type { HostRunnerOptions } from "./host"; -// Needed by mesh callers (decopilot dispatch-run) that compute handles -// directly. Re-exported here so consumers don't dig into shared/. -export { computeHandle } from "./shared"; -export { ensureSandboxImage } from "../image-build"; -export type { EnsureImageOptions } from "../image-build"; -export { startLocalSandboxIngress } from "./docker"; -export { - sweepDockerOrphansOnBoot, - sweepDockerOrphansOnShutdown, -} from "./docker"; -export type { SweepDockerOrphansOnBootOptions } from "./docker"; -export type { - RunnerStateRecord, - RunnerStateRecordWithId, - RunnerStatePut, - RunnerStateStore, - RunnerStateStoreOps, -} from "./state-store"; -export { - composeSandboxRef, - type AgentSandboxRefInput, - type SandboxRefInput, - type ThreadSandboxRefInput, -} from "./sandbox-ref"; - -export interface CreateDockerRunnerOptions { - stateStore?: RunnerStateStore; - docker?: Omit; -} - -/** Convenience for host apps wiring only the in-package runner. 
*/ -export function createDockerRunner( - opts: CreateDockerRunnerOptions = {}, -): SandboxRunner { - return new DockerSandboxRunner({ - ...opts.docker, - stateStore: opts.stateStore, - }); -} - -const RUNNER_KINDS: ReadonlySet = new Set([ - "host", - "docker", - "agent-sandbox", -]); - -/** - * Single resolution rule: - * - explicit STUDIO_SANDBOX_RUNNER wins (validated against the kind set); - * - otherwise default to "host". - * - * Docker CLI presence is not probed. Any non-host runner must be opted into - * explicitly. - */ -export function resolveRunnerKindFromEnv(): RunnerKind { - const raw = process.env.STUDIO_SANDBOX_RUNNER; - const kind = (raw && raw.length > 0 ? raw : "host") as RunnerKind; - if (!RUNNER_KINDS.has(kind)) { - throw new Error( - `Unknown STUDIO_SANDBOX_RUNNER="${raw}" — expected "host", "docker", or "agent-sandbox".`, - ); - } - return kind; -} diff --git a/packages/sandbox/tsconfig.json b/packages/sandbox/tsconfig.json index 1df7a2fa35..904d9ddfda 100644 --- a/packages/sandbox/tsconfig.json +++ b/packages/sandbox/tsconfig.json @@ -2,7 +2,6 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": ".", "declaration": true, "declarationMap": true },