diff --git a/packages/argent-mcp/src/content.ts b/packages/argent-mcp/src/content.ts index 3306a48c..74eac821 100644 --- a/packages/argent-mcp/src/content.ts +++ b/packages/argent-mcp/src/content.ts @@ -117,10 +117,14 @@ function isRecord(value: unknown): value is Record { // ── screenshot-diff adapter ────────────────────────────────────────── +/** + * `diffPath` / `contextDiffPath` are artifact handles on current tool-servers + * and raw host-path strings on older ones; both shapes render here. + */ export interface ScreenshotDiffResult { summary: string; - diffPath?: string; - contextDiffPath?: string; + diffPath?: unknown; + contextDiffPath?: unknown; } export function isScreenshotDiffResult(value: unknown): value is ScreenshotDiffResult { @@ -128,19 +132,47 @@ export function isScreenshotDiffResult(value: unknown): value is ScreenshotDiffR return typeof value.summary === "string"; } -// Render a screenshot-diff tool result as MCP content blocks. +// Render a screenshot-diff tool result as MCP content blocks: the downscaled +// context-diff image inline, then the textual summary. export async function screenshotDiffToMcpContent( - result: ScreenshotDiffResult + result: ScreenshotDiffResult, + ctx?: ContentContext ): Promise { const blocks: ContentBlock[] = []; - if (typeof result.contextDiffPath === "string") { - const buf = await readFile(result.contextDiffPath); - blocks.push({ - type: "image" as const, - data: buf.toString("base64"), - mimeType: "image/png" as const, - }); + // Resolve artifact handles to local files first; the context diff's bytes + // come back in `images` whether the file was already on this machine or was + // downloaded from a remote tool-server. + let contextDiffPath = result.contextDiffPath; + let materializedImages: { localPath: string; data: Buffer; mimeType: string }[] = []; + if (ctx) { + const { result: rewritten, images } = await materializeArtifacts(result, ctx); + contextDiffPath = (rewritten as ScreenshotDiffResult).contextDiffPath; + materializedImages = images; + } + + if (typeof contextDiffPath === "string") { + const fromMaterializer = materializedImages.find((img) => img.localPath === contextDiffPath); + if (fromMaterializer) { + blocks.push({ + type: "image" as const, + data: fromMaterializer.data.toString("base64"), + mimeType: fromMaterializer.mimeType, + }); + } else { + // Legacy tool-server: a raw host path the materializer passed through. + // Only readable when co-located — exactly the old behavior. + try { + const buf = await readFile(contextDiffPath); + blocks.push({ + type: "image" as const, + data: buf.toString("base64"), + mimeType: "image/png" as const, + }); + } catch { + // Image unavailable; the summary below still renders. + } + } } blocks.push({ type: "text" as const, text: result.summary }); diff --git a/packages/argent-mcp/src/mcp-server.ts b/packages/argent-mcp/src/mcp-server.ts index adb88de3..873f9c5c 100644 --- a/packages/argent-mcp/src/mcp-server.ts +++ b/packages/argent-mcp/src/mcp-server.ts @@ -9,6 +9,8 @@ import { getResolvedToolsUrl, isRemoteRouted, getDeviceIdFromArgs, + prepareFileInputs, + applyClientFileDirectives, type ToolMeta, type ToolsServerPaths, } from "@argent/tools-client"; @@ -161,11 +163,26 @@ export async function startMcpServer(options: StartMcpServerOptions): Promise { const tools = await fetchTools(); const meta = tools.find((t) => t.name === name); + + // File boundary, outbound: wrap declared file-path args so the tool-server + // can read them in place (co-located) or from inlined content (remote). + // Metadata-driven: an older server that doesn't declare fileInputs gets + // the args verbatim. + let finalArgs = args; + if (meta?.fileInputs?.length) { + finalArgs = await prepareFileInputs(meta.fileInputs, args ?? {}, { + // `resolved` is the startup routing decision that picked TOOLS_URL — + // an external target means this process may not share the server's + // filesystem, so file bytes must ride along. + includeContent: resolved.url !== null, + }); + } + const res = await fetchWithReconnect(() => `${TOOLS_URL}/tools/${name}`, reconnect, { init: { method: "POST", headers: { "Content-Type": "application/json", ...authHeader() }, - body: JSON.stringify(args ?? {}), + body: JSON.stringify(finalArgs ?? {}), }, fetchTimeoutMs: meta?.longRunning ? null : FETCH_TIMEOUT_MS, }); @@ -174,7 +191,11 @@ export async function startMcpServer(options: StartMcpServerOptions): Promise): string | null { + let missing = false; + const out = template.replace(/\$\{([A-Za-z0-9_]+)\}/g, (_m, name: string) => { + const v = args[name]; + if (typeof v !== "string" || v.length === 0) { + missing = true; + return ""; + } + return v; + }); + return missing ? null : out; +} + +/** + * Replace declared file-path args with boundary wrappers. Returns the args + * untouched (same reference) when no spec applies, so callers can cheaply + * pass everything through here. + */ +export async function prepareFileInputs( + specs: FileInputSpec[] | undefined, + args: unknown, + opts: PrepareFileInputsOptions +): Promise { + if (!specs || specs.length === 0 || typeof args !== "object" || args === null) { + return args; + } + const record = args as Record; + let out: Record | null = null; + + for (const spec of specs) { + // A target the agent already filled in (e.g. an explicit server-side + // flow_file override) is respected — wrapping it would second-guess the + // caller with a client-side path that may not exist. + if (spec.target in record && typeof record[spec.target] !== "string") continue; + const filePath = interpolatePath(spec.path, record); + if (filePath === null) continue; + // When the target IS a source param, the interpolated path equals its + // value; when it's a derived param (flow_file), only wrap if unset. + if (spec.target in record && record[spec.target] !== filePath) continue; + + const wire: FileInputWire = { [FILE_INPUT_MARKER]: true, path: filePath }; + if (spec.kind === "file") { + try { + const st = await stat(filePath); + if (st.isFile()) { + wire.size = st.size; + wire.mtimeMs = st.mtimeMs; + if (opts.includeContent && st.size <= MAX_CONTENT_BYTES) { + wire.content = (await readFile(filePath)).toString("base64"); + } else if (opts.includeContent) { + // Too big to ride in the call — say so instead of sending a bare + // wrapper, so an absent-on-server path gets a "transfer limit" + // error rather than misleading "file not found" guidance. The + // stat fields stay, so a co-located copy still resolves in place. + wire.contentOmitted = "size-limit"; + } + } + } catch { + // Unreadable here — send the path-only wrapper; the server may still + // find it on its own filesystem, and otherwise errors precisely. + } + } + + out = out ?? { ...record }; + out[spec.target] = wire; + } + + return out ?? args; +} + +// ── Client-write directives ────────────────────────────────────────── + +export interface AppliedClientFiles { + /** The result with every directive replaced by the written path (or null). */ + result: unknown; + /** Paths actually written on this machine. */ + written: string[]; +} + +/** + * Trust boundary: the directive path is authored by the tool-server. Today + * the only producer is flow recording, so writes are constrained to flow + * files — an absolute path whose final segments are `.argent/flows/.yaml` + * with a conservative name charset and no `..` anywhere. Widen deliberately + * (and equally conservatively) if another tool ever needs this channel. + */ +function isAllowedClientFilePath(p: string): boolean { + if (!path.isAbsolute(p)) return false; + const segments = p.split(/[\\/]+/); + if (segments.includes("..")) return false; + const file = segments[segments.length - 1] ?? ""; + if (!/^[A-Za-z0-9_-]+\.yaml$/.test(file)) return false; + return segments[segments.length - 3] === ".argent" && segments[segments.length - 2] === "flows"; +} + +function isClientFileDirective(value: unknown): value is ClientFileDirective { + return ( + !!value && + typeof value === "object" && + (value as Record)[CLIENT_FILE_MARKER] === true && + typeof (value as ClientFileDirective).path === "string" && + typeof (value as ClientFileDirective).content === "string" + ); +} + +/** + * Deep-walk a tool result, writing every client-file directive to disk and + * rewriting it to the written path. A directive that fails validation or the + * write resolves to null, mirroring how the artifact materializer signals a + * missing file. Results without directives pass through untouched. + */ +export async function applyClientFileDirectives(result: unknown): Promise { + const written: string[] = []; + + async function walk(value: unknown): Promise { + if (isClientFileDirective(value)) { + if (!isAllowedClientFilePath(value.path)) return null; + try { + await mkdir(path.dirname(value.path), { recursive: true }); + await writeFile(value.path, value.content, "utf8"); + written.push(value.path); + return value.path; + } catch { + return null; + } + } + if (Array.isArray(value)) { + return Promise.all(value.map(walk)); + } + if (value && typeof value === "object") { + const out: Record = {}; + for (const [k, v] of Object.entries(value)) { + out[k] = await walk(v); + } + return out; + } + return value; + } + + const rewritten = await walk(result); + return { result: rewritten, written }; +} diff --git a/packages/argent-tools-client/src/index.ts b/packages/argent-tools-client/src/index.ts index 583ce05f..d9d542de 100644 --- a/packages/argent-tools-client/src/index.ts +++ b/packages/argent-tools-client/src/index.ts @@ -57,3 +57,16 @@ export { type MaterializeContext, type MaterializedImage, } from "./artifacts.js"; + +export { + prepareFileInputs, + applyClientFileDirectives, + FILE_INPUT_MARKER, + CLIENT_FILE_MARKER, + type FileInputSpec, + type FileInputKind, + type FileInputWire, + type ClientFileDirective, + type PrepareFileInputsOptions, + type AppliedClientFiles, +} from "./file-inputs.js"; diff --git a/packages/argent-tools-client/src/tools-client.ts b/packages/argent-tools-client/src/tools-client.ts index d814770d..d441fa83 100644 --- a/packages/argent-tools-client/src/tools-client.ts +++ b/packages/argent-tools-client/src/tools-client.ts @@ -1,11 +1,14 @@ import { ensureToolsServer, type ToolsServerHandle, type ToolsServerPaths } from "./launcher.js"; import { getResolvedToolsUrl } from "./link-config.js"; +import { prepareFileInputs, applyClientFileDirectives, type FileInputSpec } from "./file-inputs.js"; export interface ToolMeta { name: string; description: string; inputSchema: Record; outputHint?: string; + /** Args that name files on the CALLER's machine — see file-inputs.ts. */ + fileInputs?: FileInputSpec[]; alwaysLoad?: boolean; searchHint?: string; longRunning?: boolean; @@ -77,10 +80,24 @@ export function createToolsClient(options: CreateToolsClientOptions = {}): Tools async function callTool(name: string, args: unknown): Promise { const { url, token } = await baseUrl(); + + // File boundary, outbound: wrap declared file-path args so the server can + // read them in place (co-located) or from inlined content (remote). The + // tool's advertised metadata drives this — an older server that doesn't + // declare fileInputs gets the args verbatim. + let finalArgs = args; + const meta = await fetchTool(name); + if (meta?.fileInputs?.length) { + const { url: routedUrl } = await getResolvedToolsUrl(); + finalArgs = await prepareFileInputs(meta.fileInputs, args ?? {}, { + includeContent: routedUrl !== null, + }); + } + const res = await fetch(`${url}/tools/${encodeURIComponent(name)}`, { method: "POST", headers: { "Content-Type": "application/json", ...authHeaders(token) }, - body: JSON.stringify(args ?? {}), + body: JSON.stringify(finalArgs ?? {}), }); const json = (await res.json().catch(() => ({}))) as { data?: unknown; @@ -91,7 +108,11 @@ export function createToolsClient(options: CreateToolsClientOptions = {}): Tools if (!res.ok) { throw new Error(json.error ?? json.message ?? `${res.status} ${res.statusText}`); } - return { data: json.data, note: json.note }; + // File boundary, inbound: persist any client-write directives (files that + // belong in the caller's project, e.g. recorded flow YAMLs) and rewrite + // them to the written paths. + const { result: data } = await applyClientFileDirectives(json.data); + return { data, note: json.note }; } return { fetchTools, fetchTool, callTool, baseUrl }; diff --git a/packages/argent-tools-client/test/file-inputs.test.ts b/packages/argent-tools-client/test/file-inputs.test.ts new file mode 100644 index 00000000..31658e26 --- /dev/null +++ b/packages/argent-tools-client/test/file-inputs.test.ts @@ -0,0 +1,187 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import * as fs from "node:fs/promises"; +import * as os from "node:os"; +import * as path from "node:path"; +import { + prepareFileInputs, + applyClientFileDirectives, + FILE_INPUT_MARKER, + CLIENT_FILE_MARKER, + type FileInputSpec, + type FileInputWire, +} from "../src/file-inputs.js"; + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "client-file-inputs-")); +}); + +afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); +}); + +describe("prepareFileInputs", () => { + it("returns args unchanged when no specs apply", async () => { + const args = { udid: "ABC" }; + const out = await prepareFileInputs(undefined, args, { includeContent: false }); + expect(out).toBe(args); + }); + + it("wraps a file param with stat info, without content when local", async () => { + const filePath = path.join(tmpDir, "baseline.png"); + await fs.writeFile(filePath, "png"); + const st = await fs.stat(filePath); + + const specs: FileInputSpec[] = [ + { target: "baselinePath", path: "${baselinePath}", kind: "file", optional: true }, + ]; + const out = (await prepareFileInputs( + specs, + { baselinePath: filePath, udid: "X" }, + { includeContent: false } + )) as Record; + + expect(out.udid).toBe("X"); + expect(out.baselinePath).toEqual({ + [FILE_INPUT_MARKER]: true, + path: filePath, + size: st.size, + mtimeMs: st.mtimeMs, + }); + }); + + it("inlines base64 content when routed to a remote server", async () => { + const filePath = path.join(tmpDir, "baseline.png"); + await fs.writeFile(filePath, "png-bytes"); + + const specs: FileInputSpec[] = [ + { target: "baselinePath", path: "${baselinePath}", kind: "file" }, + ]; + const out = (await prepareFileInputs( + specs, + { baselinePath: filePath }, + { includeContent: true } + )) as Record; + + expect(Buffer.from(out.baselinePath!.content!, "base64").toString()).toBe("png-bytes"); + }); + + it("marks oversize content as omitted instead of silently skipping it", async () => { + const filePath = path.join(tmpDir, "huge.bin"); + await fs.writeFile(filePath, Buffer.alloc(32 * 1024 * 1024 + 1)); + const st = await fs.stat(filePath); + + const specs: FileInputSpec[] = [ + { target: "baselinePath", path: "${baselinePath}", kind: "file" }, + ]; + const out = (await prepareFileInputs( + specs, + { baselinePath: filePath }, + { includeContent: true } + )) as Record; + + // Stat survives so a co-located copy still resolves in place; only the + // bytes are withheld, with the reason on the wire. + expect(out.baselinePath).toEqual({ + [FILE_INPUT_MARKER]: true, + path: filePath, + size: st.size, + mtimeMs: st.mtimeMs, + contentOmitted: "size-limit", + }); + }); + + it("derives a new target param from a multi-param template (flow_file)", async () => { + const flowsDir = path.join(tmpDir, ".argent", "flows"); + await fs.mkdir(flowsDir, { recursive: true }); + const flowPath = path.join(flowsDir, "my-flow.yaml"); + await fs.writeFile(flowPath, "steps: []\n"); + + const specs: FileInputSpec[] = [ + { target: "flow_file", path: "${project_root}/.argent/flows/${name}.yaml", kind: "file" }, + ]; + const out = (await prepareFileInputs( + specs, + { name: "my-flow", project_root: tmpDir }, + { includeContent: true } + )) as Record; + + // Source params stay strings; the derived target carries the wrapper. + expect(out.project_root).toBe(tmpDir); + expect(out.name).toBe("my-flow"); + expect(out.flow_file).toMatchObject({ [FILE_INPUT_MARKER]: true, path: flowPath }); + expect((out.flow_file as FileInputWire).content).toBeDefined(); + }); + + it("skips a spec whose referenced params are absent (live-capture mode)", async () => { + const specs: FileInputSpec[] = [ + { target: "baselinePath", path: "${baselinePath}", kind: "file", optional: true }, + ]; + const args = { captureBaseline: true, udid: "X" }; + const out = await prepareFileInputs(specs, args, { includeContent: true }); + expect(out).toBe(args); + }); + + it("respects an explicitly set derived target (server-side override)", async () => { + const specs: FileInputSpec[] = [ + { target: "flow_file", path: "${project_root}/.argent/flows/${name}.yaml", kind: "file" }, + ]; + const out = (await prepareFileInputs( + specs, + { name: "f", project_root: tmpDir, flow_file: "/server/side/flow.yaml" }, + { includeContent: true } + )) as Record; + expect(out.flow_file).toBe("/server/side/flow.yaml"); + }); + + it("still sends a path-only wrapper for an unreadable file", async () => { + const specs: FileInputSpec[] = [{ target: "p", path: "${p}", kind: "file" }]; + const ghost = path.join(tmpDir, "ghost.png"); + const out = (await prepareFileInputs(specs, { p: ghost }, { includeContent: true })) as Record< + string, + unknown + >; + expect(out.p).toEqual({ [FILE_INPUT_MARKER]: true, path: ghost }); + }); +}); + +describe("applyClientFileDirectives", () => { + function directive(p: string, content = "steps: []\n") { + return { [CLIENT_FILE_MARKER]: true, path: p, content }; + } + + it("writes an allowed flow path and rewrites the directive to it", async () => { + const flowPath = path.join(tmpDir, ".argent", "flows", "demo.yaml"); + const result = { message: "ok", savedTo: directive(flowPath) }; + + const { result: rewritten, written } = await applyClientFileDirectives(result); + + expect(written).toEqual([flowPath]); + expect((rewritten as { savedTo: string }).savedTo).toBe(flowPath); + expect(await fs.readFile(flowPath, "utf8")).toBe("steps: []\n"); + }); + + it("passes results without directives through untouched", async () => { + const result = { a: 1, nested: { b: "x" } }; + const { result: rewritten, written } = await applyClientFileDirectives(result); + expect(rewritten).toEqual(result); + expect(written).toEqual([]); + }); + + it.each([ + ["relative path", path.join(".argent", "flows", "x.yaml")], + ["outside .argent/flows", path.join(os.tmpdir(), "x.yaml")], + // Built by concatenation: path.join would collapse the ".." segments + // before the validator ever saw them. + ["traversal", `${os.tmpdir()}/.argent/flows/../../evil.yaml`], + ["bad extension", path.join(os.tmpdir(), ".argent", "flows", "x.sh")], + ["bad name charset", path.join(os.tmpdir(), ".argent", "flows", "x y.yaml")], + ])("refuses to write %s and resolves the directive to null", async (_label, p) => { + const { result: rewritten, written } = await applyClientFileDirectives({ + savedTo: directive(p), + }); + expect(written).toEqual([]); + expect((rewritten as { savedTo: unknown }).savedTo).toBeNull(); + }); +}); diff --git a/packages/registry/src/file-inputs.ts b/packages/registry/src/file-inputs.ts new file mode 100644 index 00000000..4af2579f --- /dev/null +++ b/packages/registry/src/file-inputs.ts @@ -0,0 +1,162 @@ +/** + * File-input wire contract — the INPUT-side counterpart of {@link ArtifactHandle}. + * + * Artifacts move files the tool-server *produced* out to the client; this + * module's types move files the client *owns* (saved screenshots, flow YAMLs) + * in to the tool-server. A tool that reads a caller-supplied path declares it + * in {@link ToolDefinition.fileInputs}; the declaration is surfaced through + * `GET /tools`, so the client knows — without tool-specific logic — which args + * name files on *its* filesystem. + * + * Before sending a call, the client replaces each declared arg with a + * {@link FileInputWire} wrapper carrying the path, its stat, and (only when the + * client is routed to a remote tool-server) the base64 file content. The + * tool-server resolves the wrapper back to a server-readable path *before* zod + * validation: when the path on its own disk matches the recorded stat it is + * used in place (co-located ⇒ zero copies, exactly mirroring the artifact + * gate), otherwise the inlined content is materialized to a temp file. Tools + * therefore always execute against a plain local path and stay + * location-agnostic. + * + * {@link ClientFileDirective} is the reverse of an upload: a tool that needs a + * file to land in the *client's* project (e.g. a recorded flow YAML) returns + * the content plus the client-side destination path, and the client writes it. + */ + +/** Discriminant key identifying a client-file wrapper inside tool args. */ +export const FILE_INPUT_MARKER = "__argentFileInput" as const; + +/** What the client sends in place of a declared file-path arg. */ +export interface FileInputWire { + [FILE_INPUT_MARKER]: true; + /** + * Absolute path on the CLIENT machine. Also probed on the tool-server's own + * filesystem — a hit (existence for directories, size/mtime match for files) + * means client and server are co-located (or share a checkout) and the path + * is used in place with no copy. + */ + path: string; + /** stat of `path` on the client, for the server-side co-location probe. */ + size?: number; + mtimeMs?: number; + /** + * Base64 file bytes. The client inlines them only when it is routed to an + * external tool-server (`argent link` / ARGENT_TOOLS_URL), so unlinked local + * calls never pay the encoding cost. + */ + content?: string; + /** + * Present when the client had readable content but deliberately did not + * inline it. Lets the server explain *why* an absent-on-host file has no + * bytes instead of guessing ("size-limit" = file exceeds the client's + * inline-content cap). A string enum so future reasons extend it without + * another field. + */ + contentOmitted?: "size-limit"; +} + +/** + * How the server treats a declared file input: + * - `"file"` — the tool reads this file. Resolved to a server-readable path + * (in place, or materialized from `content`); the call fails + * with a clear error when neither is possible. + * - `"directory"` — the tool reads a tree that cannot travel over the wire + * (e.g. a project root). Must exist on the tool-server host; + * otherwise the call fails with remote-mode guidance instead + * of silently reading nothing. + * - `"probe"` — advisory only. The arg passes through unchanged; the tool + * learns via `ctx.fileInputs` whether the path exists on the + * server host and adapts (e.g. flow recording switches to + * client-side persistence, screenshot-diff falls back to a + * temp output dir). + */ +export type FileInputKind = "file" | "directory" | "probe"; + +/** + * Declaration of one file-boundary arg on a {@link ToolDefinition}. Shipped + * verbatim to the client in `GET /tools`, so it must stay JSON-serializable + * and dumb: `path` is a template over the tool's own string args + * (`"${baselinePath}"`, `"${project_root}/.argent/flows/${name}.yaml"`). + */ +export interface FileInputSpec { + /** Arg name the resolved wrapper lands in (must be a string param, may be one the agent never sets). */ + target: string; + /** Client-side path template; `${param}` substitutes the tool's string args. */ + path: string; + kind: FileInputKind; + /** + * Skip this spec silently when a referenced param is absent (e.g. + * screenshot-diff's baselinePath in live-capture mode). A non-optional spec + * with absent params is also skipped client-side — the tool's own zod + * validation owns required-param errors. + */ + optional?: boolean; +} + +/** Per-target resolution outcome, passed to the tool via `ctx.fileInputs`. */ +export interface ResolvedFileInput { + /** The client-side path as originally sent in the wrapper. */ + clientPath: string; + /** True when the path was usable on the tool-server's own filesystem. */ + presentOnHost: boolean; + /** True when the value was materialized from uploaded content. */ + viaUpload: boolean; +} + +/** Discriminant key identifying a client-write directive inside a tool result. */ +export const CLIENT_FILE_MARKER = "__argentClientFile" as const; + +/** + * A file the CLIENT must persist: returned by tools whose output belongs in + * the agent's project rather than on the tool-server host. The client writes + * `content` to `path` and rewrites the directive to that path string, so the + * rendered result reads the same as a server-side write used to. + */ +export interface ClientFileDirective { + [CLIENT_FILE_MARKER]: true; + /** Absolute CLIENT-side destination path (the client validates it before writing). */ + path: string; + content: string; +} + +export function isFileInputWire(value: unknown): value is FileInputWire { + return ( + !!value && + typeof value === "object" && + (value as Record)[FILE_INPUT_MARKER] === true && + typeof (value as FileInputWire).path === "string" + ); +} + +export function isClientFileDirective(value: unknown): value is ClientFileDirective { + return ( + !!value && + typeof value === "object" && + (value as Record)[CLIENT_FILE_MARKER] === true && + typeof (value as ClientFileDirective).path === "string" && + typeof (value as ClientFileDirective).content === "string" + ); +} + +/** + * Interpolate a {@link FileInputSpec.path} template from string args. + * Returns null when any referenced param is missing or not a non-empty + * string — callers treat that as "spec does not apply to this call". + * Shared by the client (to read the file) and kept here so both sides agree + * on the micro-grammar: `${name}` only, no nesting, no defaults. + */ +export function interpolateFileInputPath( + template: string, + args: Record +): string | null { + let missing = false; + const out = template.replace(/\$\{([A-Za-z0-9_]+)\}/g, (_m, name: string) => { + const v = args[name]; + if (typeof v !== "string" || v.length === 0) { + missing = true; + return ""; + } + return v; + }); + return missing ? null : out; +} diff --git a/packages/registry/src/index.ts b/packages/registry/src/index.ts index 7d575189..616ccb33 100644 --- a/packages/registry/src/index.ts +++ b/packages/registry/src/index.ts @@ -20,6 +20,20 @@ export type { } from "./types"; export { ArtifactStore, ARTIFACT_MARKER } from "./artifacts"; export type { ArtifactHandle, ArtifactEntry, RegisterArtifactOptions } from "./artifacts"; +export { + FILE_INPUT_MARKER, + CLIENT_FILE_MARKER, + isFileInputWire, + isClientFileDirective, + interpolateFileInputPath, +} from "./file-inputs"; +export type { + FileInputWire, + FileInputKind, + FileInputSpec, + ResolvedFileInput, + ClientFileDirective, +} from "./file-inputs"; export { parseURN } from "./urn"; export { ServiceNotFoundError, diff --git a/packages/registry/src/types.ts b/packages/registry/src/types.ts index 2bbf61ee..8e188135 100644 --- a/packages/registry/src/types.ts +++ b/packages/registry/src/types.ts @@ -1,6 +1,7 @@ import { TypedEventEmitter } from "./event-emitter"; import { z } from "zod"; import type { ArtifactStore } from "./artifacts"; +import type { FileInputSpec, ResolvedFileInput } from "./file-inputs"; // ── Service Types ── @@ -59,6 +60,15 @@ export type ServiceRef = string | { urn: string; options?: Record; } /** @@ -176,6 +186,13 @@ export interface ToolDefinition { * resolved branch's deps after `classifyDevice`. */ requires?: ToolDependency[]; + /** + * Args that name files/directories on the CALLER's machine. Surfaced through + * `GET /tools` so the client can wrap them for the file boundary, and + * resolved back to server-readable paths before zod validation. See + * `file-inputs.ts` for the wire contract and kind semantics. + */ + fileInputs?: FileInputSpec[]; /** Returns alias → URN or { urn, options }; registry resolves each and passes alias → API into execute. */ services: (params: TParams) => Record; execute(services: Record, params: TParams, ctx?: ToolContext): Promise; diff --git a/packages/tool-server/src/artifacts.ts b/packages/tool-server/src/artifacts.ts index 399c4c14..8a308f30 100644 --- a/packages/tool-server/src/artifacts.ts +++ b/packages/tool-server/src/artifacts.ts @@ -33,7 +33,7 @@ export { * tool's `execute` is called directly (bypassing the registry) without a * context — i.e. a misconfigured unit test, not a real invocation. */ -export function requireArtifacts(ctx?: ToolContext): ArtifactStore { +export function requireArtifacts(ctx?: Partial): ArtifactStore { if (!ctx?.artifacts) { throw new Error( "Artifact store missing from tool context. Invoke this tool via registry.invokeTool " + diff --git a/packages/tool-server/src/file-inputs.ts b/packages/tool-server/src/file-inputs.ts new file mode 100644 index 00000000..7ae030b6 --- /dev/null +++ b/packages/tool-server/src/file-inputs.ts @@ -0,0 +1,206 @@ +/** + * Server-side resolution of file-input wrappers — the INPUT half of the + * remote file boundary (the OUTPUT half is `artifacts.ts`). + * + * The client replaces args declared in a tool's `fileInputs` with + * {@link FileInputWire} wrappers (path + stat + optional base64 content). This + * module turns each wrapper back into a plain server-readable string *before* + * zod validation, so tools always execute against a local path: + * + * - co-located client (the common, unlinked case): the wrapper's path matches + * on this host's own filesystem and is used in place — zero copies, exactly + * mirroring the artifact materializer's gate on the client side. + * - remote client: `kind: "file"` content is materialized into a temp file; + * `kind: "directory"` fails with remote-mode guidance (a tree can't ride in + * a tool call); `kind: "probe"` passes through and only reports presence. + * + * Plain string args (older clients, direct invocations) pass through untouched, + * which is what keeps both halves of the version-skew matrix on today's + * behavior. + */ + +import { mkdtemp, rm, stat, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { basename, join } from "node:path"; +import { + isFileInputWire, + type FileInputSpec, + type FileInputWire, + type ResolvedFileInput, + type ToolDefinition, +} from "@argent/registry"; + +/** + * Decoded upload ceiling. Large enough for full-resolution PNG baselines and + * any flow YAML; small enough that a misbehaving client can't fill the temp + * dir through a single call. Must stay below the express.json body limit in + * `http.ts` (which bounds the base64-encoded request as a whole). + */ +const MAX_UPLOAD_BYTES = 32 * 1024 * 1024; + +/** Typed so the HTTP layer can map it to a 422 instead of a generic 500. */ +export class FileInputError extends Error {} + +export interface ResolveFileInputsResult { + /** The request body with every wrapper replaced by a plain path string. */ + args: Record; + /** Per-target outcomes, forwarded to the tool via `InvokeToolOptions.fileInputs`. */ + fileInputs: Record | undefined; + /** + * Removes every temp file this call materialized. Uploads are call-scoped — + * the tool reads them during execution and nothing may reference them after + * the response, so the caller must invoke this once the call settles. + * Always safe: a no-op when everything resolved in place, and removal + * failures are swallowed (cleanup must never affect the call's outcome). + */ + cleanup: () => Promise; +} + +/** + * True when the wrapper's path is usable on THIS host. `directory` only needs + * to exist as a directory and `probe` to exist at all (size/mtime are + * meaningless there); a `file` must match the client-recorded stat so a stale + * or unrelated file at the same path — or a remote host that merely mirrors + * the directory layout — falls through to the uploaded content instead of + * being read by accident. A same-stat match on a genuinely different machine + * means a synced checkout, where reading the server's identical copy is the + * intended outcome. + */ +async function probeHostPath(wire: FileInputWire, kind: FileInputSpec["kind"]): Promise { + try { + const st = await stat(wire.path); + if (kind === "directory") return st.isDirectory(); + if (kind === "probe") return true; + if (!st.isFile()) return false; + if (wire.size != null && st.size !== wire.size) return false; + if (wire.mtimeMs != null && Math.round(st.mtimeMs) !== Math.round(wire.mtimeMs)) return false; + return true; + } catch { + return false; + } +} + +function formatBytes(bytes: number | undefined): string { + if (bytes == null) return "unknown size"; + if (bytes < 1024 * 1024) return `${Math.ceil(bytes / 1024)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +function sanitizeFilename(name: string): string { + const cleaned = name.replace(/[^A-Za-z0-9._-]/g, "_"); + return cleaned.length > 0 && cleaned !== "." && cleaned !== ".." ? cleaned : "upload"; +} + +/** Write uploaded content into a fresh OS temp dir; returns the file path and the dir to remove on cleanup. */ +async function materializeUpload(wire: FileInputWire): Promise<{ filePath: string; dir: string }> { + const data = Buffer.from(wire.content!, "base64"); + if (data.length > MAX_UPLOAD_BYTES) { + throw new FileInputError( + `Uploaded file "${wire.path}" is ${data.length} bytes — exceeds the ` + + `${MAX_UPLOAD_BYTES}-byte file-input limit.` + ); + } + // Integrity: a size recorded client-side that disagrees with the decoded + // bytes means the upload was truncated or mangled in transit — fail loudly + // rather than handing the tool a corrupt file. + if (wire.size != null && data.length !== wire.size) { + throw new FileInputError( + `Uploaded content for "${wire.path}" is ${data.length} bytes but the client ` + + `recorded ${wire.size} — refusing a truncated or corrupted upload.` + ); + } + const dir = await mkdtemp(join(tmpdir(), "argent-file-input-")); + const filePath = join(dir, sanitizeFilename(basename(wire.path))); + await writeFile(filePath, data); + return { filePath, dir }; +} + +async function resolveOne( + spec: FileInputSpec, + wire: FileInputWire, + tempDirs: string[] +): Promise<{ value: string; meta: ResolvedFileInput }> { + const meta: ResolvedFileInput = { + clientPath: wire.path, + presentOnHost: await probeHostPath(wire, spec.kind), + viaUpload: false, + }; + + if (spec.kind === "probe" || meta.presentOnHost) { + return { value: wire.path, meta }; + } + + if (spec.kind === "directory") { + throw new FileInputError( + `Directory "${wire.path}" does not exist on the tool-server host. ` + + `This tool reads a directory tree from the tool-server's filesystem, which cannot be ` + + `uploaded with the call — when the tool-server runs on a different machine, pass a ` + + `path that exists on that machine (e.g. the server-side checkout of the project).` + ); + } + + if (typeof wire.content === "string") { + const { filePath, dir } = await materializeUpload(wire); + tempDirs.push(dir); + return { value: filePath, meta: { ...meta, viaUpload: true } }; + } + + if (wire.contentOmitted === "size-limit") { + throw new FileInputError( + `File "${wire.path}" is ${formatBytes(wire.size)} — larger than the ` + + `${formatBytes(MAX_UPLOAD_BYTES)} file-input transfer limit, so the client did not ` + + `upload it, and it was not found on the tool-server host. Copy the file to the ` + + `tool-server machine and pass that path, or use a smaller file.` + ); + } + + throw new FileInputError( + `File "${wire.path}" was not found on the tool-server host and the client did not ` + + `upload its content. Either the file does not exist, or it changed since it was ` + + `referenced — re-create it (or re-run the producing tool) and try again.` + ); +} + +/** + * Replace every declared file-input wrapper in `body` with a plain + * server-readable path string. Returns the rewritten args plus per-target + * resolution metadata. Wrappers are only honored on declared targets — a + * wrapper anywhere else simply fails the tool's own schema validation, so + * clients can't smuggle uploads through undeclared params. + */ +export async function resolveFileInputs( + def: Pick, "fileInputs">, + body: unknown +): Promise { + const tempDirs: string[] = []; + const cleanup = async () => { + await Promise.all( + tempDirs.map((dir) => rm(dir, { recursive: true, force: true }).catch(() => {})) + ); + }; + + const specs = def.fileInputs; + if (!specs || specs.length === 0 || typeof body !== "object" || body === null) { + return { args: (body ?? {}) as Record, fileInputs: undefined, cleanup }; + } + + const args = { ...(body as Record) }; + let resolved: Record | undefined; + + try { + for (const spec of specs) { + const value = args[spec.target]; + if (!isFileInputWire(value)) continue; + const { value: path, meta } = await resolveOne(spec, value, tempDirs); + args[spec.target] = path; + resolved = { ...(resolved ?? {}), [spec.target]: meta }; + } + } catch (err) { + // A later spec failing must not leak the uploads already written for + // earlier ones — the caller never gets a result to clean up from. + await cleanup(); + throw err; + } + + return { args, fileInputs: resolved, cleanup }; +} diff --git a/packages/tool-server/src/http.ts b/packages/tool-server/src/http.ts index 242ae7ab..0b0d716d 100644 --- a/packages/tool-server/src/http.ts +++ b/packages/tool-server/src/http.ts @@ -1,5 +1,5 @@ import express, { Request, Response } from "express"; -import type { Registry } from "@argent/registry"; +import type { FileInputSpec, Registry, ResolvedFileInput } from "@argent/registry"; import { ToolNotFoundError } from "@argent/registry"; import { createIdleTimer } from "./utils/idle-timer"; import { DependencyMissingError, ensureDeps } from "./utils/check-deps"; @@ -8,6 +8,7 @@ import { getUpdateState, isUpdateNoteSuppressed, suppressUpdateNote } from "./ut import { buildUpdateNote } from "./update-utils"; import { createPreviewRouter } from "./preview"; import { makeArtifactRoute } from "./artifacts"; +import { FileInputError, resolveFileInputs } from "./file-inputs"; import { assertSupported, NotImplementedOnPlatformError, @@ -104,7 +105,10 @@ function extractHostname(host: string): string { export function createHttpApp(registry: Registry, options?: HttpAppOptions): HttpAppHandle { const app = express(); - app.use(express.json()); + // 48mb: file-input wrappers may inline base64 file content (saved PNG + // baselines, flow YAMLs) when the client is remote. Bounds the whole encoded + // request; the decoded per-file ceiling is enforced in file-inputs.ts. + app.use(express.json({ limit: "48mb" })); const idleTimer = createIdleTimer(options?.idleTimeoutMs ?? 0, options?.onIdle); @@ -222,6 +226,7 @@ export function createHttpApp(registry: Registry, options?: HttpAppOptions): Htt description: string; inputSchema: Record; outputHint?: string; + fileInputs?: FileInputSpec[]; alwaysLoad?: boolean; searchHint?: string; longRunning?: boolean; @@ -231,6 +236,7 @@ export function createHttpApp(registry: Registry, options?: HttpAppOptions): Htt inputSchema: def?.inputSchema ?? { type: "object", properties: {} }, }; if (def?.outputHint) entry.outputHint = def.outputHint; + if (def?.fileInputs && def.fileInputs.length > 0) entry.fileInputs = def.fileInputs; if (def?.alwaysLoad) entry.alwaysLoad = true; if (def?.searchHint) entry.searchHint = def.searchHint; if (def?.longRunning) entry.longRunning = true; @@ -254,9 +260,31 @@ export function createHttpApp(registry: Registry, options?: HttpAppOptions): Htt return; } - let parsedData = req.body; + // File boundary: turn any client file-input wrappers back into plain + // server-readable paths BEFORE schema validation, so the tool's zod + // schema only ever sees the string params it declares. 422 on a file + // that is reachable neither in place nor via uploaded content. + let bodyArgs = req.body; + let resolvedFileInputs: Record | undefined; + try { + const resolved = await resolveFileInputs(def, req.body); + bodyArgs = resolved.args; + resolvedFileInputs = resolved.fileInputs; + // Materialized uploads are call-scoped: remove them once the response + // settles, whichever way it ends (success, validation failure, tool + // error, or client abort). + res.once("close", () => void resolved.cleanup()); + } catch (err) { + if (err instanceof FileInputError) { + res.status(422).json({ error: err.message }); + return; + } + throw err; + } + + let parsedData = bodyArgs; if (def.zodSchema) { - const parseResult = def.zodSchema.safeParse(req.body); + const parseResult = def.zodSchema.safeParse(bodyArgs); if (!parseResult.success) { res.status(400).json({ error: parseResult.error.message }); return; @@ -320,6 +348,7 @@ export function createHttpApp(registry: Registry, options?: HttpAppOptions): Htt try { const data = await registry.invokeTool(name, parsedData, { signal: controller.signal, + ...(resolvedFileInputs ? { fileInputs: resolvedFileInputs } : {}), }); // Gate on `updateInstallable` (not `updateAvailable`) and advertise the // version the resolver would install — both account for the release-age policy. diff --git a/packages/tool-server/src/tools/flows/flow-add-step.ts b/packages/tool-server/src/tools/flows/flow-add-step.ts index b3359ae7..ec1162ed 100644 --- a/packages/tool-server/src/tools/flows/flow-add-step.ts +++ b/packages/tool-server/src/tools/flows/flow-add-step.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import type { Registry, ToolDefinition } from "@argent/registry"; -import { getFlowPath, getActiveFlow, appendStep } from "./flow-utils"; +import { getActiveFlow, appendStepToActiveFlow, type FlowSavedTo } from "./flow-utils"; const zodSchema = z.object({ command: z.string().describe('MCP tool name (e.g. "tap", "screenshot", "launch-app")'), @@ -22,7 +22,7 @@ export function createFlowAddStepTool( registry: Registry ): ToolDefinition< z.infer, - { message: string; toolResult: unknown; flowFile: string } + { message: string; toolResult: unknown; flowFile: string; savedTo: FlowSavedTo } > { return { id: "flow-add-step", @@ -31,12 +31,11 @@ export function createFlowAddStepTool( services: () => ({}), async execute(_services, params) { const flowName = getActiveFlow(); - const filePath = getFlowPath(flowName); const args: Record = params.args ? JSON.parse(params.args) : {}; const toolResult = await registry.invokeTool(params.command, args); - const flowFile = await appendStep(filePath, { + const { flowFile, savedTo } = await appendStepToActiveFlow({ kind: "tool", name: params.command, args, @@ -47,6 +46,7 @@ export function createFlowAddStepTool( message: `Step added to "${flowName}" flow`, toolResult, flowFile, + savedTo, }; }, }; diff --git a/packages/tool-server/src/tools/flows/flow-finish-recording.ts b/packages/tool-server/src/tools/flows/flow-finish-recording.ts index a6d319d9..cb389cd7 100644 --- a/packages/tool-server/src/tools/flows/flow-finish-recording.ts +++ b/packages/tool-server/src/tools/flows/flow-finish-recording.ts @@ -1,7 +1,16 @@ import { z } from "zod"; import * as fs from "node:fs/promises"; import type { ToolDefinition } from "@argent/registry"; -import { getFlowPath, getActiveFlow, clearActiveFlow, parseFlow } from "./flow-utils"; +import { + getFlowPath, + getActiveFlow, + getRecordingSession, + clearActiveFlow, + clientFileDirective, + parseFlow, + serializeFlow, + type FlowSavedTo, +} from "./flow-utils"; const zodSchema = z.object({}); @@ -14,6 +23,7 @@ export const flowFinishRecordingTool: ToolDefinition< steps: number; summary: string[]; flowFile: string; + savedTo: FlowSavedTo; } > = { id: "flow-finish-recording", @@ -23,8 +33,21 @@ You can still edit the .yaml file directly afterwards to remove or reorder steps services: () => ({}), async execute(_services, _params) { const flowName = getActiveFlow(); - const filePath = getFlowPath(flowName); - const flowFile = await fs.readFile(filePath, "utf8"); + const session = getRecordingSession(); + + // Host mode re-reads the file so manual edits made during the recording + // survive into the summary; in client mode this host never has the file, + // so the in-memory copy is the truth and travels back in the directive. + const filePath = session?.filePath ?? getFlowPath(flowName); + let flowFile: string; + let savedTo: FlowSavedTo; + if (session?.persist === "client") { + flowFile = serializeFlow(session.flow); + savedTo = clientFileDirective(filePath, flowFile); + } else { + flowFile = await fs.readFile(filePath, "utf8"); + savedTo = filePath; + } const flow = parseFlow(flowFile); const summary = flow.steps.map((step, i) => { @@ -43,6 +66,7 @@ You can still edit the .yaml file directly afterwards to remove or reorder steps steps: flow.steps.length, summary, flowFile, + savedTo, }; }, }; diff --git a/packages/tool-server/src/tools/flows/flow-insert-echo.ts b/packages/tool-server/src/tools/flows/flow-insert-echo.ts index 23d90e10..e51cbbc0 100644 --- a/packages/tool-server/src/tools/flows/flow-insert-echo.ts +++ b/packages/tool-server/src/tools/flows/flow-insert-echo.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import type { ToolDefinition } from "@argent/registry"; -import { getFlowPath, getActiveFlow, appendStep } from "./flow-utils"; +import { getActiveFlow, appendStepToActiveFlow, type FlowSavedTo } from "./flow-utils"; const zodSchema = z.object({ message: z.string().describe("Message to echo when the flow is replayed"), @@ -8,7 +8,7 @@ const zodSchema = z.object({ export const flowInsertEchoTool: ToolDefinition< z.infer, - { message: string; flowFile: string } + { message: string; flowFile: string; savedTo: FlowSavedTo } > = { id: "flow-add-echo", description: `Record an echo step in the active flow. Echo steps print a message when the flow is replayed — useful as labels between tool calls. @@ -18,9 +18,8 @@ Returns { message, flowFile }. Fails if no active flow recording is in progress. services: () => ({}), async execute(_services, params) { const flowName = getActiveFlow(); - const filePath = getFlowPath(flowName); - const flowFile = await appendStep(filePath, { + const { flowFile, savedTo } = await appendStepToActiveFlow({ kind: "echo", message: params.message, }); @@ -28,6 +27,7 @@ Returns { message, flowFile }. Fails if no active flow recording is in progress. return { message: `Echo added to "${flowName}" flow`, flowFile, + savedTo, }; }, }; diff --git a/packages/tool-server/src/tools/flows/flow-read-prerequisite.ts b/packages/tool-server/src/tools/flows/flow-read-prerequisite.ts index 3dce8bf4..be712a87 100644 --- a/packages/tool-server/src/tools/flows/flow-read-prerequisite.ts +++ b/packages/tool-server/src/tools/flows/flow-read-prerequisite.ts @@ -1,7 +1,8 @@ import { z } from "zod"; import * as fs from "node:fs/promises"; -import type { ToolDefinition } from "@argent/registry"; -import { getFlowPath, parseFlow, setActiveProjectRoot } from "./flow-utils"; +import type { FileInputSpec, ToolDefinition } from "@argent/registry"; +import { parseFlow } from "./flow-utils"; +import { resolveFlowFilePath } from "./flow-run"; const zodSchema = z.object({ name: z.string().describe('Name of the flow to inspect (e.g. "settings-explore")'), @@ -10,8 +11,19 @@ const zodSchema = z.object({ .describe( "Absolute path to the project root directory that contains `.argent/flows/.yaml`." ), + flow_file: z + .string() + .optional() + .describe( + "Path to the flow .yaml as readable by the tool-server. Internal — the argent client derives it from project_root and name automatically; leave unset." + ), }); +// Same boundary contract as flow-execute: the YAML is the agent's file. +const fileInputs: FileInputSpec[] = [ + { target: "flow_file", path: "${project_root}/.argent/flows/${name}.yaml", kind: "file" }, +]; + export const flowReadPrerequisiteTool: ToolDefinition< z.infer, { flow: string; executionPrerequisite: string } @@ -22,10 +34,10 @@ Returns the prerequisite description so you can verify the required state is met Use when you need to check what app/simulator state is required before executing a flow. Fails if the flow file does not exist in the .argent/flows/ directory.`, zodSchema, + fileInputs, services: () => ({}), async execute(_services, params) { - setActiveProjectRoot(params.project_root); - const filePath = getFlowPath(params.name); + const filePath = resolveFlowFilePath(params); const fileContent = await fs.readFile(filePath, "utf8"); const flow = parseFlow(fileContent); diff --git a/packages/tool-server/src/tools/flows/flow-run.ts b/packages/tool-server/src/tools/flows/flow-run.ts index a430b550..99a50fd6 100644 --- a/packages/tool-server/src/tools/flows/flow-run.ts +++ b/packages/tool-server/src/tools/flows/flow-run.ts @@ -1,7 +1,13 @@ import { z } from "zod"; import * as fs from "node:fs/promises"; -import type { Registry, ToolDefinition } from "@argent/registry"; -import { getFlowPath, parseFlow, setActiveProjectRoot, type FlowStep } from "./flow-utils"; +import type { FileInputSpec, Registry, ToolDefinition } from "@argent/registry"; +import { + assertSafeFlowName, + getFlowPath, + parseFlow, + setActiveProjectRoot, + type FlowStep, +} from "./flow-utils"; import { sleep } from "../../utils/timing"; const zodSchema = z.object({ @@ -11,6 +17,12 @@ const zodSchema = z.object({ .describe( "Absolute path to the project root directory that contains `.argent/flows/.yaml`." ), + flow_file: z + .string() + .optional() + .describe( + "Path to the flow .yaml as readable by the tool-server. Internal — the argent client derives it from project_root and name automatically; leave unset." + ), prerequisiteAcknowledged: z .boolean() .optional() @@ -19,6 +31,16 @@ const zodSchema = z.object({ ), }); +/** + * The flow YAML lives in the AGENT's project, so it crosses the boundary as a + * `file` input: read in place when this host can see it, materialized from the + * uploaded content when the tool-server is remote. Either way `flow_file` + * arrives as a path this process can read. + */ +const fileInputs: FileInputSpec[] = [ + { target: "flow_file", path: "${project_root}/.argent/flows/${name}.yaml", kind: "file" }, +]; + type StepResult = | { kind: "echo"; message: string } | { kind: "tool"; tool: string; result: unknown; outputHint?: string; args?: unknown } @@ -53,10 +75,10 @@ set to true, the tool returns a notice with the prerequisite instead of running. Use flow-read-prerequisite to inspect the prerequisite beforehand.`, longRunning: true, zodSchema, + fileInputs, services: () => ({}), async execute(_services, params) { - setActiveProjectRoot(params.project_root); - const filePath = getFlowPath(params.name); + const filePath = resolveFlowFilePath(params); const fileContent = await fs.readFile(filePath, "utf8"); const flow = parseFlow(fileContent); @@ -108,3 +130,20 @@ Use flow-read-prerequisite to inspect the prerequisite beforehand.`, }, }; } + +/** + * Prefer the boundary-resolved `flow_file` (always a path this host can read); + * fall back to deriving the path from project_root + name for callers that + * predate the file boundary. The name is validated in both branches so an + * unsafe name can't reach error messages or the legacy path join. + */ +export function resolveFlowFilePath(params: { + name: string; + project_root: string; + flow_file?: string; +}): string { + assertSafeFlowName(params.name); + if (params.flow_file) return params.flow_file; + setActiveProjectRoot(params.project_root); + return getFlowPath(params.name); +} diff --git a/packages/tool-server/src/tools/flows/flow-start-recording.ts b/packages/tool-server/src/tools/flows/flow-start-recording.ts index fb1ddf9d..cb67acff 100644 --- a/packages/tool-server/src/tools/flows/flow-start-recording.ts +++ b/packages/tool-server/src/tools/flows/flow-start-recording.ts @@ -1,13 +1,15 @@ import { z } from "zod"; import * as fs from "node:fs/promises"; -import type { ToolDefinition } from "@argent/registry"; +import type { FileInputSpec, ToolDefinition } from "@argent/registry"; import { getFlowsDir, getFlowPath, getActiveFlowOrNull, - setActiveFlow, setActiveProjectRoot, + startRecordingSession, + clientFileDirective, serializeFlow, + type FlowSavedTo, } from "./flow-utils"; const zodSchema = z.object({ @@ -24,14 +26,26 @@ const zodSchema = z.object({ ), }); +/** + * `project_root` is the AGENT's project. The probe tells us whether it exists + * on this host: when it does (co-located, or a synced checkout) the flow file + * is written here exactly as before; when it doesn't (remote tool-server) the + * recording is kept in memory and every mutating flow tool returns a + * client-write directive so the YAML lands in the agent's project instead of + * recreating the agent's directory layout on this host. + */ +const fileInputs: FileInputSpec[] = [ + { target: "project_root", path: "${project_root}", kind: "probe" }, +]; + export const flowStartRecordingTool: ToolDefinition< z.infer, - { message: string; previousFlow?: string; flowFile: string } + { message: string; previousFlow?: string; flowFile: string; savedTo: FlowSavedTo } > = { id: "flow-start-recording", description: `Start recording a new flow. Creates a .yaml file in the .argent/flows/ directory. Use when you want to capture a reusable sequence of device interactions for later replay. -Returns { message, flowFile } and optionally { previousFlow } if a prior recording was abandoned. +Returns { message, flowFile, savedTo } and optionally { previousFlow } if a prior recording was abandoned. Fails if the .argent/flows/ directory cannot be created or the flow file cannot be written. After starting, use flow-add-step to append tool calls — each step is executed @@ -41,21 +55,30 @@ to add labels. Call flow-finish-recording when done. If a recorded step turns out to be wrong, you can edit the .yaml file directly to remove or reorder steps.`, zodSchema, + fileInputs, services: () => ({}), - async execute(_services, params) { + async execute(_services, params, ctx) { setActiveProjectRoot(params.project_root); const previousFlow = getActiveFlowOrNull(); - const dir = getFlowsDir(); - await fs.mkdir(dir, { recursive: true }); - const filePath = getFlowPath(params.name); - const flowFile = serializeFlow({ - executionPrerequisite: params.executionPrerequisite, - steps: [], - }); - await fs.writeFile(filePath, flowFile, "utf8"); - setActiveFlow(params.name); + const flow = { executionPrerequisite: params.executionPrerequisite, steps: [] }; + const flowFile = serializeFlow(flow); + + // No probe (older client, direct invocation) means the caller shares this + // filesystem — the pre-boundary assumption — so host persistence stands. + const probe = ctx?.fileInputs?.project_root; + const persist = probe && !probe.presentOnHost ? "client" : "host"; + + let savedTo: FlowSavedTo; + if (persist === "host") { + await fs.mkdir(getFlowsDir(), { recursive: true }); + await fs.writeFile(filePath, flowFile, "utf8"); + savedTo = filePath; + } else { + savedTo = clientFileDirective(filePath, flowFile); + } + startRecordingSession(params.name, { persist, filePath, flow }); if (previousFlow && previousFlow !== params.name) { return { @@ -65,12 +88,14 @@ to remove or reorder steps.`, `Now recording "${params.name}".`, previousFlow, flowFile, + savedTo, }; } return { message: `Started recording "${params.name}" flow`, flowFile, + savedTo, }; }, }; diff --git a/packages/tool-server/src/tools/flows/flow-utils.ts b/packages/tool-server/src/tools/flows/flow-utils.ts index 6afd73e6..d4c28b8c 100644 --- a/packages/tool-server/src/tools/flows/flow-utils.ts +++ b/packages/tool-server/src/tools/flows/flow-utils.ts @@ -1,6 +1,7 @@ import * as path from "node:path"; import * as fs from "node:fs/promises"; import { stringify as yamlStringify, parse as yamlParse } from "yaml"; +import { CLIENT_FILE_MARKER, type ClientFileDirective } from "@argent/registry"; const FLOWS_DIR_NAME = path.join(".argent", "flows"); @@ -11,6 +12,32 @@ const FLOWS_DIR_NAME = path.join(".argent", "flows"); let activeFlowName: string | null = null; let activeProjectRoot: string | null = null; +/** + * Where the active recording's YAML is persisted: + * - `"host"` — this process writes `/.argent/flows/.yaml` + * directly (the original behavior; correct whenever the caller's + * project root is on this machine). + * - `"client"` — the caller's project root is NOT on this machine (remote + * tool-server). The flow lives in memory here and every mutating + * tool returns a {@link ClientFileDirective} so the *client* + * writes the YAML into the agent's project. + */ +export type FlowPersistMode = "host" | "client"; + +export interface RecordingSession { + persist: FlowPersistMode; + /** + * Absolute path of the flow file as the CALLER knows it. A real host path in + * "host" mode; in "client" mode it is only echoed back inside the directive + * (it names a file on the client's machine, never touched here). + */ + filePath: string; + /** In-memory flow content — authoritative in "client" mode. */ + flow: FlowFile; +} + +let recordingSession: RecordingSession | null = null; + export function setActiveProjectRoot(root: string): void { if (!path.isAbsolute(root)) { throw new Error( @@ -73,6 +100,23 @@ export function setActiveFlow(name: string): void { activeFlowName = name; } +/** Begin a recording session (replacing any abandoned one). */ +export function startRecordingSession(name: string, session: RecordingSession): void { + activeFlowName = name; + recordingSession = session; +} + +export function getRecordingSession(): RecordingSession | null { + return recordingSession; +} + +function requireRecordingSession(): RecordingSession { + if (!activeFlowName || !recordingSession) { + throw new Error("No active flow. Call flow-start-recording first."); + } + return recordingSession; +} + /** Returns the active flow name, or null if none is active. */ export function getActiveFlowOrNull(): string | null { return activeFlowName; @@ -87,6 +131,7 @@ export function getActiveFlow(): string { export function clearActiveFlow(): void { activeFlowName = null; + recordingSession = null; } // ── Types ──────────────────────────────────────────────────────────── @@ -184,3 +229,36 @@ export async function appendStep(filePath: string, step: FlowStep): Promise { + const session = requireRecordingSession(); + if (session.persist === "host") { + const flowFile = await appendStep(session.filePath, step); + session.flow = parseFlow(flowFile); + return { flowFile, savedTo: session.filePath, session }; + } + session.flow.steps.push(step); + const flowFile = serializeFlow(session.flow); + return { flowFile, savedTo: clientFileDirective(session.filePath, flowFile), session }; +} diff --git a/packages/tool-server/src/tools/profiler/native-profiler/native-profiler-analyze.ts b/packages/tool-server/src/tools/profiler/native-profiler/native-profiler-analyze.ts index 1051b82a..182fe142 100644 --- a/packages/tool-server/src/tools/profiler/native-profiler/native-profiler-analyze.ts +++ b/packages/tool-server/src/tools/profiler/native-profiler/native-profiler-analyze.ts @@ -10,6 +10,7 @@ import { ensureDeps } from "../../../utils/check-deps"; import type { NativeProfilerAnalyzeResult } from "../../../utils/ios-profiler/types"; import { analyzeNativeProfilerIos } from "./platforms/ios"; import { analyzeNativeProfilerAndroid } from "./platforms/android"; +import { requireArtifacts, type ArtifactHandle } from "../../../artifacts"; const zodSchema = z.object({ device_id: z @@ -22,9 +23,19 @@ const capability = { android: { emulator: true, device: true, unknown: true }, } as const; +/** + * Wire shape: `reportFile` leaves as an artifact handle (not a host path) so + * the client can materialize the full markdown report locally and the inline + * report's "Read the reportFile" instruction works wherever the tool-server + * runs — the exact pattern react-profiler-analyze already uses. + */ +type NativeProfilerAnalyzeToolResult = Omit & { + reportFile: ArtifactHandle | null; +}; + export const nativeProfilerAnalyzeTool: ToolDefinition< z.infer, - NativeProfilerAnalyzeResult + NativeProfilerAnalyzeToolResult > = { id: "native-profiler-analyze", capability, @@ -41,16 +52,25 @@ Fails if native-profiler-stop has not been called first to export trace data.`, services: (params) => ({ session: nativeProfilerSessionRef(resolveDevice(params.device_id)), }), - async execute(services, params) { + async execute(services, params, ctx) { const api = services.session as NativeProfilerSessionApi; const device = resolveDevice(params.device_id); assertSupported("native-profiler-analyze", capability, device); + let result: NativeProfilerAnalyzeResult; if (api.platform === "ios") { await ensureDeps(["xcrun"]); - return analyzeNativeProfilerIos(api); + result = await analyzeNativeProfilerIos(api); + } else { + await ensureDeps(["adb"]); + result = await analyzeNativeProfilerAndroid(api); } - await ensureDeps(["adb"]); - return analyzeNativeProfilerAndroid(api); + + return { + ...result, + reportFile: result.reportFile + ? await requireArtifacts(ctx).register(result.reportFile) + : null, + }; }, }; diff --git a/packages/tool-server/src/tools/profiler/react/react-profiler-component-source.ts b/packages/tool-server/src/tools/profiler/react/react-profiler-component-source.ts index aa2c6416..7f32a090 100644 --- a/packages/tool-server/src/tools/profiler/react/react-profiler-component-source.ts +++ b/packages/tool-server/src/tools/profiler/react/react-profiler-component-source.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import { promises as fs } from "fs"; -import type { ToolDefinition } from "@argent/registry"; +import type { FileInputSpec, ToolDefinition } from "@argent/registry"; import { buildAstIndexWithDiagnostics } from "../../../utils/react-profiler/pipeline/06-resolve/ast-index"; const zodSchema = z.object({ @@ -8,6 +8,16 @@ const zodSchema = z.object({ project_root: z.string().describe("Absolute path to the RN project root"), }); +/** + * The AST lookup scans the whole project tree, which can't ride along in a + * tool call — so the boundary gates on the directory existing on this host. A + * remote caller whose checkout isn't mirrored here gets an actionable error + * instead of a silent empty index ("component not found" for everything). + */ +const fileInputs: FileInputSpec[] = [ + { target: "project_root", path: "${project_root}", kind: "directory" }, +]; + export const reactProfilerComponentSourceTool: ToolDefinition< z.infer, Record @@ -17,6 +27,7 @@ export const reactProfilerComponentSourceTool: ToolDefinition< Call this per-finding after react-profiler-analyze to inspect source before proposing a fix. Returns found: false if the component is not found in user-owned code (e.g. lives in node_modules).`, zodSchema, + fileInputs, services: () => ({}), async execute(_services, params) { const astIndex = await buildAstIndexWithDiagnostics(params.project_root); diff --git a/packages/tool-server/src/tools/screenshot-diff/index.ts b/packages/tool-server/src/tools/screenshot-diff/index.ts index 68cc9a36..0440c57d 100644 --- a/packages/tool-server/src/tools/screenshot-diff/index.ts +++ b/packages/tool-server/src/tools/screenshot-diff/index.ts @@ -1,16 +1,19 @@ import crypto from "node:crypto"; import fs from "fs/promises"; +import os from "node:os"; import path from "path"; import { z } from "zod"; import type { - InvokeToolOptions, + FileInputSpec, ServiceRef, + ToolContext, ToolCapability, ToolDefinition, } from "@argent/registry"; import { simulatorServerRef, type SimulatorServerApi } from "../../blueprints/simulator-server"; import { resolveDevice } from "../../utils/device-info"; import { httpScreenshot } from "../../utils/simulator-client"; +import { requireArtifacts, type ArtifactHandle } from "../../artifacts"; import { diffPngFiles } from "./screenshot-diff"; const zodSchema = z @@ -45,7 +48,13 @@ const zodSchema = z .enum(["Portrait", "LandscapeLeft", "LandscapeRight", "PortraitUpsideDown"]) .optional() .describe("Orientation override for live baseline/current captures."), - outputDir: z.string().min(1).describe("Directory where diff artifacts should be written."), + outputDir: z + .string() + .min(1) + .optional() + .describe( + "Directory where diff artifacts should be written. Optional — defaults to a temp directory; the diff images are returned in the result either way." + ), }) .strict(); @@ -53,8 +62,13 @@ type Params = z.infer; export interface ScreenshotDiffResult { summary: string; - diffPath?: string; - contextDiffPath?: string; + /** + * Artifact handles (not raw host paths): the client materializes the diff + * images to files on ITS machine, so the agent can open them — and the MCP + * adapter can inline `contextDiff` — even when the tool-server is remote. + */ + diffPath?: ArtifactHandle; + contextDiffPath?: ArtifactHandle; } type CaptureScreenshot = typeof httpScreenshot; @@ -64,6 +78,19 @@ const capability: ToolCapability = { android: { emulator: true, device: true, unknown: true }, }; +/** + * The saved PNGs live on the AGENT's machine (typically materialized there by + * an earlier full-res `screenshot` call), so both path params cross the file + * boundary as `file` inputs. `outputDir` is only probed: when the agent-chosen + * directory doesn't exist on this host (remote mode), the tool quietly falls + * back to its temp default rather than recreating an agent-side path here. + */ +const fileInputs: FileInputSpec[] = [ + { target: "baselinePath", path: "${baselinePath}", kind: "file", optional: true }, + { target: "currentPath", path: "${currentPath}", kind: "file", optional: true }, + { target: "outputDir", path: "${outputDir}", kind: "probe", optional: true }, +]; + export const screenshotDiffTool: ToolDefinition = { id: "screenshot-diff", description: `Compare two PNG screenshots and return a compact visual-diff summary. @@ -77,6 +104,7 @@ Fails if the input sources are invalid, PNG files cannot be read, outputDir cann "compare screenshots png diff visual UI changes UI regression visual regression screenshot diff changed regions text ocr live capture", zodSchema, capability, + fileInputs, services: (params): Record => ({ simulatorServer: simulatorServerRef(resolveDevice(params.udid)), }), @@ -88,12 +116,15 @@ Fails if the input sources are invalid, PNG files cannot be read, outputDir cann export async function executeScreenshotDiffTool( services: Record, params: Params, - options?: InvokeToolOptions, + options?: Partial, captureScreenshot: CaptureScreenshot = httpScreenshot ): Promise { + const outputDir = await resolveOutputDir(params, options); + const { baselinePath, currentPath } = await resolveInputPaths( services, params, + outputDir, options, captureScreenshot ); @@ -101,20 +132,51 @@ export async function executeScreenshotDiffTool( const result = await diffPngFiles({ baselinePath, currentPath, - outputDir: params.outputDir, + outputDir, }); + const artifacts = requireArtifacts(options); return { summary: result.summary, - ...(result.diffPath ? { diffPath: result.diffPath } : {}), - ...(result.contextDiffPath ? { contextDiffPath: result.contextDiffPath } : {}), + ...(result.diffPath + ? { diffPath: await artifacts.register(result.diffPath, { mimeType: "image/png" }) } + : {}), + ...(result.contextDiffPath + ? { + contextDiffPath: await artifacts.register(result.contextDiffPath, { + mimeType: "image/png", + }), + } + : {}), }; } +/** + * Where diff artifacts (and live-capture intermediates) are written on this + * host. An agent-supplied outputDir is honored when it is usable here — i.e. + * not flagged absent by the boundary probe (a remote client's local directory). + * Everything else gets a per-call temp dir; the diff images travel back as + * artifacts, so the directory's location no longer matters to the agent. + */ +async function resolveOutputDir(params: Params, options?: Partial): Promise { + const probe = options?.fileInputs?.outputDir; + if (params.outputDir && (probe === undefined || probe.presentOnHost)) { + return params.outputDir; + } + const dir = path.join( + os.tmpdir(), + "argent-screenshot-diff", + crypto.randomBytes(6).toString("hex") + ); + await fs.mkdir(dir, { recursive: true }); + return dir; +} + async function resolveInputPaths( services: Record, params: Params, - options: InvokeToolOptions | undefined, + outputDir: string, + options: Partial | undefined, captureScreenshot: CaptureScreenshot ): Promise<{ baselinePath: string; currentPath: string }> { validateInputSources(params); @@ -122,7 +184,7 @@ async function resolveInputPaths( const baselinePath = params.captureBaseline ? await captureLiveInput({ api: services.simulatorServer as SimulatorServerApi, - outputDir: params.outputDir, + outputDir, name: "baseline", rotation: params.rotation, signal: options?.signal, @@ -133,7 +195,7 @@ async function resolveInputPaths( const currentPath = params.captureCurrent ? await captureLiveInput({ api: services.simulatorServer as SimulatorServerApi, - outputDir: params.outputDir, + outputDir, name: "current", rotation: params.rotation, signal: options?.signal, diff --git a/packages/tool-server/src/tools/screenshot-diff/screenshot-diff-summary.ts b/packages/tool-server/src/tools/screenshot-diff/screenshot-diff-summary.ts index 35c644cf..6072d4dc 100644 --- a/packages/tool-server/src/tools/screenshot-diff/screenshot-diff-summary.ts +++ b/packages/tool-server/src/tools/screenshot-diff/screenshot-diff-summary.ts @@ -39,9 +39,10 @@ export function formatScreenshotDiffSummary(result: ScreenshotDiffSummaryInput): ); if (result.diffPath || result.contextDiffPath) { - lines.push(`- diff_images:`); - if (result.diffPath) lines.push(` - diff: ${result.diffPath}`); - if (result.contextDiffPath) lines.push(` - context: ${result.contextDiffPath}`); + // Reference the result fields instead of embedding the paths: the client + // rewrites `diffPath`/`contextDiffPath` to paths on ITS machine, and a raw + // server path inlined here would dangle when the tool-server runs remotely. + lines.push(`- diff_images: see diffPath (full size) and contextDiffPath in this result`); lines.push( ` - legend: green=pixel brighter in current, red=pixel darker in current, yellow rectangles outline changed regions` ); diff --git a/packages/tool-server/src/tools/workspace/gather-workspace-data.ts b/packages/tool-server/src/tools/workspace/gather-workspace-data.ts index 31ee84e5..05b5b12a 100644 --- a/packages/tool-server/src/tools/workspace/gather-workspace-data.ts +++ b/packages/tool-server/src/tools/workspace/gather-workspace-data.ts @@ -1,5 +1,5 @@ import { z } from "zod"; -import type { ToolDefinition } from "@argent/registry"; +import type { FileInputSpec, ToolDefinition } from "@argent/registry"; import { readWorkspaceSnapshot, type WorkspaceSnapshot } from "../../utils/workspace-reader"; const zodSchema = z.object({ @@ -8,6 +8,16 @@ const zodSchema = z.object({ .describe("Absolute path to the project root directory to inspect (e.g. /Users/dev/MyApp)"), }); +/** + * The snapshot reads dozens of files across the workspace tree, which can't + * ride along in a tool call — gate on the directory existing on this host so + * a remote caller gets an actionable error instead of the all-nulls snapshot + * readWorkspaceSnapshot returns for an unreadable path. + */ +const fileInputs: FileInputSpec[] = [ + { target: "workspacePath", path: "${workspacePath}", kind: "directory" }, +]; + export const gatherWorkspaceDataTool: ToolDefinition< z.infer, WorkspaceSnapshot @@ -32,6 +42,7 @@ Use when you need to inspect project configuration without manually reading mult Returns partial data if workspacePath does not exist or is not readable; missing items are represented as null or empty collections. Fails if the workspacePath is not an absolute path or the directory cannot be accessed.`, zodSchema, + fileInputs, services: () => ({}), async execute(_services, params) { return readWorkspaceSnapshot(params.workspacePath); diff --git a/packages/tool-server/src/utils/ios-profiler/render.ts b/packages/tool-server/src/utils/ios-profiler/render.ts index dce4b009..fa7aafdc 100644 --- a/packages/tool-server/src/utils/ios-profiler/render.ts +++ b/packages/tool-server/src/utils/ios-profiler/render.ts @@ -63,10 +63,13 @@ export async function renderNativeProfilerReport( const shownHotspots = Math.min(MAX_INLINE_HOTSPOTS, cpuHotspotsCount); const shownHangs = Math.min(MAX_INLINE_HANGS, uiHangsCount); + // Reference the result field rather than embedding the host path: the + // client materializes `reportFile` to a path on ITS machine, and the raw + // server path would dangle when the tool-server runs remotely. const report = wroteFile && reportFile ? inlineReport + - `\n\n> Full report: \`${reportFile}\` — ${bottlenecksTotal} bottleneck(s) total, showing top ${shownHotspots} CPU hotspots and top ${shownHangs} hangs inline. Use the Read tool to view all details.` + `\n\n> Full report saved — ${bottlenecksTotal} bottleneck(s) total, showing top ${shownHotspots} CPU hotspots and top ${shownHangs} hangs inline. Use the Read tool on the \`reportFile\` path in this result to view all details.` : inlineReport; return { diff --git a/packages/tool-server/test/file-inputs.test.ts b/packages/tool-server/test/file-inputs.test.ts new file mode 100644 index 00000000..b051a27f --- /dev/null +++ b/packages/tool-server/test/file-inputs.test.ts @@ -0,0 +1,249 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import * as fs from "node:fs/promises"; +import * as os from "node:os"; +import * as path from "node:path"; +import { FILE_INPUT_MARKER, type FileInputSpec } from "@argent/registry"; +import { resolveFileInputs, FileInputError } from "../src/file-inputs"; + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "file-inputs-test-")); +}); + +afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); +}); + +function wire(overrides: Record) { + return { [FILE_INPUT_MARKER]: true, ...overrides }; +} + +const FILE_SPEC: FileInputSpec[] = [{ target: "input", path: "${input}", kind: "file" }]; + +describe("resolveFileInputs", () => { + it("passes plain-string args through untouched (legacy callers)", async () => { + const body = { input: "/some/path.png", other: 1 }; + const { args, fileInputs } = await resolveFileInputs({ fileInputs: FILE_SPEC }, body); + expect(args).toEqual(body); + expect(fileInputs).toBeUndefined(); + }); + + it("uses the wrapper path in place when it matches on this host", async () => { + const filePath = path.join(tmpDir, "input.png"); + await fs.writeFile(filePath, "png-bytes"); + const st = await fs.stat(filePath); + + const { args, fileInputs } = await resolveFileInputs( + { fileInputs: FILE_SPEC }, + { input: wire({ path: filePath, size: st.size, mtimeMs: st.mtimeMs }) } + ); + + expect(args.input).toBe(filePath); + expect(fileInputs).toEqual({ + input: { clientPath: filePath, presentOnHost: true, viaUpload: false }, + }); + }); + + it("falls back to uploaded content when the stat does not match", async () => { + const filePath = path.join(tmpDir, "input.png"); + await fs.writeFile(filePath, "stale"); + const content = Buffer.from("fresh client bytes"); + + const { args, fileInputs } = await resolveFileInputs( + { fileInputs: FILE_SPEC }, + { + input: wire({ + path: filePath, + size: content.length, + content: content.toString("base64"), + }), + } + ); + + expect(args.input).not.toBe(filePath); + expect(await fs.readFile(args.input as string, "utf8")).toBe("fresh client bytes"); + expect(fileInputs!.input).toMatchObject({ presentOnHost: false, viaUpload: true }); + }); + + it("materializes uploaded content for a path that does not exist here", async () => { + const clientPath = path.join(tmpDir, "not-here", "flow.yaml"); + const content = Buffer.from("steps: []\n"); + + const { args } = await resolveFileInputs( + { fileInputs: FILE_SPEC }, + { + input: wire({ + path: clientPath, + size: content.length, + content: content.toString("base64"), + }), + } + ); + + expect(await fs.readFile(args.input as string, "utf8")).toBe("steps: []\n"); + }); + + it("cleanup removes materialized uploads and is a no-op for in-place paths", async () => { + const inPlace = path.join(tmpDir, "in-place.png"); + await fs.writeFile(inPlace, "png-bytes"); + const st = await fs.stat(inPlace); + const content = Buffer.from("uploaded bytes"); + + const specs: FileInputSpec[] = [ + { target: "a", path: "${a}", kind: "file" }, + { target: "b", path: "${b}", kind: "file" }, + ]; + const { args, cleanup } = await resolveFileInputs( + { fileInputs: specs }, + { + a: wire({ path: inPlace, size: st.size, mtimeMs: st.mtimeMs }), + b: wire({ + path: "/client/only.png", + size: content.length, + content: content.toString("base64"), + }), + } + ); + + const materialized = args.b as string; + expect(await fs.readFile(materialized, "utf8")).toBe("uploaded bytes"); + + await cleanup(); + await expect(fs.stat(materialized)).rejects.toThrow(); + // The temp dir holding it is gone too, and double-cleanup is harmless. + await expect(fs.stat(path.dirname(materialized))).rejects.toThrow(); + await cleanup(); + // In-place inputs are the caller's files — never removed. + expect(await fs.readFile(inPlace, "utf8")).toBe("png-bytes"); + }); + + it("cleans up already-materialized uploads when a later spec fails", async () => { + const content = Buffer.from("bytes"); + const specs: FileInputSpec[] = [ + { target: "a", path: "${a}", kind: "file" }, + { target: "b", path: "${b}", kind: "file" }, + ]; + + const listInputTempDirs = async () => { + const entries = await fs.readdir(os.tmpdir()); + return entries.filter((e) => e.startsWith("argent-file-input-")); + }; + const before = await listInputTempDirs(); + + await expect( + resolveFileInputs( + { fileInputs: specs }, + { + a: wire({ + path: "/client/a.png", + size: content.length, + content: content.toString("base64"), + }), + b: wire({ path: path.join(tmpDir, "ghost.png") }), + } + ) + ).rejects.toThrow(FileInputError); + + expect(await listInputTempDirs()).toEqual(before); + }); + + it("rejects a missing file with no uploaded content", async () => { + await expect( + resolveFileInputs( + { fileInputs: FILE_SPEC }, + { input: wire({ path: path.join(tmpDir, "ghost.png") }) } + ) + ).rejects.toThrow(/was not found on the tool-server host/); + }); + + it("explains the transfer limit when content was omitted for size and the path is absent", async () => { + await expect( + resolveFileInputs( + { fileInputs: FILE_SPEC }, + { + input: wire({ + path: path.join(tmpDir, "huge.bin"), + size: 36 * 1024 * 1024, + mtimeMs: 1234, + contentOmitted: "size-limit", + }), + } + ) + ).rejects.toThrow(/36\.0 MB — larger than the 32\.0 MB file-input transfer limit/); + }); + + it("still resolves an oversize file in place when it matches on this host", async () => { + const filePath = path.join(tmpDir, "big-but-here.bin"); + await fs.writeFile(filePath, "stat-matched bytes"); + const st = await fs.stat(filePath); + + const { args, fileInputs } = await resolveFileInputs( + { fileInputs: FILE_SPEC }, + { + input: wire({ + path: filePath, + size: st.size, + mtimeMs: st.mtimeMs, + contentOmitted: "size-limit", + }), + } + ); + + expect(args.input).toBe(filePath); + expect(fileInputs!.input).toMatchObject({ presentOnHost: true, viaUpload: false }); + }); + + it("rejects an upload whose decoded size disagrees with the recorded size", async () => { + await expect( + resolveFileInputs( + { fileInputs: FILE_SPEC }, + { + input: wire({ + path: "/client/file.png", + size: 999, + content: Buffer.from("short").toString("base64"), + }), + } + ) + ).rejects.toThrow(/truncated or corrupted/); + }); + + it("directory kind resolves in place when the directory exists", async () => { + const spec: FileInputSpec[] = [{ target: "root", path: "${root}", kind: "directory" }]; + const { args, fileInputs } = await resolveFileInputs( + { fileInputs: spec }, + { root: wire({ path: tmpDir }) } + ); + expect(args.root).toBe(tmpDir); + expect(fileInputs!.root).toMatchObject({ presentOnHost: true }); + }); + + it("directory kind fails with remote-mode guidance when absent on this host", async () => { + const spec: FileInputSpec[] = [{ target: "root", path: "${root}", kind: "directory" }]; + await expect( + resolveFileInputs({ fileInputs: spec }, { root: wire({ path: path.join(tmpDir, "nope") }) }) + ).rejects.toThrow(/does not exist on the tool-server host/); + }); + + it("probe kind never fails and reports presence via metadata", async () => { + const spec: FileInputSpec[] = [{ target: "dir", path: "${dir}", kind: "probe" }]; + const ghost = path.join(tmpDir, "ghost-dir"); + + const present = await resolveFileInputs({ fileInputs: spec }, { dir: wire({ path: tmpDir }) }); + expect(present.args.dir).toBe(tmpDir); + expect(present.fileInputs!.dir).toMatchObject({ presentOnHost: true }); + + const absent = await resolveFileInputs({ fileInputs: spec }, { dir: wire({ path: ghost }) }); + expect(absent.args.dir).toBe(ghost); + expect(absent.fileInputs!.dir).toMatchObject({ presentOnHost: false }); + }); + + it("ignores wrappers on undeclared targets", async () => { + const body = { smuggled: wire({ path: "/etc/passwd" }) }; + const { args, fileInputs } = await resolveFileInputs({ fileInputs: FILE_SPEC }, body); + // Left untouched — the tool's own schema validation rejects the object. + expect(args.smuggled).toEqual(body.smuggled); + expect(fileInputs).toBeUndefined(); + }); +}); diff --git a/packages/tool-server/test/flows/flow-remote-recording.test.ts b/packages/tool-server/test/flows/flow-remote-recording.test.ts new file mode 100644 index 00000000..1d0f7ce2 --- /dev/null +++ b/packages/tool-server/test/flows/flow-remote-recording.test.ts @@ -0,0 +1,160 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import * as fs from "node:fs/promises"; +import * as os from "node:os"; +import * as path from "node:path"; +import type { Registry, ToolContext } from "@argent/registry"; +import { ArtifactStore, CLIENT_FILE_MARKER } from "@argent/registry"; + +import { flowStartRecordingTool } from "../../src/tools/flows/flow-start-recording"; +import { flowInsertEchoTool } from "../../src/tools/flows/flow-insert-echo"; +import { flowFinishRecordingTool } from "../../src/tools/flows/flow-finish-recording"; +import { createFlowAddStepTool } from "../../src/tools/flows/flow-add-step"; +import { createRunFlowTool } from "../../src/tools/flows/flow-run"; +import { flowReadPrerequisiteTool } from "../../src/tools/flows/flow-read-prerequisite"; +import { + clearActiveFlow, + clearActiveProjectRoot, + parseFlow, +} from "../../src/tools/flows/flow-utils"; + +/** + * Remote-mode flow behavior: the agent's project_root does NOT exist on this + * host (the boundary probe says presentOnHost: false), so recording stays in + * memory and every mutating tool returns a client-write directive instead of + * touching this host's disk. + */ + +// A path that exists on the (simulated) client but not on this "server". +const CLIENT_ROOT = path.join(os.tmpdir(), "definitely-not-on-this-host", "agent-project"); +const CLIENT_FLOW_PATH = path.join(CLIENT_ROOT, ".argent", "flows", "remote-flow.yaml"); + +function remoteCtx(): ToolContext { + return { + artifacts: new ArtifactStore(), + fileInputs: { + project_root: { clientPath: CLIENT_ROOT, presentOnHost: false, viaUpload: false }, + }, + }; +} + +function createMockRegistry(tools: Record = {}) { + return { + invokeTool: vi.fn(async (id: string) => { + const entry = tools[id]; + if (!entry) throw new Error(`Tool "${id}" not found`); + return entry.result; + }), + getTool: vi.fn(() => undefined), + } as unknown as Registry; +} + +beforeEach(() => { + clearActiveFlow(); +}); + +afterEach(async () => { + clearActiveFlow(); + clearActiveProjectRoot(); + await fs.rm(CLIENT_ROOT, { recursive: true, force: true }); +}); + +describe("flow recording with a remote client (probe miss)", () => { + it("start-recording returns a directive and writes nothing on this host", async () => { + const result = await flowStartRecordingTool.execute( + {}, + { name: "remote-flow", project_root: CLIENT_ROOT, executionPrerequisite: "Home" }, + remoteCtx() + ); + + expect(result.savedTo).toMatchObject({ + [CLIENT_FILE_MARKER]: true, + path: CLIENT_FLOW_PATH, + }); + const directive = result.savedTo as { content: string }; + expect(parseFlow(directive.content).executionPrerequisite).toBe("Home"); + // The agent's directory layout must not be recreated on the server host. + await expect(fs.stat(CLIENT_ROOT)).rejects.toThrow(); + }); + + it("add-step / add-echo accumulate in memory and return updated directives", async () => { + const registry = createMockRegistry({ tap: { result: { tapped: true } } }); + const addStep = createFlowAddStepTool(registry); + + await flowStartRecordingTool.execute( + {}, + { name: "remote-flow", project_root: CLIENT_ROOT, executionPrerequisite: "Home" }, + remoteCtx() + ); + + await flowInsertEchoTool.execute({}, { message: "label" }); + const stepResult = await addStep.execute({}, { command: "tap", args: '{"x":0.5}' }); + + const directive = stepResult.savedTo as { path: string; content: string }; + expect(directive.path).toBe(CLIENT_FLOW_PATH); + expect(parseFlow(directive.content).steps).toEqual([ + { kind: "echo", message: "label" }, + { kind: "tool", name: "tap", args: { x: 0.5 } }, + ]); + await expect(fs.stat(CLIENT_ROOT)).rejects.toThrow(); + }); + + it("finish-recording summarizes the in-memory flow and clears the session", async () => { + await flowStartRecordingTool.execute( + {}, + { name: "remote-flow", project_root: CLIENT_ROOT, executionPrerequisite: "Home" }, + remoteCtx() + ); + await flowInsertEchoTool.execute({}, { message: "only step" }); + + const result = await flowFinishRecordingTool.execute({}, {}); + + expect(result.steps).toBe(1); + expect(result.summary).toEqual(["1. echo: only step"]); + expect(result.path).toBe(CLIENT_FLOW_PATH); + expect(result.savedTo).toMatchObject({ [CLIENT_FILE_MARKER]: true }); + + await expect(flowFinishRecordingTool.execute({}, {})).rejects.toThrow("No active flow"); + }); +}); + +describe("flow replay with a boundary-resolved flow_file", () => { + it("flow-execute reads the resolved path instead of deriving from project_root", async () => { + // Simulates the server-side temp file the boundary materialized from the + // client's upload. + const uploaded = path.join(os.tmpdir(), `uploaded-flow-${Date.now()}.yaml`); + await fs.writeFile( + uploaded, + ["executionPrerequisite: ''", "steps:", " - echo: from upload", ""].join("\n") + ); + try { + const runFlow = createRunFlowTool(createMockRegistry()); + const result = await runFlow.execute( + {}, + { name: "remote-flow", project_root: CLIENT_ROOT, flow_file: uploaded } + ); + expect(result).toMatchObject({ + flow: "remote-flow", + steps: [{ kind: "echo", message: "from upload" }], + }); + } finally { + await fs.rm(uploaded, { force: true }); + } + }); + + it("flow-read-prerequisite reads the resolved path", async () => { + const uploaded = path.join(os.tmpdir(), `uploaded-prereq-${Date.now()}.yaml`); + await fs.writeFile( + uploaded, + ["executionPrerequisite: 'Device unlocked'", "steps: []", ""].join("\n") + ); + try { + const result = await flowReadPrerequisiteTool.execute( + {}, + { name: "remote-flow", project_root: CLIENT_ROOT, flow_file: uploaded } + ); + expect(result.executionPrerequisite).toBe("Device unlocked"); + } finally { + await fs.rm(uploaded, { force: true }); + } + }); +}); diff --git a/packages/tool-server/test/native-profiler-analyze-failure.test.ts b/packages/tool-server/test/native-profiler-analyze-failure.test.ts index 6fe6ce14..0b519fab 100644 --- a/packages/tool-server/test/native-profiler-analyze-failure.test.ts +++ b/packages/tool-server/test/native-profiler-analyze-failure.test.ts @@ -11,6 +11,7 @@ * (empty-but-readable) file so the existence guard passes. */ import { describe, it, expect, vi, beforeEach } from "vitest"; +import { ArtifactStore } from "@argent/registry"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { mkdtemp, rm, writeFile } from "node:fs/promises"; @@ -142,7 +143,8 @@ describe("native-profiler-analyze: status + exportErrors envelope (Bug 4)", () = const result = await nativeProfilerAnalyzeTool.execute( { session }, - { device_id: "emulator-5554" } + { device_id: "emulator-5554" }, + { artifacts: new ArtifactStore() } ); expect(result.status).toBe("ok"); @@ -175,7 +177,8 @@ describe("native-profiler-analyze: status + exportErrors envelope (Bug 4)", () = const result = await nativeProfilerAnalyzeTool.execute( { session }, - { device_id: "emulator-5554" } + { device_id: "emulator-5554" }, + { artifacts: new ArtifactStore() } ); expect(result.status).toBe("analysis_failed"); @@ -225,7 +228,8 @@ describe("native-profiler-analyze: status + exportErrors envelope (Bug 4)", () = const result = await nativeProfilerAnalyzeTool.execute( { session }, - { device_id: "emulator-5554" } + { device_id: "emulator-5554" }, + { artifacts: new ArtifactStore() } ); expect(result.status).toBe("analysis_failed"); @@ -261,7 +265,8 @@ describe("native-profiler-analyze: status + exportErrors envelope (Bug 4)", () = const result = await nativeProfilerAnalyzeTool.execute( { session }, - { device_id: "emulator-5554" } + { device_id: "emulator-5554" }, + { artifacts: new ArtifactStore() } ); expect(result.status).toBe("analysis_failed"); diff --git a/packages/tool-server/test/native-profiler-missing-trace.test.ts b/packages/tool-server/test/native-profiler-missing-trace.test.ts index 67050525..405fa2d1 100644 --- a/packages/tool-server/test/native-profiler-missing-trace.test.ts +++ b/packages/tool-server/test/native-profiler-missing-trace.test.ts @@ -10,6 +10,7 @@ * has no findings" and surface the export failure via `exportErrors`. */ import { describe, it, expect, vi } from "vitest"; +import { ArtifactStore } from "@argent/registry"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { mkdtemp, rm, writeFile } from "node:fs/promises"; @@ -70,7 +71,8 @@ describe("native-profiler-analyze: missing trace file", () => { const result = await nativeProfilerAnalyzeTool.execute( { session }, - { device_id: "TEST-DEVICE" } + { device_id: "TEST-DEVICE" }, + { artifacts: new ArtifactStore() } ); // Bug: previously this rendered "All clear" with no warning because diff --git a/packages/tool-server/test/screenshot-diff-tool.test.ts b/packages/tool-server/test/screenshot-diff-tool.test.ts index 0d02d78e..a6b96ffe 100644 --- a/packages/tool-server/test/screenshot-diff-tool.test.ts +++ b/packages/tool-server/test/screenshot-diff-tool.test.ts @@ -3,6 +3,7 @@ import os from "os"; import path from "path"; import { PNG } from "pngjs"; import { describe, expect, it, vi } from "vitest"; +import { ArtifactStore } from "@argent/registry"; import { executeScreenshotDiffTool, screenshotDiffTool } from "../src/tools/screenshot-diff"; describe("screenshotDiffTool", () => { @@ -66,13 +67,22 @@ describe("screenshotDiffTool", () => { currentPath, udid: "ABC", outputDir: dir, - } + }, + { artifacts: new ArtifactStore() } ); - expect(result).toEqual({ - summary: expect.stringContaining("Screenshot diff summary"), - diffPath: path.join(dir, "current-diff.png"), - contextDiffPath: path.join(dir, "current-context-diff.png"), + // Diff outputs leave as artifact handles so a remote client can download + // them; hostPath still points at the requested outputDir. + expect(result.summary).toContain("Screenshot diff summary"); + expect(result.diffPath).toMatchObject({ + __argentArtifact: true, + hostPath: path.join(dir, "current-diff.png"), + mimeType: "image/png", + }); + expect(result.contextDiffPath).toMatchObject({ + __argentArtifact: true, + hostPath: path.join(dir, "current-context-diff.png"), + mimeType: "image/png", }); expect(Object.keys(result).sort()).toEqual(["contextDiffPath", "diffPath", "summary"]); }); @@ -98,7 +108,7 @@ describe("screenshotDiffTool", () => { rotation: "LandscapeLeft", outputDir: dir, }, - { signal }, + { signal, artifacts: new ArtifactStore() }, captureScreenshot as never ); @@ -116,7 +126,9 @@ describe("screenshotDiffTool", () => { await expect(fs.stat(path.join(dir, liveCaptures[0]!))).resolves.toMatchObject({ size: expect.any(Number), }); - expect(result.diffPath).toBe(path.join(dir, `${liveBaseName}-diff.png`)); + expect(result.diffPath).toMatchObject({ + hostPath: path.join(dir, `${liveBaseName}-diff.png`), + }); }); it("falls back to the default scale when the full-resolution capture fails (Android framebuffer mismatch)", async () => { @@ -139,7 +151,7 @@ describe("screenshotDiffTool", () => { const result = await executeScreenshotDiffTool( { simulatorServer: { apiUrl: "http://localhost:4949" } }, { baselinePath, captureCurrent: true, udid: "ABC", outputDir: dir }, - {}, + { artifacts: new ArtifactStore() }, captureScreenshot as never ); @@ -189,13 +201,13 @@ describe("screenshotDiffTool", () => { await executeScreenshotDiffTool( { simulatorServer: { apiUrl: "http://localhost:4949" } }, { baselinePath, captureCurrent: true, udid: "ABC", outputDir: dir }, - {}, + { artifacts: new ArtifactStore() }, captureScreenshot as never ); await executeScreenshotDiffTool( { simulatorServer: { apiUrl: "http://localhost:4949" } }, { baselinePath, captureCurrent: true, udid: "ABC", outputDir: dir }, - {}, + { artifacts: new ArtifactStore() }, captureScreenshot as never );