upstash · up-ilter · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Copilot
diff --git a/.changeset/tool-call-ids-and-results.md b/.changeset/tool-call-ids-and-results.md
@@ -0,0 +1,5 @@
+---
+"@upstash/box": minor
+---
+
+Add `toolCallId` to `tool-call` chunks and add a new `tool-result` chunk variant to the streaming `Chunk` union. The `onToolUse` callback now also receives the `toolCallId` of the invocation. Together these let consumers reliably match results back to their originating call when an agent runs multiple tools in parallel, and remove the need to intercept `tool_result` from the `unknown` event variant.
diff --git a/packages/sdk/src/__tests__/box-agent-run.test.ts b/packages/sdk/src/__tests__/box-agent-run.test.ts
@@ -442,7 +442,7 @@ describe("box.agent.stream", () => {
     fetchMock.mockResolvedValueOnce(
       mockSSEResponse([
         { event: "run_start", data: { run_id: "r1" } },
-        { event: "tool", data: { name: "Write", input: { path: "/x" } } },
+        { event: "tool", data: { id: "toolu_1", name: "Write", input: { path: "/x" } } },
         { event: "text", data: { text: "done" } },
         { event: "done", data: {} },
       ]),
@@ -459,14 +459,92 @@ describe("box.agent.stream", () => {
 
     expect(tools).toHaveLength(1);
     expect(tools[0]!.name).toBe("Write");
-    const toolChunks = chunks.filter((c) => c.type === "tool-call");
+    const toolChunks = chunks.filter(
+      (c): c is Extract<Chunk, { type: "tool-call" }> => c.type === "tool-call",
+    );
     expect(toolChunks).toHaveLength(1);
+    expect(toolChunks[0]!.toolCallId).toBe("toolu_1");
+    expect(toolChunks[0]!.toolName).toBe("Write");
     const textChunks = chunks.filter(
       (c): c is Extract<Chunk, { type: "text-delta" }> => c.type === "text-delta",
     );
     expect(textChunks.map((c) => c.text)).toEqual(["done"]);
   });
 
+  it("matches parallel tool-call and tool-result chunks by id", async () => {
+    const { box, fetchMock } = await createTestBox();
+
+    fetchMock.mockResolvedValueOnce(
+      mockSSEResponse([
+        { event: "run_start", data: { run_id: "r1" } },
+        { event: "tool", data: { id: "toolu_a", name: "Read", input: { path: "/a" } } },
+        { event: "tool", data: { id: "toolu_b", name: "Read", input: { path: "/b" } } },
+        // Results arrive out of order — must still match by id.
+        {
+          event: "tool_result",
+          data: { tool_use_id: "toolu_b", output: "B contents", is_error: false },
+        },
+        {
+          event: "tool_result",
+          data: { tool_use_id: "toolu_a", output: "A contents", is_error: false },
+        },
+        { event: "done", data: {} },
+      ]),
+    );
+
+    const run = await box.agent.stream({ prompt: "read both" });
+    const chunks: Chunk[] = [];
+    for await (const chunk of run) {
+      chunks.push(chunk);
+    }
+
+    const calls = chunks.filter(
+      (c): c is Extract<Chunk, { type: "tool-call" }> => c.type === "tool-call",
+    );
+    const results = chunks.filter(
+      (c): c is Extract<Chunk, { type: "tool-result" }> => c.type === "tool-result",
+    );
+
+    expect(calls.map((c) => c.toolCallId)).toEqual(["toolu_a", "toolu_b"]);
+    // Out-of-order results must still be matchable by id.
+    expect(results.map((r) => r.toolCallId)).toEqual(["toolu_b", "toolu_a"]);
+
+    const resultsById = new Map(results.map((r) => [r.toolCallId, r.output]));
+    expect(resultsById.get("toolu_a")).toBe("A contents");
+    expect(resultsById.get("toolu_b")).toBe("B contents");
+  });
+
+  it("parses tool-result with fallback fields (id instead of tool_use_id, content instead of output)", async () => {
+    const { box, fetchMock } = await createTestBox();
+
+    fetchMock.mockResolvedValueOnce(
+      mockSSEResponse([
+        { event: "run_start", data: { run_id: "r1" } },
+        { event: "tool", data: { id: "t1", name: "Bash", input: { command: "ls" } } },
+        // Backend uses `id` instead of `tool_use_id`, and `content` instead of `output`
+        {
+          event: "tool_result",
+          data: { id: "t1", content: "file.txt", is_error: true },
+        },
+        { event: "done", data: {} },
+      ]),
+    );
+
+    const run = await box.agent.stream({ prompt: "test" });
+    const chunks: Chunk[] = [];
+    for await (const chunk of run) {
+      chunks.push(chunk);
+    }
+
+    const results = chunks.filter(
+      (c): c is Extract<Chunk, { type: "tool-result" }> => c.type === "tool-result",
+    );
+    expect(results).toHaveLength(1);
+    expect(results[0]!.toolCallId).toBe("t1");
+    expect(results[0]!.output).toBe("file.txt");
+    expect(results[0]!.isError).toBe(true);
+  });
+
   it("yields all chunk types in order", async () => {
     const { box, fetchMock } = await createTestBox();
 

diff --git a/packages/sdk/src/client.ts b/packages/sdk/src/client.ts
@@ -826,7 +826,11 @@ export class Box<TProvider = unknown> {
             break;
           }
           case "tool": {
-            options.onToolUse?.({ name: parsed.name, input: parsed.input });
+            options.onToolUse?.({
+              name: parsed.name ?? "",
+              input: parsed.input ?? {},
+              toolCallId: parsed.id ?? "",
+            });
             break;
           }
           case "done": {
@@ -980,10 +984,25 @@ export class Box<TProvider = unknown> {
           case "tool": {
             const chunk: Chunk = {
               type: "tool-call",
+              toolCallId: parsed.id ?? "",
               toolName: parsed.name ?? "",
               input: parsed.input ?? {},
             };
-            options.onToolUse?.({ name: parsed.name ?? "", input: parsed.input ?? {} });
+            options.onToolUse?.({
+              name: parsed.name ?? "",
+              input: parsed.input ?? {},
+              toolCallId: parsed.id ?? "",
+            });
+            return chunk;
+          }
+          case "tool_result": {
+            const chunk: Chunk = {
+              type: "tool-result",
+              toolCallId: parsed.tool_use_id ?? parsed.id ?? "",
+              toolName: parsed.name,
+              output: parsed.output ?? parsed.content,
+              isError: parsed.is_error,
+            };
             return chunk;
           }
           case "done": {

diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts
@@ -385,7 +385,35 @@ export type Chunk =
   | { type: "start"; runId: string }
   | { type: "text-delta"; text: string }
   | { type: "reasoning"; text: string }
-  | { type: "tool-call"; toolName: string; input: Record<string, unknown> }
+  | {
+      type: "tool-call";
+      /**
+       * Stable identifier for this tool invocation. Use this to match a
+       * `tool-result` chunk back to its originating call when multiple tool
+       * calls are in flight in the same turn.
+       *
+       * May be an empty string for older agents that don't surface an ID; ID-based matching is unreliable in that case.
+       */
+      toolCallId: string;
+      toolName: string;
+      input: Record<string, unknown>;
+    }
+  | {
+      type: "tool-result";
+      /**
+       * Identifier of the `tool-call` chunk this result corresponds to.
+       *
+       * May be an empty string when the backend does not provide an ID
+       * (e.g. older agents or the OpenCode provider).
+       */
+      toolCallId: string;
+      /** Name of the tool that produced this result, when known. */
+      toolName?: string;
+      /** Tool output payload. Shape is tool-specific. */
+      output: unknown;
+      /** True when the tool reported an error. */
+      isError?: boolean;
+    }
   | {
       type: "finish";
       output: string;
@@ -431,7 +459,7 @@ export interface StreamOptions<TProvider = unknown> {
   /** Timeout in milliseconds — aborts if exceeded */
   timeout?: number;
   /** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
-  onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
+  onToolUse?: (tool: { name: string; input: Record<string, unknown>; toolCallId?: string }) => void;
 }
 
 /**
@@ -451,7 +479,7 @@ export interface RunOptions<T = undefined, TProvider = unknown> {
   /** Retries with exponential backoff on transient failures */
   maxRetries?: number;
   /** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
-  onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
+  onToolUse?: (tool: { name: string; input: Record<string, unknown>; toolCallId?: string }) => void;
   /** Webhook — fire-and-forget, POST to URL on completion */
   webhook?: WebhookConfig;
 }