diff --git a/README.md b/README.md
index 4032cdb..8634f67 100644
--- a/README.md
+++ b/README.md
@@ -60,20 +60,20 @@ You should see `morph_edit`, `warpgrep_codebase_search`, and `warpgrep_github_se
 
 ## Compaction
 
-Context compression via the Morph Compact API. Runs automatically before each LLM call when the conversation exceeds a token threshold.
+Context compression via the Morph Compact API. In current OpenCode 1.14.x releases, only OpenCode native compaction writes the persisted summary message that future turns and the sidebar use. This plugin handles that path by pre-compressing the selected history with Morph before OpenCode's native compaction model writes its summary.
 
 ### How it works
 
-1. Before each LLM call, the plugin estimates the total characters in the conversation
-2. If the estimate exceeds the threshold, older messages are compressed via the Morph Compact API (~250ms)
-3. The compressed result is cached ("frozen") and reused on subsequent calls for prompt cache stability
-4. Only the most recent user message is kept uncompacted
+1. When OpenCode native compaction starts, the plugin adds Morph-aware instructions to the compaction prompt
+2. The following `experimental.chat.messages.transform` hook receives the history OpenCode selected for compaction
+3. Morph compresses that selected history to one summary message before OpenCode sends it to the compaction model
+4. OpenCode then persists its normal compaction summary and emits `session.compacted`
 
-The LLM receives compressed history + your latest prompt. The "Context: X tokens" number in the sidebar reflects the actual tokens sent (post-compaction).
+The Morph toast means the compaction input was compressed for OpenCode. Seeing OpenCode native compaction immediately after the toast is expected; that is the mechanism that persists the summary. The "Context: X tokens" number in the sidebar is based on OpenCode's stored assistant token usage, so it updates after OpenCode finishes compaction and/or after the next assistant response, not at the instant the Morph toast appears.
 
 ### Configuring the compaction threshold
 
-By default, compaction triggers at **70% of the model's context window**. You can override this with a fixed token limit:
+For non-native transform calls, the plugin uses a default threshold of **70% of the model's context window**. With a 1M token model, that is roughly 700k estimated tokens. You can override this with a fixed token limit:
 
 ```bash
 # Compact when conversation exceeds 20,000 tokens
@@ -97,13 +97,19 @@ grep "service=morph" ~/.local/share/opencode/log/*.log | grep -i compact
 When compaction fires, you'll see entries like:
 
 ```
-INFO service=morph First compaction: 2 messages (30137 chars), keeping 1 recent. Threshold crossed: 30178 >= 15000
-INFO service=morph Compact: 2 messages -> 2 frozen (15142 chars). Messages: 3 -> 3. Ratio: 45% kept (244ms)
+INFO service=morph OpenCode native compaction triggered; Morph will pre-compress selected history and OpenCode will persist the summary.
+INFO service=morph Native compaction: compressing 42 selected messages (210137 chars) before OpenCode writes its persisted summary.
+INFO service=morph Native compaction: Morph compressed 42 messages -> 1 summary (15142 chars). Ratio: 20% kept (244ms)
+INFO service=morph OpenCode native compaction completed; cleared Morph transient compaction state.
 ```
 
-You'll also see a toast notification in the OpenCode UI when compaction triggers.
+You'll also see a toast notification in the OpenCode UI:
 
-On subsequent LLM calls (before re-compaction is needed), you'll see:
+```
+Prepared OpenCode compaction with Morph (20% kept) | 244ms
+```
+
+If OpenCode native compaction is not involved and a future OpenCode version calls `experimental.chat.messages.transform` before normal LLM turns, the plugin still has the older proactive path. In that path, subsequent LLM calls can show:
 
 ```
 INFO service=morph Under threshold - reusing frozen block. Messages: 5 -> 5
diff --git a/index.test.ts b/index.test.ts
index c37a166..14e5a6c 100644
--- a/index.test.ts
+++ b/index.test.ts
@@ -114,6 +114,8 @@ describe("packaged tool-selection instructions", () => {
     expect(content).toContain("warpgrep_github_search");
     expect(content).toContain("MORPH_API_KEY");
     expect(content).toContain("MORPH_COMPACT_TOKEN_LIMIT");
+    expect(content).toContain("OpenCode native compaction");
+    expect(content).toContain("sidebar");
     expect(content).toContain("opencode.json");
   });
 });
@@ -963,6 +965,103 @@ describe("plugin runtime hooks", () => {
     );
     expect(combined).toContain("Use write for brand new files.");
   });
+
+  test("native session compaction is pre-compressed by Morph while preserving OpenCode prompt anchoring", async () => {
+    const originalCompact = CompactClient.prototype.compact;
+    const logs: any[] = [];
+    const toasts: any[] = [];
+    let capturedCompactArgs: any;
+
+    CompactClient.prototype.compact = async function (args: any) {
+      capturedCompactArgs = args;
+      return {
+        output: "condensed task summary",
+        messages: [{ role: "user", content: "condensed task summary" }],
+        usage: {
+          compression_ratio: 0.2,
+          processing_time_ms: 42,
+          input_tokens: 100,
+          output_tokens: 20,
+        },
+      } as any;
+    };
+
+    try {
+      const { default: MorphPlugin } = await importPluginWithEnv({
+        MORPH_API_KEY: "sk-test-key",
+        MORPH_COMPACT_TOKEN_LIMIT: undefined,
+      });
+
+      const input = makePluginInput("/tmp/morph-plugin") as any;
+      input.client = {
+        app: {
+          log: async ({ body }: any) => {
+            logs.push(body);
+          },
+        },
+        tui: {
+          showToast: async ({ body }: any) => {
+            toasts.push(body);
+          },
+        },
+      };
+
+      const hooks = await MorphPlugin(input);
+      const compactingOutput: { context: string[]; prompt?: string } = {
+        context: [],
+      };
+
+      await hooks["experimental.session.compacting"]?.(
+        { sessionID: "session-test" },
+        compactingOutput,
+      );
+
+      expect(compactingOutput.prompt).toBeUndefined();
+      expect(compactingOutput.context).toHaveLength(1);
+      expect(compactingOutput.context[0]).toContain(
+        "Morph compact plugin is active",
+      );
+      expect(compactingOutput.context[0]).toContain("Morph-compressed history");
+
+      const output = {
+        messages: [
+          makeTextMsg("msg-1", "user", "please refactor auth"),
+          makeTextMsg(
+            "msg-2",
+            "assistant",
+            "read src/auth.ts and found token storage",
+          ),
+        ],
+      };
+
+      await hooks["experimental.chat.messages.transform"]?.({}, output as any);
+
+      expect(capturedCompactArgs.messages).toEqual([
+        { role: "user", content: "please refactor auth" },
+        {
+          role: "assistant",
+          content: "read src/auth.ts and found token storage",
+        },
+      ]);
+      expect(capturedCompactArgs.preserveRecent).toBe(0);
+      expect(output.messages).toHaveLength(1);
+      expect(output.messages[0]!.parts[0]!.type).toBe("text");
+      expect((output.messages[0]!.parts[0] as any).text).toContain(
+        "Morph-compressed conversation history",
+      );
+      expect((output.messages[0]!.parts[0] as any).text).toContain(
+        "condensed task summary",
+      );
+      expect(toasts[0]!.message).toContain(
+        "Prepared OpenCode compaction with Morph",
+      );
+      expect(logs.some((entry) => entry.message.includes("persisted summary"))).toBe(
+        true,
+      );
+    } finally {
+      CompactClient.prototype.compact = originalCompact;
+    }
+  });
 });
 
 describe("ToolContext path resolution", () => {
diff --git a/index.ts b/index.ts
index 0190aa7..dd6355f 100644
--- a/index.ts
+++ b/index.ts
@@ -127,6 +127,24 @@ let compactionState: {
   frozenChars: number;
 } | null = null;
 
+const NATIVE_COMPACTION_ARM_TTL_MS = 30_000;
+const nativeCompactionArms: number[] = [];
+
+function armNativeCompaction() {
+  nativeCompactionArms.push(Date.now());
+}
+
+function consumeNativeCompactionArm(): boolean {
+  const now = Date.now();
+
+  while (nativeCompactionArms.length > 0) {
+    const armedAt = nativeCompactionArms.shift()!;
+    if (now - armedAt <= NATIVE_COMPACTION_ARM_TTL_MS) return true;
+  }
+
+  return false;
+}
+
 /**
  * Normalize code_edit input from LLM tool calls.
  *
@@ -215,6 +233,47 @@ function estimateTotalChars(
 
   return total;
 }
 
+function formatCompressionPercent(result: CompactResult): number {
+  return Math.round(result.usage.compression_ratio * 100);
+}
+
+function compactResultText(result: CompactResult): string {
+  const output = result.output?.trim();
+  if (output) return output;
+
+  return result.messages
+    .map((message) => `[${message.role}] ${message.content}`)
+    .join("\n\n")
+    .trim();
+}
+
+function buildCompactedSummaryMessages(
+  originalMessages: { info: Message; parts: Part[] }[],
+  result: CompactResult,
+): { info: Message; parts: Part[] }[] {
+  if (originalMessages.length === 0) return [];
+
+  const template = originalMessages[0]!;
+  const last = originalMessages[originalMessages.length - 1]!;
+  const summary = compactResultText(result);
+
+  if (!summary) return [];
+
+  return [
+    {
+      info: { ...template.info } as Message,
+      parts: [
+        {
+          id: `morph-compact-summary-${template.info.id}-${last.info.id}`,
+          sessionID: template.info.sessionID,
+          messageID: template.info.id,
+          type: "text" as const,
+          text: `Morph-compressed conversation history:\n\n${summary}`,
+        } as TextPart,
+      ],
+    },
+  ];
+}
 
 function resolveSessionFilepath(
   targetFilepath: string,
@@ -1225,6 +1284,17 @@ Get your API key at: https://morphllm.com/dashboard/api-keys`;
   };
 
   if (MORPH_COMPACT_ENABLED) {
+    hooks.event = async ({ event }: any) => {
+      if (event.type !== "session.compacted") return;
+
+      compactionState = null;
+      nativeCompactionArms.length = 0;
+      await log(
+        "info",
+        "OpenCode native compaction completed; cleared Morph transient compaction state.",
+      );
+    };
+
     // Capture model context window from chat.params (fires every LLM call)
     hooks["chat.params"] = async (input: any) => {
       //chat.params CALLED: model=${input.model?.id}, context=${input.model?.limit?.context}`);
@@ -1233,14 +1303,68 @@ Get your API key at: https://morphllm.com/dashboard/api-keys`;
       }
     };
 
-    // Compaction: compress older messages via Morph, then FREEZE the result.
-    // The frozen block is reused byte-for-byte on every subsequent call,
-    // preserving the LLM provider's prompt prefix cache.
-    // Re-compaction only fires when the threshold is crossed again.
+    // Compaction: current OpenCode calls this during native session compaction.
+    // When armed by experimental.session.compacting, Morph reduces the selected
+    // history to a single summary before OpenCode writes its persisted summary.
+    // The fallback path below preserves the older proactive/frozen behavior for
+    // OpenCode versions that call this hook before normal LLM turns.
hooks["experimental.chat.messages.transform"] = async (_input: any, output: any) => { if (!MORPH_API_KEY) return; const messages = output.messages; + const nativeCompaction = consumeNativeCompactionArm(); + + if (nativeCompaction) { + const compactInput = messagesToCompactInput(messages); + if (compactInput.length === 0) { + await log( + "debug", + "Native compaction: selected history had no compactable text; using OpenCode compaction unchanged.", + ); + return; + } + + await log( + "info", + `Native compaction: compressing ${messages.length} selected messages (${estimateTotalChars(messages)} chars) before OpenCode writes its persisted summary.`, + ); + + try { + const result = await compactClient!.compact({ + messages: compactInput, + compressionRatio: COMPACT_RATIO, + preserveRecent: 0, + }); + + const compacted = buildCompactedSummaryMessages(messages, result); + if (compacted.length === 0) { + await log( + "warn", + "Native compaction: Morph returned an empty summary; using OpenCode compaction unchanged.", + ); + return; + } + + const compactedChars = estimateTotalChars(compacted); + output.messages = compacted; + compactionState = null; + + await log( + "info", + `Native compaction: Morph compressed ${messages.length} messages -> ${compacted.length} summary (${compactedChars} chars). Ratio: ${formatCompressionPercent(result)}% kept (${result.usage.processing_time_ms}ms)`, + ); + await showToast( + "success", + `Prepared OpenCode compaction with Morph (${formatCompressionPercent(result)}% kept) | ${result.usage.processing_time_ms}ms`, + ); + } catch (err) { + await log( + "warn", + `Native compaction: Morph compact failed: ${(err as Error).message}. Using OpenCode compaction unchanged.`, + ); + } + return; + } // Approximate char threshold — fixed token limit takes precedence const charThreshold = COMPACT_TOKEN_LIMIT @@ -1317,11 +1441,11 @@ Get your API key at: https://morphllm.com/dashboard/api-keys`; await log( "info", - `Compact (re): ${toCompact.length} messages → ${frozen.length} frozen (${frozenChars} chars). Messages: ${beforeLen} → ${output.messages.length}. Ratio: ${Math.round(result.usage.compression_ratio * 100)}% kept (${result.usage.processing_time_ms}ms)`, + `Compact (re): ${toCompact.length} messages -> ${frozen.length} frozen (${frozenChars} chars). Messages: ${beforeLen} -> ${output.messages.length}. Ratio: ${formatCompressionPercent(result)}% kept (${result.usage.processing_time_ms}ms)`, ); await showToast( "success", - `${toCompact.length} messages re-compacted (${Math.round(result.usage.compression_ratio * 100)}% kept) | ${result.usage.processing_time_ms}ms`, + `${toCompact.length} messages re-compacted (${formatCompressionPercent(result)}% kept) | ${result.usage.processing_time_ms}ms`, ); } catch (err) { // On failure, use stale frozen block + uncompacted as best-effort @@ -1378,11 +1502,11 @@ Get your API key at: https://morphllm.com/dashboard/api-keys`; //Compact done: ${toCompact.length} msgs → ${frozen.length} frozen (${frozenChars} chars). Messages: ${beforeLen} → ${output.messages.length}`); await log( "info", - `Compact: ${toCompact.length} messages → ${frozen.length} frozen (${frozenChars} chars). Messages: ${beforeLen} → ${output.messages.length}. Ratio: ${Math.round(result.usage.compression_ratio * 100)}% kept (${result.usage.processing_time_ms}ms)`, + `Compact: ${toCompact.length} messages -> ${frozen.length} frozen (${frozenChars} chars). Messages: ${beforeLen} -> ${output.messages.length}. 
Ratio: ${formatCompressionPercent(result)}% kept (${result.usage.processing_time_ms}ms)`, ); await showToast( "success", - `${toCompact.length} messages compacted (${Math.round(result.usage.compression_ratio * 100)}% kept) | ${result.usage.processing_time_ms}ms`, + `${toCompact.length} messages compacted (${formatCompressionPercent(result)}% kept) | ${result.usage.processing_time_ms}ms`, ); } catch (err) { //Compact FAILED: ${(err as Error).message}\n${(err as Error).stack}`); @@ -1393,11 +1517,25 @@ Get your API key at: https://morphllm.com/dashboard/api-keys`; } }; - // When OpenCode's native compaction triggers, log it + // OpenCode's native compaction is the only path that writes a persisted + // summary message in current OpenCode versions. Morph compresses the + // selected history before that summary model call. hooks["experimental.session.compacting"] = async (_input: any, output: any) => { - await log("debug", "OpenCode native compaction triggered"); + if (!MORPH_API_KEY) { + output.context.push( + "Note: Morph compact plugin is installed, but MORPH_API_KEY is not configured.", + ); + return; + } + + armNativeCompaction(); output.context.push( - "Note: Morph compact plugin is active. Older messages may already be compressed.", + "Morph compact plugin is active. The selected conversation history will be pre-compressed before this native compaction summary is generated. Treat any Morph-compressed history as authoritative and preserve concrete facts, file paths, commands, errors, constraints, decisions, and remaining work.", + ); + + await log( + "info", + "OpenCode native compaction triggered; Morph will pre-compress selected history and OpenCode will persist the summary.", ); }; }
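
A note on the arming logic this patch adds to `index.ts`: the pattern is easier to see in isolation than inside the plugin. The sketch below is illustrative only and is not part of the patch; the local names `arm`, `consume`, `ARM_TTL_MS`, and `arms` are invented for the example and stand in for the patch's `armNativeCompaction`, `consumeNativeCompactionArm`, `NATIVE_COMPACTION_ARM_TTL_MS`, and `nativeCompactionArms`.

```ts
// Standalone sketch of the arm/consume pattern (not the plugin's actual code).
// session.compacting "arms" the next messages.transform call; stale arms expire.
const ARM_TTL_MS = 30_000;
const arms: number[] = [];

function arm(): void {
  // Called when `experimental.session.compacting` fires.
  arms.push(Date.now());
}

function consume(now: number = Date.now()): boolean {
  // Called at the top of `experimental.chat.messages.transform`.
  // Drops stale arms and reports whether a fresh one was available.
  while (arms.length > 0) {
    const armedAt = arms.shift()!;
    if (now - armedAt <= ARM_TTL_MS) return true;
  }
  return false;
}

arm();
console.log(consume());                    // true  -> run the native-compaction path
console.log(consume());                    // false -> fall back to the proactive path
arm();
console.log(consume(Date.now() + 60_000)); // false -> the arm expired after the TTL
```

Using an array of timestamps rather than a single boolean means each `experimental.session.compacting` event earns at most one Morph pre-compression, and the 30-second TTL keeps a stale arm from redirecting an unrelated `experimental.chat.messages.transform` call if OpenCode never follows up with the compaction request.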