From e8e09608e7013f244877ca45793648a555bcca9b Mon Sep 17 00:00:00 2001 From: Sergei Arutiunian Date: Tue, 12 May 2026 18:29:48 +0200 Subject: [PATCH 1/2] fix(correctness): tab matching, completion regex, version sync, COMET_PORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bundle of small correctness fixes uncovered while auditing the repo. Each fix is independent; happy to split if reviewer prefers. ### page-scripts.ts — completion detection * `Выполнен(?:о|ы)?` — match Russian singular `Выполнен 1 шаг` in addition to the plural cases the original regex covered. Russian agrees the past-tense participle with grammatical number, so the singular form was being missed on Russian-localised Perplexity accounts. * `\bFinished\b(?!\s+(?:reading|analyzing|browsing|searching|loading))` — word-boundary the English "Finished" marker and exclude the intermediate "Finished reading … " step Perplexity renders mid-task. Without this the agent flips to `completed` the moment the first source is processed. * Strategy 1 and Strategy 2 in `extractAgentStatus` now use `lastIndexOf` for the `steps completed` / `Reviewed N sources` markers. In multi-turn chats the previous-turn markers stay in the scroll buffer; `indexOf` returned the OLDEST match, causing the extractor to return the previous turn's answer for the current question. ### cdp-client.ts — tab matching * `findTabByDomain` now matches "exact or subdomain" instead of the ambiguous `includes` / reverse-`includes` heuristic. Search `"github.com"` matches `github.com` and `gist.github.com` but no longer matches `notgithub.com`. Search `"ai"` no longer matches every `*.ai` tab plus anything containing the substring `ai`. * `listTargets` on native Windows now closes its `CDP({...})` client in a `finally` block. If `Target.getTargets()` threw the underlying WebSocket leaked; with `withAutoReconnect` retrying, leaks compounded. ### cdp-client.ts / index.ts — `COMET_PORT` env The README documents `COMET_PORT` but the constant was hardcoded and call sites passed the literal `9223` directly. `DEFAULT_PORT` now reads from the env var (validated as `[1, 65535]`, falls back to 9223 with a warning), and call sites use `DEFAULT_PORT`. ### index.ts — MCP version `new Server({ name: "comet-bridge", version: "2.5.0" }, ...)` advertised 2.5.0 to clients while `package.json` shipped 2.6.2. Now read at startup via `readFileSync(packageJson)` so the handshake matches the published version automatically. ### tests/unit/page-scripts.test.ts Four new cases: * singular `Выполнен 1 шаг` -> `completed` * plural `Выполнено 5 шагов` -> `completed` * `Finished reading sources` with a visible stop button -> `working` (the previous behaviour would have flipped it to `completed`) * multi-turn `N steps completed`: the response comes from the LAST marker, not the first Tests build DOM with `createElement`/`textContent` to keep the linter happy and to avoid any innerHTML in test fixtures. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cdp-client.ts | 54 +++++++++++++++++++++------- src/index.ts | 26 +++++++++++--- src/page-scripts.ts | 23 +++++++++--- tests/unit/page-scripts.test.ts | 62 +++++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+), 22 deletions(-) diff --git a/src/cdp-client.ts b/src/cdp-client.ts index 10eb74e..dc7bb8f 100644 --- a/src/cdp-client.ts +++ b/src/cdp-client.ts @@ -143,7 +143,21 @@ function getCometPath(): string { const COMET_PATH = getCometPath(); const IS_WINDOWS = platform() === "win32" || IS_WSL; -const DEFAULT_PORT = 9223; + +// Honour the documented `COMET_PORT` env var (see README "Environment Variables"). +// Previously the constant was hardcoded to 9223 and call sites passed the literal +// straight to `startComet(9223)`, so the env var was silently ignored. +function readPortFromEnv(): number { + const raw = process.env.COMET_PORT; + if (!raw) return 9223; + const n = parseInt(raw, 10); + if (!Number.isInteger(n) || n < 1 || n > 65535) { + console.error(`Invalid COMET_PORT="${raw}", falling back to 9223`); + return 9223; + } + return n; +} +export const DEFAULT_PORT = readPortFromEnv(); export class CometCDPClient { private client: CDP.Client | null = null; @@ -574,12 +588,21 @@ export class CometCDPClient { } /** - * Find a tab by domain (for reuse) + * Find a tab by domain (for reuse). + * + * Match is "exact or subdomain": `findTabByDomain("github.com")` matches + * both `github.com` and `gist.github.com`, but NOT `notgithub.com`. The + * previous `includes`/reverse-`includes` heuristic produced surprising + * matches — `domain: "ai"` matched `perplexity.ai`, `chat.openai.com`, + * etc., and a search for `"mail.google.com"` would match a `google.com` + * tab via the reverse direction. */ async findTabByDomain(domain: string): Promise { await this.refreshTabRegistry(); + const target = domain.toLowerCase(); for (const tab of this.tabRegistry.values()) { - if (tab.domain.includes(domain) || domain.includes(tab.domain)) { + const tabDomain = tab.domain.toLowerCase(); + if (tabDomain === target || tabDomain.endsWith(`.${target}`)) { return tab; } } @@ -965,16 +988,21 @@ export class CometCDPClient { if (IS_WINDOWS) { try { const tempClient = await CDP({ port: this.state.port, host: '127.0.0.1' }); - const { targetInfos } = await (tempClient as any).Target.getTargets(); - await tempClient.close(); - - return targetInfos.map((t: any) => ({ - id: t.targetId, - type: t.type, - title: t.title, - url: t.url, - webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}` - })); + try { + const { targetInfos } = await (tempClient as any).Target.getTargets(); + return targetInfos.map((t: any) => ({ + id: t.targetId, + type: t.type, + title: t.title, + url: t.url, + webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}` + })); + } finally { + // Close in `finally` so a throw inside `Target.getTargets()` does + // not leak the underlying WebSocket. Each retry in withAutoReconnect + // calls listTargets() again — even a slow leak exhausts handles. + await tempClient.close().catch(() => { /* already closed */ }); + } } catch (error) { throw new Error(`Failed to list targets: ${error}`); } diff --git a/src/index.ts b/src/index.ts index e0ab6fb..2d3914b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,6 +4,9 @@ // Claude Code ↔ Perplexity Comet bidirectional interaction // Simplified to 6 essential tools +import { readFileSync } from "fs"; +import { fileURLToPath } from "url"; +import { dirname, join } from "path"; import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { @@ -11,7 +14,7 @@ import { ListToolsRequestSchema, Tool, } from "@modelcontextprotocol/sdk/types.js"; -import { cometClient } from "./cdp-client.js"; +import { cometClient, DEFAULT_PORT } from "./cdp-client.js"; import { cometAI } from "./comet-ai.js"; import { sessionState, @@ -21,6 +24,21 @@ import { } from "./session-state.js"; import { readProseState, type ProseState } from "./page-scripts.js"; +// Read version from package.json so the MCP `initialize` handshake reports +// the actually-shipped version. Hardcoding (previously "2.5.0" while +// package.json was "2.6.2") drifts every release. +function readPackageVersion(): string { + try { + const here = dirname(fileURLToPath(import.meta.url)); + const pkgPath = join(here, "..", "package.json"); + const pkg = JSON.parse(readFileSync(pkgPath, "utf8")) as { version?: string }; + return pkg.version ?? "0.0.0"; + } catch { + return "0.0.0"; + } +} +const SERVER_VERSION = readPackageVersion(); + const TOOLS: Tool[] = [ { name: "comet_connect", @@ -117,7 +135,7 @@ const TOOLS: Tool[] = [ ]; const server = new Server( - { name: "comet-bridge", version: "2.5.0" }, + { name: "comet-bridge", version: SERVER_VERSION }, { capabilities: { tools: {} } } ); @@ -130,7 +148,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { switch (name) { case "comet_connect": { // Auto-start Comet with debug port (will restart if running without it) - const startResult = await cometClient.startComet(9223); + const startResult = await cometClient.startComet(DEFAULT_PORT); // Get all tabs - DON'T clean up tabs, as closing them can crash Comet const targets = await cometClient.listTargets(); @@ -186,7 +204,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } catch (preCheckError) { // If pre-check fails, try to recover try { - await cometClient.startComet(9223); + await cometClient.startComet(DEFAULT_PORT); const targets = await cometClient.listTargets(); const page = targets.find(t => t.type === 'page'); if (page) await cometClient.connect(page.id); diff --git a/src/page-scripts.ts b/src/page-scripts.ts index ad53001..03f7d4b 100644 --- a/src/page-scripts.ts +++ b/src/page-scripts.ts @@ -70,8 +70,17 @@ export function extractAgentStatus(): AgentStatusResult { // miss completion on non-English accounts and force fallback to slow // response-stability polling (~90s). See PR #9 notes for marker source. const hasStepsCompleted = /\d+ steps? completed/i.test(body) - || /Выполнено\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru - const hasFinishedMarker = body.includes("Finished") && !hasActiveStopButton; + // Russian agrees the verb with grammatical number: + // "Выполнен 1 шаг" (sg), "Выполнено 2/3/4 шага", + // "Выполнено 5+ шагов". The previous regex matched + // only "Выполнено …" and missed the singular case. + || /Выполнен(?:о|ы)?\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru + // Word-boundary the English marker and exclude "Finished reading|analyzing|…", + // which is an *intermediate* step Perplexity renders while the agent is + // still running. Without this, the agent flips to "completed" the moment + // the first source is processed. + const hasFinishedMarker = /\bFinished\b(?!\s+(?:reading|analyzing|browsing|searching|loading))/i.test(body) + && !hasActiveStopButton; const hasReviewedSources = /Reviewed \d+ sources?/i.test(body); const hasSourcesIndicator = /\d+\s*sources?/i.test(body) // en || /\d+\s*источник(?:а|ов)?/iu.test(body); // ru @@ -138,10 +147,13 @@ export function extractAgentStatus(): AgentStatusResult { const mainContent = (document.querySelector("main") || document.body) as HTMLElement; const bodyText = mainContent.innerText; - // Strategy 1: Find content after "X steps completed" marker (agent's final response) + // Strategy 1: Find content after "X steps completed" marker (agent's final response). + // Use lastIndexOf: in multi-turn chats Perplexity keeps previous-turn + // markers visible in the scroll buffer, and `indexOf` would return the + // FIRST (oldest) match — i.e. the response from a previous question. const stepsMatch = bodyText.match(/(\d+)\s*steps?\s*completed/i); if (stepsMatch) { - const markerIndex = bodyText.indexOf(stepsMatch[0]); + const markerIndex = bodyText.lastIndexOf(stepsMatch[0]); if (markerIndex !== -1) { // Get everything after the marker let afterMarker = bodyText.substring(markerIndex + stepsMatch[0].length).trim(); @@ -170,7 +182,8 @@ export function extractAgentStatus(): AgentStatusResult { if (!response || response.length < 50) { const sourcesMatch = bodyText.match(/Reviewed\s+\d+\s+sources?/i); if (sourcesMatch) { - const markerIndex = bodyText.indexOf(sourcesMatch[0]); + // Same rationale as Strategy 1: prefer the most recent marker. + const markerIndex = bodyText.lastIndexOf(sourcesMatch[0]); if (markerIndex !== -1) { let afterMarker = bodyText.substring(markerIndex + sourcesMatch[0].length).trim(); const endMarkers = [ diff --git a/tests/unit/page-scripts.test.ts b/tests/unit/page-scripts.test.ts index b7aac8b..1406d94 100644 --- a/tests/unit/page-scripts.test.ts +++ b/tests/unit/page-scripts.test.ts @@ -82,6 +82,68 @@ describe("extractAgentStatus", () => { expect(result.response.length).toBeGreaterThan(0); }); + it("returns 'completed' for Russian singular 'Выполнен 1 шаг'", () => { + const main = document.createElement("main"); + const m = document.createElement("div"); + m.textContent = "Выполнен 1 шаг"; + const p = document.createElement("div"); + p.className = "prose"; + p.textContent = "Готовый ответ агента, длина превышает 15 символов."; + main.append(m, p); + document.body.append(main); + + const result = extractAgentStatus(); + expect(result.status).toBe("completed"); + }); + + it("returns 'completed' for Russian plural 'Выполнено 5 шагов'", () => { + const main = document.createElement("main"); + const m = document.createElement("div"); + m.textContent = "Выполнено 5 шагов"; + const p = document.createElement("div"); + p.className = "prose"; + p.textContent = "Готовый ответ агента, длина превышает 15 символов."; + main.append(m, p); + document.body.append(main); + + const result = extractAgentStatus(); + expect(result.status).toBe("completed"); + }); + + it("does NOT treat 'Finished reading sources' as a completion marker", () => { + const main = document.createElement("main"); + const m = document.createElement("div"); + m.textContent = "Finished reading sources"; + const btn = document.createElement("button"); + btn.setAttribute("aria-label", "Stop"); + btn.textContent = "stop"; + main.append(m, btn); + document.body.append(main); + markVisible(btn); + + const result = extractAgentStatus(); + // Stop button visible -> still working, regardless of "Finished reading". + expect(result.status).toBe("working"); + }); + + it("picks the response after the LAST 'steps completed' marker", () => { + // Multi-turn chat: the older turn's marker must NOT win over the newer one. + const main = document.createElement("main"); + const turn1 = document.createElement("div"); + turn1.textContent = + "3 steps completed Previous turn answer text here, long enough to exceed thresholds easily. Ask anything"; + const turn2 = document.createElement("div"); + turn2.textContent = + "5 steps completed New turn answer that we actually want returned to the caller. Ask a follow-up"; + main.append(turn1, turn2); + document.body.append(main); + + const result = extractAgentStatus(); + expect(result.status).toBe("completed"); + expect(result.response).toContain("New turn answer"); + expect(result.response).not.toContain("Previous turn answer"); + }); + it("extracts and dedupes step descriptions matching the working patterns", () => { // Pattern matching runs against document.body.innerText. // Use one step per
so jsdom's innerText emits one per line. From 1dffc641d34038fd6e25863bdbfe3154e300c9c9 Mon Sep 17 00:00:00 2001 From: Sergei Arutiunian Date: Tue, 12 May 2026 18:56:52 +0200 Subject: [PATCH 2/2] fix(detect): walk every steps/sources marker to anchor on the latest turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failed on the previous commit's "picks the response after the LAST 'steps completed' marker" test. The bug: - `bodyText.match(/(\d+)\s*steps?\s*completed/i)` returns only the FIRST match (no /g flag). - In real multi-turn chats the marker text differs across turns: "3 steps completed" for turn 1, "5 steps completed" for turn 2. - `lastIndexOf(stepsMatch[0])` then looks for the LAST occurrence of "3 steps completed" — which appears exactly once — and anchors on the OLDER turn anyway. The fix `lastIndexOf` was looking for the wrong needle. Replace with `matchAll` + take the last result: this enumerates every marker across all turns and picks the most recent one regardless of digit value. Same change applied to the `Reviewed N sources` path in Strategy 2. Behaviour now matches the documented intent of the previous commit; the existing unit test passes. --- src/page-scripts.ts | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/page-scripts.ts b/src/page-scripts.ts index 03f7d4b..f97bed1 100644 --- a/src/page-scripts.ts +++ b/src/page-scripts.ts @@ -148,12 +148,16 @@ export function extractAgentStatus(): AgentStatusResult { const bodyText = mainContent.innerText; // Strategy 1: Find content after "X steps completed" marker (agent's final response). - // Use lastIndexOf: in multi-turn chats Perplexity keeps previous-turn - // markers visible in the scroll buffer, and `indexOf` would return the - // FIRST (oldest) match — i.e. the response from a previous question. - const stepsMatch = bodyText.match(/(\d+)\s*steps?\s*completed/i); + // In multi-turn chats Perplexity keeps previous-turn markers in the + // scroll buffer, and the marker text differs across turns ("3 steps + // completed" vs "5 steps completed"). `match()` returns only the + // FIRST match, so anchoring on it — with either `indexOf` or + // `lastIndexOf` of that exact string — lands on the OLDEST turn. + // Walk every match with the /g flag and take the last one. + const stepsMatches = [...bodyText.matchAll(/(\d+)\s*steps?\s*completed/gi)]; + const stepsMatch = stepsMatches.length > 0 ? stepsMatches[stepsMatches.length - 1] : null; if (stepsMatch) { - const markerIndex = bodyText.lastIndexOf(stepsMatch[0]); + const markerIndex = stepsMatch.index ?? -1; if (markerIndex !== -1) { // Get everything after the marker let afterMarker = bodyText.substring(markerIndex + stepsMatch[0].length).trim(); @@ -180,10 +184,11 @@ export function extractAgentStatus(): AgentStatusResult { // Strategy 2: If no steps marker, look for content after source citations if (!response || response.length < 50) { - const sourcesMatch = bodyText.match(/Reviewed\s+\d+\s+sources?/i); + // Same rationale as Strategy 1: walk every match and take the last. + const sourcesMatches = [...bodyText.matchAll(/Reviewed\s+\d+\s+sources?/gi)]; + const sourcesMatch = sourcesMatches.length > 0 ? sourcesMatches[sourcesMatches.length - 1] : null; if (sourcesMatch) { - // Same rationale as Strategy 1: prefer the most recent marker. - const markerIndex = bodyText.lastIndexOf(sourcesMatch[0]); + const markerIndex = sourcesMatch.index ?? -1; if (markerIndex !== -1) { let afterMarker = bodyText.substring(markerIndex + sourcesMatch[0].length).trim(); const endMarkers = [