diff --git a/src/cdp-client.ts b/src/cdp-client.ts
index 10eb74e..dc7bb8f 100644
--- a/src/cdp-client.ts
+++ b/src/cdp-client.ts
@@ -143,7 +143,21 @@ function getCometPath(): string {
 
 const COMET_PATH = getCometPath();
 const IS_WINDOWS = platform() === "win32" || IS_WSL;
-const DEFAULT_PORT = 9223;
+
+// Honour the documented `COMET_PORT` env var (see README "Environment Variables").
+// Previously 9223 was hardcoded here and at the `startComet(9223)` call sites, so the
+// env var was silently ignored. Only a plain decimal integer in 1..65535 is accepted.
+function readPortFromEnv(): number {
+  const raw = process.env.COMET_PORT?.trim();
+  if (!raw) return 9223;
+  const n = /^\d+$/.test(raw) ? Number(raw) : NaN; // parseInt would accept "9223abc" or "92.5"
+  if (!Number.isInteger(n) || n < 1 || n > 65535) {
+    console.error(`Invalid COMET_PORT="${raw}", falling back to 9223`);
+    return 9223;
+  }
+  return n;
+}
+export const DEFAULT_PORT = readPortFromEnv();
 
 export class CometCDPClient {
   private client: CDP.Client | null = null;
@@ -574,12 +588,21 @@ export class CometCDPClient {
   }
 
   /**
-   * Find a tab by domain (for reuse)
+   * Find a tab by domain (for reuse).
+   *
+   * Match is "exact or subdomain": `findTabByDomain("github.com")` matches
+   * both `github.com` and `gist.github.com`, but NOT `notgithub.com`. The
+   * previous `includes`/reverse-`includes` heuristic produced surprising
+   * matches — `domain: "ai"` matched `perplexity.ai`, `chat.openai.com`,
+   * etc., and a search for `"mail.google.com"` would match a `google.com`
+   * tab via the reverse direction.
*/ async findTabByDomain(domain: string): Promise { await this.refreshTabRegistry(); + const target = domain.toLowerCase(); for (const tab of this.tabRegistry.values()) { - if (tab.domain.includes(domain) || domain.includes(tab.domain)) { + const tabDomain = tab.domain.toLowerCase(); + if (tabDomain === target || tabDomain.endsWith(`.${target}`)) { return tab; } } @@ -965,16 +988,21 @@ export class CometCDPClient { if (IS_WINDOWS) { try { const tempClient = await CDP({ port: this.state.port, host: '127.0.0.1' }); - const { targetInfos } = await (tempClient as any).Target.getTargets(); - await tempClient.close(); - - return targetInfos.map((t: any) => ({ - id: t.targetId, - type: t.type, - title: t.title, - url: t.url, - webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}` - })); + try { + const { targetInfos } = await (tempClient as any).Target.getTargets(); + return targetInfos.map((t: any) => ({ + id: t.targetId, + type: t.type, + title: t.title, + url: t.url, + webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}` + })); + } finally { + // Close in `finally` so a throw inside `Target.getTargets()` does + // not leak the underlying WebSocket. Each retry in withAutoReconnect + // calls listTargets() again — even a slow leak exhausts handles. 
+          await tempClient.close().catch(() => { /* already closed */ });
+        }
       } catch (error) {
         throw new Error(`Failed to list targets: ${error}`);
       }
diff --git a/src/index.ts b/src/index.ts
index e0ab6fb..2d3914b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,9 @@
 // Claude Code ↔ Perplexity Comet bidirectional interaction
 // Simplified to 6 essential tools
 
+import { readFileSync } from "fs";
+import { fileURLToPath } from "url";
+import { dirname, join } from "path";
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import {
@@ -11,7 +14,7 @@ import {
   ListToolsRequestSchema,
   Tool,
 } from "@modelcontextprotocol/sdk/types.js";
-import { cometClient } from "./cdp-client.js";
+import { cometClient, DEFAULT_PORT } from "./cdp-client.js";
 import { cometAI } from "./comet-ai.js";
 import {
   sessionState,
@@ -21,6 +24,21 @@ import {
 } from "./session-state.js";
 import { readProseState, type ProseState } from "./page-scripts.js";
 
+// Read version from package.json so the MCP `initialize` handshake reports
+// the actually-shipped version. Hardcoding (previously "2.5.0" while package.json
+// was "2.6.2") drifts every release. Falls back to "0.0.0" if unreadable or not a string.
+function readPackageVersion(): string {
+  try {
+    const here = dirname(fileURLToPath(import.meta.url));
+    const pkgPath = join(here, "..", "package.json");
+    const pkg = JSON.parse(readFileSync(pkgPath, "utf8")) as { version?: unknown };
+    return typeof pkg.version === "string" && pkg.version !== "" ? pkg.version : "0.0.0";
+  } catch {
+    return "0.0.0";
+  }
+}
+const SERVER_VERSION = readPackageVersion();
+
 const TOOLS: Tool[] = [
   {
     name: "comet_connect",
@@ -117,7 +135,7 @@ const TOOLS: Tool[] = [
 ];
 
 const server = new Server(
-  { name: "comet-bridge", version: "2.5.0" },
+  { name: "comet-bridge", version: SERVER_VERSION },
   { capabilities: { tools: {} } }
 );
 
@@ -130,7 +148,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     switch (name) {
       case "comet_connect": {
         // Auto-start Comet with debug port (will restart if running without it)
-        const startResult = await cometClient.startComet(9223);
+        const startResult = await cometClient.startComet(DEFAULT_PORT);
 
         // Get all tabs - DON'T clean up tabs, as closing them can crash Comet
         const targets = await cometClient.listTargets();
@@ -186,7 +204,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         } catch (preCheckError) {
           // If pre-check fails, try to recover
           try {
-            await cometClient.startComet(9223);
+            await cometClient.startComet(DEFAULT_PORT);
             const targets = await cometClient.listTargets();
             const page = targets.find(t => t.type === 'page');
             if (page) await cometClient.connect(page.id);
diff --git a/src/page-scripts.ts b/src/page-scripts.ts
index ad53001..f97bed1 100644
--- a/src/page-scripts.ts
+++ b/src/page-scripts.ts
@@ -70,8 +70,17 @@ export function extractAgentStatus(): AgentStatusResult {
   // miss completion on non-English accounts and force fallback to slow
   // response-stability polling (~90s). See PR #9 notes for marker source.
   const hasStepsCompleted = /\d+ steps? completed/i.test(body)
-    || /Выполнено\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru
-  const hasFinishedMarker = body.includes("Finished") && !hasActiveStopButton;
+    // Russian agrees the verb with grammatical number:
+    // "Выполнен 1 шаг" (sg), "Выполнено 2/3/4 шага",
+    // "Выполнено 5+ шагов". The previous regex matched
+    // only "Выполнено …" and missed the singular case.
+    || /Выполнен(?:о|ы)?\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru
+  // Word-boundary the English marker and exclude "Finished reading|analyzing|…",
+  // an *intermediate* step Perplexity renders while the agent is still running
+  // (otherwise status flips to "completed" once the first source is processed).
+  // Case-sensitive, like the old `includes("Finished")`, so prose "finished" is ignored.
+  const hasFinishedMarker = /\bFinished\b(?!\s+(?:reading|analyzing|browsing|searching|loading))/.test(body)
+    && !hasActiveStopButton;
   const hasReviewedSources = /Reviewed \d+ sources?/i.test(body);
   const hasSourcesIndicator = /\d+\s*sources?/i.test(body) // en
     || /\d+\s*источник(?:а|ов)?/iu.test(body); // ru
@@ -138,10 +147,17 @@ export function extractAgentStatus(): AgentStatusResult {
   const mainContent = (document.querySelector("main") || document.body) as HTMLElement;
   const bodyText = mainContent.innerText;
 
-  // Strategy 1: Find content after "X steps completed" marker (agent's final response)
-  const stepsMatch = bodyText.match(/(\d+)\s*steps?\s*completed/i);
+  // Strategy 1: Find content after "X steps completed" marker (agent's final response).
+  // In multi-turn chats Perplexity keeps previous-turn markers in the
+  // scroll buffer, and the marker text differs across turns ("3 steps
+  // completed" vs "5 steps completed"). `match()` returns only the
+  // FIRST match, so anchoring on it — with either `indexOf` or
+  // `lastIndexOf` of that exact string — lands on the OLDEST turn.
+  // Walk every match with the /g flag and take the last one.
+  const stepsMatches = [...bodyText.matchAll(/(\d+)\s*steps?\s*completed/gi)];
+  const stepsMatch = stepsMatches.length > 0 ? stepsMatches[stepsMatches.length - 1] : null;
   if (stepsMatch) {
-    const markerIndex = bodyText.indexOf(stepsMatch[0]);
+    const markerIndex = stepsMatch.index ?? -1;
     if (markerIndex !== -1) {
       // Get everything after the marker
       let afterMarker = bodyText.substring(markerIndex + stepsMatch[0].length).trim();
@@ -168,9 +184,11 @@ export function extractAgentStatus(): AgentStatusResult {
 
   // Strategy 2: If no steps marker, look for content after source citations
   if (!response || response.length < 50) {
-    const sourcesMatch = bodyText.match(/Reviewed\s+\d+\s+sources?/i);
+    // Same rationale as Strategy 1: walk every match and take the last.
+    const sourcesMatches = [...bodyText.matchAll(/Reviewed\s+\d+\s+sources?/gi)];
+    const sourcesMatch = sourcesMatches.length > 0 ? sourcesMatches[sourcesMatches.length - 1] : null;
     if (sourcesMatch) {
-      const markerIndex = bodyText.indexOf(sourcesMatch[0]);
+      const markerIndex = sourcesMatch.index ?? -1;
       if (markerIndex !== -1) {
         let afterMarker = bodyText.substring(markerIndex + sourcesMatch[0].length).trim();
         const endMarkers = [
diff --git a/tests/unit/page-scripts.test.ts b/tests/unit/page-scripts.test.ts
index b7aac8b..1406d94 100644
--- a/tests/unit/page-scripts.test.ts
+++ b/tests/unit/page-scripts.test.ts
@@ -82,6 +82,68 @@ describe("extractAgentStatus", () => {
     expect(result.response.length).toBeGreaterThan(0);
   });
 
+  it("returns 'completed' for Russian singular 'Выполнен 1 шаг'", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Выполнен 1 шаг";
+    const p = document.createElement("div");
+    p.className = "prose";
+    p.textContent = "Готовый ответ агента, длина превышает 15 символов.";
+    main.append(m, p);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+  });
+
+  it("returns 'completed' for Russian plural 'Выполнено 5 шагов'", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Выполнено 5 шагов";
+    const p = document.createElement("div");
+    p.className = "prose";
+    p.textContent = "Готовый ответ агента, длина превышает 15 символов.";
+    main.append(m, p);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+  });
+
+  it("does NOT treat 'Finished reading sources' as a completion marker", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Finished reading sources";
+    const btn = document.createElement("button");
+    btn.setAttribute("aria-label", "Stop");
+    btn.textContent = "stop";
+    main.append(m, btn);
+    document.body.append(main);
+    markVisible(btn);
+
+    const result = extractAgentStatus();
+    // Stop button visible -> still working, regardless of "Finished reading".
+    expect(result.status).toBe("working");
+  });
+
+  it("picks the response after the LAST 'steps completed' marker", () => {
+    // Multi-turn chat: the older turn's marker must NOT win over the newer one.
+    const main = document.createElement("main");
+    const turn1 = document.createElement("div");
+    turn1.textContent =
+      "3 steps completed Previous turn answer text here, long enough to exceed thresholds easily. Ask anything";
+    const turn2 = document.createElement("div");
+    turn2.textContent =
+      "5 steps completed New turn answer that we actually want returned to the caller. Ask a follow-up";
+    main.append(turn1, turn2);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+    expect(result.response).toContain("New turn answer");
+    expect(result.response).not.toContain("Previous turn answer");
+  });
+
   it("extracts and dedupes step descriptions matching the working patterns", () => {
     // Pattern matching runs against document.body.innerText.
     // Use one step per <div> so jsdom's innerText emits one per line.