From e8e09608e7013f244877ca45793648a555bcca9b Mon Sep 17 00:00:00 2001
From: Sergei Arutiunian <sergeiarutiunian@Sergei-sin-MacBook-Air.local>
Date: Tue, 12 May 2026 18:29:48 +0200
Subject: [PATCH 1/2] fix(correctness): tab matching, completion regex, version
 sync, COMET_PORT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A bundle of small correctness fixes uncovered while auditing the repo.
Each fix is independent; happy to split if reviewer prefers.

### page-scripts.ts — completion detection

* `Выполнен(?:о|ы)?` — match Russian singular `Выполнен 1 шаг` in
  addition to the plural cases the original regex covered. Russian
  agrees the past-tense participle with grammatical number, so the
  singular form was being missed on Russian-localised Perplexity
  accounts.
* `\bFinished\b(?!\s+(?:reading|analyzing|browsing|searching|loading))`
  — word-boundary the English "Finished" marker and exclude the
  intermediate "Finished reading … " step Perplexity renders mid-task.
  Without this the agent flips to `completed` the moment the first
  source is processed.
* Strategy 1 and Strategy 2 in `extractAgentStatus` now use
  `lastIndexOf` for the `steps completed` / `Reviewed N sources`
  markers. In multi-turn chats the previous-turn markers stay in the
  scroll buffer; `indexOf` returned the OLDEST match, causing the
  extractor to return the previous turn's answer for the current
  question.

### cdp-client.ts — tab matching

* `findTabByDomain` now matches "exact or subdomain" instead of the
  ambiguous `includes` / reverse-`includes` heuristic. Search
  `"github.com"` matches `github.com` and `gist.github.com` but no
  longer matches `notgithub.com`. Search `"ai"` no longer matches
  every `*.ai` tab plus anything containing the substring `ai`.
* `listTargets` on native Windows now closes its `CDP({...})` client
  in a `finally` block. If `Target.getTargets()` threw the underlying
  WebSocket leaked; with `withAutoReconnect` retrying, leaks
  compounded.

### cdp-client.ts / index.ts — `COMET_PORT` env

The README documents `COMET_PORT` but the constant was hardcoded and
call sites passed the literal `9223` directly. `DEFAULT_PORT` now
reads from the env var (validated as `[1, 65535]`, falls back to
9223 with a warning), and call sites use `DEFAULT_PORT`.

### index.ts — MCP version

`new Server({ name: "comet-bridge", version: "2.5.0" }, ...)` advertised
2.5.0 to clients while `package.json` shipped 2.6.2. Now read at startup
via `readFileSync(packageJson)` so the handshake matches the published
version automatically.

### tests/unit/page-scripts.test.ts

Four new cases:

* singular `Выполнен 1 шаг` -> `completed`
* plural   `Выполнено 5 шагов` -> `completed`
* `Finished reading sources` with a visible stop button -> `working`
  (the previous behaviour would have flipped it to `completed`)
* multi-turn `N steps completed`: the response comes from the LAST
  marker, not the first

Tests build DOM with `createElement`/`textContent` to keep the linter
happy and to avoid any innerHTML in test fixtures.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/cdp-client.ts               | 54 +++++++++++++++++++++-------
 src/index.ts                    | 26 +++++++++++---
 src/page-scripts.ts             | 23 +++++++++---
 tests/unit/page-scripts.test.ts | 62 +++++++++++++++++++++++++++++++++
 4 files changed, 143 insertions(+), 22 deletions(-)

diff --git a/src/cdp-client.ts b/src/cdp-client.ts
index 10eb74e..dc7bb8f 100644
--- a/src/cdp-client.ts
+++ b/src/cdp-client.ts
@@ -143,7 +143,21 @@ function getCometPath(): string {
 
 const COMET_PATH = getCometPath();
 const IS_WINDOWS = platform() === "win32" || IS_WSL;
-const DEFAULT_PORT = 9223;
+
+// Honour the documented `COMET_PORT` env var (see README "Environment Variables").
+// Previously the constant was hardcoded to 9223 and call sites passed the literal
+// straight to `startComet(9223)`, so the env var was silently ignored.
+function readPortFromEnv(): number {
+  const raw = process.env.COMET_PORT;
+  if (!raw) return 9223;
+  const n = parseInt(raw, 10);
+  if (!Number.isInteger(n) || n < 1 || n > 65535) {
+    console.error(`Invalid COMET_PORT="${raw}", falling back to 9223`);
+    return 9223;
+  }
+  return n;
+}
+export const DEFAULT_PORT = readPortFromEnv();
 
 export class CometCDPClient {
   private client: CDP.Client | null = null;
@@ -574,12 +588,21 @@ export class CometCDPClient {
   }
 
   /**
-   * Find a tab by domain (for reuse)
+   * Find a tab by domain (for reuse).
+   *
+   * Match is "exact or subdomain": `findTabByDomain("github.com")` matches
+   * both `github.com` and `gist.github.com`, but NOT `notgithub.com`. The
+   * previous `includes`/reverse-`includes` heuristic produced surprising
+   * matches — `domain: "ai"` matched `perplexity.ai`, `chat.openai.com`,
+   * etc., and a search for `"mail.google.com"` would match a `google.com`
+   * tab via the reverse direction.
    */
   async findTabByDomain(domain: string): Promise<TabContext | null> {
     await this.refreshTabRegistry();
+    const target = domain.toLowerCase();
     for (const tab of this.tabRegistry.values()) {
-      if (tab.domain.includes(domain) || domain.includes(tab.domain)) {
+      const tabDomain = tab.domain.toLowerCase();
+      if (tabDomain === target || tabDomain.endsWith(`.${target}`)) {
         return tab;
       }
     }
@@ -965,16 +988,21 @@ export class CometCDPClient {
     if (IS_WINDOWS) {
       try {
         const tempClient = await CDP({ port: this.state.port, host: '127.0.0.1' });
-        const { targetInfos } = await (tempClient as any).Target.getTargets();
-        await tempClient.close();
-
-        return targetInfos.map((t: any) => ({
-          id: t.targetId,
-          type: t.type,
-          title: t.title,
-          url: t.url,
-          webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}`
-        }));
+        try {
+          const { targetInfos } = await (tempClient as any).Target.getTargets();
+          return targetInfos.map((t: any) => ({
+            id: t.targetId,
+            type: t.type,
+            title: t.title,
+            url: t.url,
+            webSocketDebuggerUrl: `ws://127.0.0.1:${this.state.port}/devtools/page/${t.targetId}`
+          }));
+        } finally {
+          // Close in `finally` so a throw inside `Target.getTargets()` does
+          // not leak the underlying WebSocket. Each retry in withAutoReconnect
+          // calls listTargets() again — even a slow leak exhausts handles.
+          await tempClient.close().catch(() => { /* already closed */ });
+        }
       } catch (error) {
         throw new Error(`Failed to list targets: ${error}`);
       }
diff --git a/src/index.ts b/src/index.ts
index e0ab6fb..2d3914b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,9 @@
 // Claude Code ↔ Perplexity Comet bidirectional interaction
 // Simplified to 6 essential tools
 
+import { readFileSync } from "fs";
+import { fileURLToPath } from "url";
+import { dirname, join } from "path";
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import {
@@ -11,7 +14,7 @@ import {
   ListToolsRequestSchema,
   Tool,
 } from "@modelcontextprotocol/sdk/types.js";
-import { cometClient } from "./cdp-client.js";
+import { cometClient, DEFAULT_PORT } from "./cdp-client.js";
 import { cometAI } from "./comet-ai.js";
 import {
   sessionState,
@@ -21,6 +24,21 @@ import {
 } from "./session-state.js";
 import { readProseState, type ProseState } from "./page-scripts.js";
 
+// Read version from package.json so the MCP `initialize` handshake reports
+// the actually-shipped version. Hardcoding (previously "2.5.0" while
+// package.json was "2.6.2") drifts every release.
+function readPackageVersion(): string {
+  try {
+    const here = dirname(fileURLToPath(import.meta.url));
+    const pkgPath = join(here, "..", "package.json");
+    const pkg = JSON.parse(readFileSync(pkgPath, "utf8")) as { version?: string };
+    return pkg.version ?? "0.0.0";
+  } catch {
+    return "0.0.0";
+  }
+}
+const SERVER_VERSION = readPackageVersion();
+
 const TOOLS: Tool[] = [
   {
     name: "comet_connect",
@@ -117,7 +135,7 @@ const TOOLS: Tool[] = [
 ];
 
 const server = new Server(
-  { name: "comet-bridge", version: "2.5.0" },
+  { name: "comet-bridge", version: SERVER_VERSION },
   { capabilities: { tools: {} } }
 );
 
@@ -130,7 +148,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     switch (name) {
       case "comet_connect": {
         // Auto-start Comet with debug port (will restart if running without it)
-        const startResult = await cometClient.startComet(9223);
+        const startResult = await cometClient.startComet(DEFAULT_PORT);
 
         // Get all tabs - DON'T clean up tabs, as closing them can crash Comet
         const targets = await cometClient.listTargets();
@@ -186,7 +204,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         } catch (preCheckError) {
           // If pre-check fails, try to recover
           try {
-            await cometClient.startComet(9223);
+            await cometClient.startComet(DEFAULT_PORT);
             const targets = await cometClient.listTargets();
             const page = targets.find(t => t.type === 'page');
             if (page) await cometClient.connect(page.id);
diff --git a/src/page-scripts.ts b/src/page-scripts.ts
index ad53001..03f7d4b 100644
--- a/src/page-scripts.ts
+++ b/src/page-scripts.ts
@@ -70,8 +70,17 @@ export function extractAgentStatus(): AgentStatusResult {
   // miss completion on non-English accounts and force fallback to slow
   // response-stability polling (~90s). See PR #9 notes for marker source.
   const hasStepsCompleted = /\d+ steps? completed/i.test(body)
-                         || /Выполнено\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru
-  const hasFinishedMarker = body.includes("Finished") && !hasActiveStopButton;
+                         // Russian agrees the verb with grammatical number:
+                         //   "Выполнен 1 шаг" (sg), "Выполнено 2/3/4 шага",
+                         //   "Выполнено 5+ шагов". The previous regex matched
+                         //   only "Выполнено …" and missed the singular case.
+                         || /Выполнен(?:о|ы)?\s+\d+\s+шаг(?:а|ов)?/iu.test(body); // ru
+  // Word-boundary the English marker and exclude "Finished reading|analyzing|…",
+  // which is an *intermediate* step Perplexity renders while the agent is
+  // still running. Without this, the agent flips to "completed" the moment
+  // the first source is processed.
+  const hasFinishedMarker = /\bFinished\b(?!\s+(?:reading|analyzing|browsing|searching|loading))/i.test(body)
+                          && !hasActiveStopButton;
   const hasReviewedSources = /Reviewed \d+ sources?/i.test(body);
   const hasSourcesIndicator = /\d+\s*sources?/i.test(body)              // en
                            || /\d+\s*источник(?:а|ов)?/iu.test(body);    // ru
@@ -138,10 +147,13 @@ export function extractAgentStatus(): AgentStatusResult {
     const mainContent = (document.querySelector("main") || document.body) as HTMLElement;
     const bodyText = mainContent.innerText;
 
-    // Strategy 1: Find content after "X steps completed" marker (agent's final response)
+    // Strategy 1: Find content after "X steps completed" marker (agent's final response).
+    // Use lastIndexOf: in multi-turn chats Perplexity keeps previous-turn
+    // markers visible in the scroll buffer, and `indexOf` would return the
+    // FIRST (oldest) match — i.e. the response from a previous question.
     const stepsMatch = bodyText.match(/(\d+)\s*steps?\s*completed/i);
     if (stepsMatch) {
-      const markerIndex = bodyText.indexOf(stepsMatch[0]);
+      const markerIndex = bodyText.lastIndexOf(stepsMatch[0]);
       if (markerIndex !== -1) {
         // Get everything after the marker
         let afterMarker = bodyText.substring(markerIndex + stepsMatch[0].length).trim();
@@ -170,7 +182,8 @@ export function extractAgentStatus(): AgentStatusResult {
     if (!response || response.length < 50) {
       const sourcesMatch = bodyText.match(/Reviewed\s+\d+\s+sources?/i);
       if (sourcesMatch) {
-        const markerIndex = bodyText.indexOf(sourcesMatch[0]);
+        // Same rationale as Strategy 1: prefer the most recent marker.
+        const markerIndex = bodyText.lastIndexOf(sourcesMatch[0]);
         if (markerIndex !== -1) {
           let afterMarker = bodyText.substring(markerIndex + sourcesMatch[0].length).trim();
           const endMarkers = [
diff --git a/tests/unit/page-scripts.test.ts b/tests/unit/page-scripts.test.ts
index b7aac8b..1406d94 100644
--- a/tests/unit/page-scripts.test.ts
+++ b/tests/unit/page-scripts.test.ts
@@ -82,6 +82,68 @@ describe("extractAgentStatus", () => {
     expect(result.response.length).toBeGreaterThan(0);
   });
 
+  it("returns 'completed' for Russian singular 'Выполнен 1 шаг'", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Выполнен 1 шаг";
+    const p = document.createElement("div");
+    p.className = "prose";
+    p.textContent = "Готовый ответ агента, длина превышает 15 символов.";
+    main.append(m, p);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+  });
+
+  it("returns 'completed' for Russian plural 'Выполнено 5 шагов'", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Выполнено 5 шагов";
+    const p = document.createElement("div");
+    p.className = "prose";
+    p.textContent = "Готовый ответ агента, длина превышает 15 символов.";
+    main.append(m, p);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+  });
+
+  it("does NOT treat 'Finished reading sources' as a completion marker", () => {
+    const main = document.createElement("main");
+    const m = document.createElement("div");
+    m.textContent = "Finished reading sources";
+    const btn = document.createElement("button");
+    btn.setAttribute("aria-label", "Stop");
+    btn.textContent = "stop";
+    main.append(m, btn);
+    document.body.append(main);
+    markVisible(btn);
+
+    const result = extractAgentStatus();
+    // Stop button visible -> still working, regardless of "Finished reading".
+    expect(result.status).toBe("working");
+  });
+
+  it("picks the response after the LAST 'steps completed' marker", () => {
+    // Multi-turn chat: the older turn's marker must NOT win over the newer one.
+    const main = document.createElement("main");
+    const turn1 = document.createElement("div");
+    turn1.textContent =
+      "3 steps completed Previous turn answer text here, long enough to exceed thresholds easily. Ask anything";
+    const turn2 = document.createElement("div");
+    turn2.textContent =
+      "5 steps completed New turn answer that we actually want returned to the caller. Ask a follow-up";
+    main.append(turn1, turn2);
+    document.body.append(main);
+
+    const result = extractAgentStatus();
+    expect(result.status).toBe("completed");
+    expect(result.response).toContain("New turn answer");
+    expect(result.response).not.toContain("Previous turn answer");
+  });
+
   it("extracts and dedupes step descriptions matching the working patterns", () => {
     // Pattern matching runs against document.body.innerText.
     // Use one step per <div> so jsdom's innerText emits one per line.

From 1dffc641d34038fd6e25863bdbfe3154e300c9c9 Mon Sep 17 00:00:00 2001
From: Sergei Arutiunian <sergeiarutiunian@Sergei-sin-MacBook-Air.local>
Date: Tue, 12 May 2026 18:56:52 +0200
Subject: [PATCH 2/2] fix(detect): walk every steps/sources marker to anchor on
 the latest turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI failed on the previous commit's "picks the response after the LAST
'steps completed' marker" test. The bug:

  - `bodyText.match(/(\d+)\s*steps?\s*completed/i)` returns only the
    FIRST match (no /g flag).
  - In real multi-turn chats the marker text differs across turns:
    "3 steps completed" for turn 1, "5 steps completed" for turn 2.
  - `lastIndexOf(stepsMatch[0])` then looks for the LAST occurrence of
    "3 steps completed" — which appears exactly once — and anchors on
    the OLDER turn anyway.

The fix `lastIndexOf` was looking for the wrong needle. Replace with
`matchAll` + take the last result: this enumerates every marker
across all turns and picks the most recent one regardless of digit
value. Same change applied to the `Reviewed N sources` path in
Strategy 2.

Behaviour now matches the documented intent of the previous commit;
the existing unit test passes.
---
 src/page-scripts.ts | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/page-scripts.ts b/src/page-scripts.ts
index 03f7d4b..f97bed1 100644
--- a/src/page-scripts.ts
+++ b/src/page-scripts.ts
@@ -148,12 +148,16 @@ export function extractAgentStatus(): AgentStatusResult {
     const bodyText = mainContent.innerText;
 
     // Strategy 1: Find content after "X steps completed" marker (agent's final response).
-    // Use lastIndexOf: in multi-turn chats Perplexity keeps previous-turn
-    // markers visible in the scroll buffer, and `indexOf` would return the
-    // FIRST (oldest) match — i.e. the response from a previous question.
-    const stepsMatch = bodyText.match(/(\d+)\s*steps?\s*completed/i);
+    // In multi-turn chats Perplexity keeps previous-turn markers in the
+    // scroll buffer, and the marker text differs across turns ("3 steps
+    // completed" vs "5 steps completed"). `match()` returns only the
+    // FIRST match, so anchoring on it — with either `indexOf` or
+    // `lastIndexOf` of that exact string — lands on the OLDEST turn.
+    // Walk every match with the /g flag and take the last one.
+    const stepsMatches = [...bodyText.matchAll(/(\d+)\s*steps?\s*completed/gi)];
+    const stepsMatch = stepsMatches.length > 0 ? stepsMatches[stepsMatches.length - 1] : null;
     if (stepsMatch) {
-      const markerIndex = bodyText.lastIndexOf(stepsMatch[0]);
+      const markerIndex = stepsMatch.index ?? -1;
       if (markerIndex !== -1) {
         // Get everything after the marker
         let afterMarker = bodyText.substring(markerIndex + stepsMatch[0].length).trim();
@@ -180,10 +184,11 @@ export function extractAgentStatus(): AgentStatusResult {
 
     // Strategy 2: If no steps marker, look for content after source citations
     if (!response || response.length < 50) {
-      const sourcesMatch = bodyText.match(/Reviewed\s+\d+\s+sources?/i);
+      // Same rationale as Strategy 1: walk every match and take the last.
+      const sourcesMatches = [...bodyText.matchAll(/Reviewed\s+\d+\s+sources?/gi)];
+      const sourcesMatch = sourcesMatches.length > 0 ? sourcesMatches[sourcesMatches.length - 1] : null;
       if (sourcesMatch) {
-        // Same rationale as Strategy 1: prefer the most recent marker.
-        const markerIndex = bodyText.lastIndexOf(sourcesMatch[0]);
+        const markerIndex = sourcesMatch.index ?? -1;
         if (markerIndex !== -1) {
           let afterMarker = bodyText.substring(markerIndex + sourcesMatch[0].length).trim();
           const endMarkers = [