Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.39.0",
"@modelcontextprotocol/sdk": "^1.0.0",
"chrome-remote-interface": "^0.33.2"
"chrome-remote-interface": "^0.34.0"
},
"devDependencies": {
"@types/chrome-remote-interface": "^0.33.0",
Expand Down
202 changes: 201 additions & 1 deletion src/cdp-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,160 @@ import type {
TabContext,
} from "./types.js";

// chrome-remote-interface@^0.34.0 exposes `ProtocolError` at runtime
// (`module.exports.ProtocolError = ...`), but @types/chrome-remote-interface
// doesn't declare it yet. Cast at import so `instanceof` typechecks; remove
// the cast once DefinitelyTyped/DefinitelyTyped#74992 lands and we pick up
// the updated @types.
/**
 * Instance shape assumed for the error class read off the CDP module below.
 * Declared locally because the installed typings do not describe it.
 */
interface CdpProtocolError extends Error {
  /** The request associated with the failure: method name plus optional params. */
  request: { method: string; params?: unknown };
  /** The error payload associated with the failure. */
  response: CDP.SendError;
}

/** Constructor signature used purely so `instanceof ProtocolError` typechecks. */
type CdpProtocolErrorConstructor = new (...args: unknown[]) => CdpProtocolError;

// Runtime value comes straight from the CDP module; only its static type is
// supplied here via the cast.
const { ProtocolError } = CDP as unknown as {
  ProtocolError: CdpProtocolErrorConstructor;
};

// Hidden targets (e.g. the Perplexity sidecar panel) can report a 0x0 layout
// viewport in some Comet window states (cold launch, no real browsing tabs in
// front). When that happens, Page.captureScreenshot waits for compositor
// frames that never arrive and stalls for ~2 minutes. Detecting that state
// via Page.getLayoutMetrics() and supplying an explicit clip +
// captureBeyondViewport=true makes the renderer produce a frame immediately.
//
// The predicate is intentionally tight: both dimensions must be zero. Live
// CDP testing confirmed that 0x0 is the only reproducible viewport state
// that triggers the stall — Chromium's Emulation.setDeviceMetricsOverride
// rejects single-zero dimensions and falls back to the natural viewport,
// and the natural 0x0 case is "no layout computed yet" (an all-or-nothing
// state). A 0xN or Nx0 viewport is not a state we can observe in practice.
/**
 * Explicit capture rectangle used when the layout viewport cannot be trusted:
 * origin (0, 0), 1280x800, scale 1. `as const` keeps the literal types so
 * `typeof SCREENSHOT_FALLBACK_CLIP` describes exactly this object.
 */
const SCREENSHOT_FALLBACK_CLIP = { x: 0, y: 0, width: 1280, height: 800, scale: 1 } as const;

/**
 * Minimal Page-domain surface required by `captureScreenshotWithFallback`.
 * Keeping it this narrow means a unit test can pass a small hand-written
 * fake instead of a full `chrome-remote-interface` Page object.
 */
export interface ScreenshotPageAPI {
  /** Called before capturing; failures are tolerated by the caller. */
  bringToFront(): Promise<unknown>;

  /**
   * Layout metrics for the target. Either viewport entry may be absent;
   * the caller prefers `cssLayoutViewport` and falls back to `layoutViewport`.
   */
  getLayoutMetrics(): Promise<
    Partial<
      Record<
        "cssLayoutViewport" | "layoutViewport",
        { clientWidth: number; clientHeight: number }
      >
    >
  >;

  /**
   * Capture the page. `captureBeyondViewport` and `clip` are only supplied
   * by the caller when it decides to use the fixed fallback rectangle.
   */
  captureScreenshot(opts: {
    format: "png" | "jpeg";
    captureBeyondViewport?: boolean;
    clip?: typeof SCREENSHOT_FALLBACK_CLIP;
  }): Promise<ScreenshotResult>;
}

/**
 * Take a screenshot of `page`, switching to a fixed explicit clip when the
 * layout viewport reports no usable size.
 *
 * Steps:
 *  1. Try to bring the target to the front; any failure is ignored.
 *  2. Read layout metrics. If neither viewport dimension is non-zero (or no
 *     viewport is reported at all), use `SCREENSHOT_FALLBACK_CLIP`. If the
 *     metrics call fails with a `ProtocolError`, also use the fallback; any
 *     other failure is rethrown unchanged.
 *  3. Capture, adding `captureBeyondViewport: true` and the clip only when
 *     the fallback was selected.
 *
 * Written as a free function so tests can drive it with a fake page.
 *
 * @param page   Page-domain slice to operate on.
 * @param format Image format; defaults to "png".
 * @returns The capture result as returned by `page.captureScreenshot`.
 * @throws Error when the capture result has no `data`, or whatever
 *         non-protocol error `getLayoutMetrics` / `captureScreenshot` raised.
 */
export async function captureScreenshotWithFallback(
  page: ScreenshotPageAPI,
  format: "png" | "jpeg" = "png",
): Promise<ScreenshotResult> {
  try {
    await page.bringToFront();
  } catch {
    /* not all targets support it */
  }

  let useFallbackClip = false;
  try {
    const metrics = await page.getLayoutMetrics();
    const viewport = metrics.cssLayoutViewport ?? metrics.layoutViewport;
    // Fallback only when *both* dimensions are missing/zero.
    const hasSomeExtent = Boolean(viewport?.clientWidth || viewport?.clientHeight);
    useFallbackClip = !hasSomeExtent;
  } catch (err) {
    // Only a Chrome-side rejection is recoverable here; anything else
    // (e.g. a transport failure) must surface to the caller as-is.
    if (!(err instanceof ProtocolError)) {
      throw err;
    }
    useFallbackClip = true;
  }

  const captureOptions = useFallbackClip
    ? { format, captureBeyondViewport: true, clip: SCREENSHOT_FALLBACK_CLIP }
    : { format };
  const shot = await page.captureScreenshot(captureOptions);

  if (shot?.data) {
    return shot;
  }
  throw new Error(
    "Screenshot returned empty data. Ensure you're connected to a visible tab with content.",
  );
}

/**
 * Lifecycle bookkeeping keyed by frame id. Each entry records the loader id
 * the events belong to and the set of lifecycle event names seen for it.
 */
export type FrameLifecycleMap = Map<
  string,
  {
    loaderId: string;
    events: Set<string>;
  }
>;

/**
 * Minimal Page-domain surface required by `waitForLifecycle`.
 */
export interface LifecyclePageAPI {
  /**
   * Register `handler` for lifecycle events. The returned function is the
   * unsubscribe hook: `waitForLifecycle` calls it to detach the handler.
   * (Per the original author's note, chrome-remote-interface implements this
   * as `() => chrome.removeListener(rawEventName, handler)`.)
   */
  lifecycleEvent(
    handler: (params: { name?: string }) => void,
  ): () => unknown;
}

/**
 * Wait until the named Page.lifecycleEvent (e.g. 'firstContentfulPaint',
 * 'networkAlmostIdle') is known to have fired.
 *
 * Resolves `true` when:
 *  - any entry in `frameLifecycle` already lists `eventName`, or
 *  - the live listener sees an event whose `name` equals `eventName`
 *    before the timeout (including synchronously during registration).
 * Resolves `false` when registration throws or `timeoutMs` elapses first.
 * The promise never rejects.
 *
 * Ordering: the listener is registered *before* the cache is scanned so an
 * event that lands between the two steps is caught by the listener rather
 * than missed.
 *
 * Cleanup: the listener is unsubscribed and the timer cleared on every exit
 * path. That includes the case where the handler is invoked synchronously
 * from inside `page.lifecycleEvent(...)`, i.e. before the unsubscribe
 * function has been handed back — teardown is re-run once it is available.
 *
 * Extracted as a free function so it can be unit tested against a fake
 * Page + map without a live CDP connection.
 *
 * @param page           Source of live lifecycle events.
 * @param frameLifecycle Cache of events already observed per frame.
 * @param eventName      Lifecycle event name to wait for.
 * @param timeoutMs      Maximum time to wait for a live event.
 * @returns Promise resolving to whether the event was observed.
 */
export function waitForLifecycle(
  page: LifecyclePageAPI,
  frameLifecycle: FrameLifecycleMap,
  eventName: string,
  timeoutMs: number,
): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    let done = false;
    let unsubscribe: (() => unknown) | null = null;
    let timer: NodeJS.Timeout | null = null;

    // Idempotent teardown: safe to call repeatedly, and safe to call again
    // once `unsubscribe` becomes available after an early finish.
    const cleanup = (): void => {
      if (unsubscribe) {
        const detach = unsubscribe;
        unsubscribe = null;
        try { detach(); } catch { /* ignore */ }
      }
      if (timer) {
        clearTimeout(timer);
        timer = null;
      }
    };

    const finish = (val: boolean): void => {
      if (done) return;
      done = true;
      cleanup();
      resolve(val);
    };

    const listener = (params: { name?: string }): void => {
      if (params?.name === eventName) finish(true);
    };

    try {
      unsubscribe = page.lifecycleEvent(listener);
    } catch {
      finish(false);
      return;
    }

    // The handler may have fired during registration, when `unsubscribe`
    // was still null. Detach now and skip the cache scan and timer.
    if (done) {
      cleanup();
      return;
    }

    for (const { events } of frameLifecycle.values()) {
      if (events.has(eventName)) {
        finish(true);
        return;
      }
    }

    timer = setTimeout(() => finish(false), timeoutMs);
  });
}

// Detect if running in WSL (must be before windowsFetch)
function isWSL(): boolean {
if (platform() !== 'linux') return false;
Expand Down Expand Up @@ -164,6 +318,13 @@ export class CometCDPClient {
// Tab context registry for multi-tab workflow awareness
private tabRegistry: Map<string, TabContext> = new Map();

// Page lifecycle tracking — events accumulated per frame for the current
// document (loaderId). Used by waitForLifecycle() so screenshots and other
// ops can confirm the renderer has actually painted before they run.
private frameLifecycle: FrameLifecycleMap = new Map();
private lifecycleListener: ((params: any) => void) | null = null;
private lifecycleUnsubscribe: (() => unknown) | null = null;

/** Whether the state is flagged as connected and a client object is held. */
get isConnected(): boolean {
  const { connected } = this.state;
  // The flag alone is not enough: a client instance must also be present.
  return connected && this.client !== null;
}
Expand Down Expand Up @@ -1024,6 +1185,33 @@ export class CometCDPClient {
} catch { /* continue */ }
}

// Subscribe to Page.lifecycleEvent so we can wait for paint readiness
// (firstContentfulPaint, networkAlmostIdle, etc.) the way Lighthouse and
// Puppeteer do, instead of polling document.readyState.
this.frameLifecycle.clear();
if (this.lifecycleUnsubscribe) {
try { this.lifecycleUnsubscribe(); } catch { /* ignore */ }
this.lifecycleUnsubscribe = null;
}
this.lifecycleListener = null;
try {
await this.client.Page.setLifecycleEventsEnabled({ enabled: true });
this.lifecycleListener = (params: any) => {
const { frameId, loaderId, name } = params || {};
if (!frameId || !loaderId || !name) return;
const existing = this.frameLifecycle.get(frameId);
if (!existing || existing.loaderId !== loaderId) {
this.frameLifecycle.set(frameId, { loaderId, events: new Set([name]) });
} else {
existing.events.add(name);
}
};
// chrome-remote-interface's domain event callbacks return an unsubscribe
// function (api.js: `() => chrome.removeListener(rawEventName, handler)`).
// Capture it so disconnect() can deregister cleanly.
this.lifecycleUnsubscribe = this.client.Page.lifecycleEvent(this.lifecycleListener);
} catch { /* lifecycle tracking is best-effort */ }

this.state.connected = true;
this.state.activeTabId = targetId;
this.lastTargetId = targetId;
Expand All @@ -1039,6 +1227,12 @@ export class CometCDPClient {
* Disconnect from current tab
*/
async disconnect(): Promise<void> {
if (this.lifecycleUnsubscribe) {
try { this.lifecycleUnsubscribe(); } catch { /* ignore */ }
this.lifecycleUnsubscribe = null;
}
this.lifecycleListener = null;
this.frameLifecycle.clear();
if (this.client) {
await this.client.close();
this.client = null;
Expand Down Expand Up @@ -1112,7 +1306,13 @@ export class CometCDPClient {
*/
async screenshot(format: "png" | "jpeg" = "png"): Promise<ScreenshotResult> {
  this.ensureConnected();

  // Best-effort paint-readiness gate via Page.lifecycleEvent. If
  // 'firstContentfulPaint' is already recorded in `frameLifecycle` the wait
  // resolves right away; otherwise it waits up to 2s for a live event. The
  // boolean outcome is deliberately ignored: the capture proceeds either way.
  await waitForLifecycle(this.client!.Page, this.frameLifecycle, 'firstContentfulPaint', 2000);

  // Single exit point: the previous direct `Page.captureScreenshot({ format })`
  // return is gone, since it made the wait above and this fallback-aware
  // capture unreachable.
  return captureScreenshotWithFallback(this.client!.Page, format);
}

/**
Expand Down
Loading