From b162fb23e777f88ce102c2290928b07fefc0a33e Mon Sep 17 00:00:00 2001
From: Serhii Vasylenko <github.com@vasylenko.info>
Date: Tue, 12 May 2026 21:04:32 +0200
Subject: [PATCH 1/2] Fix terminology mislabels and inaccurate technical claims
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling the published JS artifact a "binary" or "TypeScript executable" misled
readers — the project ships `dist/index.js` (a JS file with a node shebang) via
npm, no native binary anywhere. Replaced those usages in README, e2e test names,
comments, and the e2e fixture body with "compiled output" / "built dist/index.js"
/ "compiled JS output" depending on context. Kept npm-`bin` jargon (BUILT_BIN,
TSX_BIN, spawnCompiled) and the SPEC line 43 future-feature mention of
`bun --compile` / Node SEA — those uses are accurate.

Also corrected several other inaccuracies:

- release.yml line 62: the comment claimed awk did NR/sub trimming, but the
  script only prints-after-match until the next header. Rewrote the comment to
  describe what the awk block actually does.
- .npmignore: the group headers misclassified tests/ + tsconfig.json as
  "source files" and claimed CLAUDE.md is rendered into the GitHub release
  body (only CHANGELOG.md is). Rewrote both headers.
- package.json description: omitted CLI mode. Replaced with one line that
  covers both MCP and CLI surfaces.
- scripts/postbuild.mjs: misattributed the +x effect to npm `bin` resolution.
  Rewrote to credit the shebang-based launch path.
- docs/SPEC.md: "All 7 error codes throw from core.ts" was imprecise — only
  five throw explicitly; network_error, timeout, and sometimes http_error are
  translated by classifyError. Tightened the wording.
- src/core.ts: softened the HTTP/2 framing comment to acknowledge the
  plain-HTTP fallback the same comment already noted; documented the
  AbortError branch in classifyError that the comment previously omitted.

Text-only changes. `npm run build` and `npm test` (50/50) pass.
---
 .github/workflows/release.yml |  5 +++--
 .npmignore                    |  4 ++--
 README.md                     |  6 +++---
 docs/SPEC.md                  |  4 ++--
 package.json                  |  2 +-
 scripts/postbuild.mjs         |  7 ++++---
 src/core.ts                   | 13 +++++++------
 tests/cli.test.ts             |  2 +-
 tests/e2e.test.ts             | 22 +++++++++++-----------
 9 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 08f8723..db27a29 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -58,8 +58,9 @@ jobs:
           TAG_NAME: ${{ github.ref_name }}
         run: |
           VERSION=${TAG_NAME#v}
-          # Print everything between "## [VERSION]" and the next "## [" header.
-          # The trailing blank lines are trimmed by awk's NR/sub trick below.
+          # Prints lines after the matched "## [VERSION]" header until the
+          # next "## [" header (or EOF). The matched header line itself is
+          # skipped via `next`.
           CHANGELOG_BODY=$(awk -v ver="$VERSION" '
             /^## \[/ {
               if (found) exit
diff --git a/.npmignore b/.npmignore
index 8230ec0..3b8569f 100644
--- a/.npmignore
+++ b/.npmignore
@@ -1,4 +1,4 @@
-# Source files (compiled to dist/, which is in `files`)
+# Source, tests, and TS build config (not needed at runtime; dist/ ships instead)
 src/
 tests/
 *.ts
@@ -12,7 +12,7 @@ tsconfig.json
 docs/
 scripts/
 
-# Project files (rendered into the GitHub release body, not the npm package)
+# Repo-only docs (CHANGELOG.md is rendered into the GitHub release body; CLAUDE.md is agent instructions). Excluded from the npm tarball.
 CHANGELOG.md
 CLAUDE.md
 
diff --git a/README.md b/README.md
index f739e58..e649560 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ markfetch https://en.wikipedia.org/wiki/Markdown
 }
 ```
 
-That snippet is the whole MCP setup — or jump to [CLI usage](#cli-usage) to drive the same binary from a shell.
+That snippet is the whole MCP setup — or jump to [CLI usage](#cli-usage) to drive the same command from a shell.
 
 ## MCP install commands
 
@@ -73,7 +73,7 @@ gemini mcp add -s user markfetch npx -y markfetch
 
 - **Stdio-clean.** Stdout is reserved for MCP frames. Stderr is fatal-only. No log spam, no ANSI escapes that could corrupt protocol framing.
 
-- **Pure Node, no subprocesses.** No Playwright, no headless Chromium, no Python hop. Single TypeScript executable on Node 24+ — one process whether you invoke it as an MCP server or from the shell.
+- **Pure Node, no subprocesses.** No Playwright, no headless Chromium, no Python hop. Single Node process — one Node process whether you invoke it as an MCP server or from the shell.
 
 ## CLI usage
 
@@ -142,7 +142,7 @@ Pass overrides via the `env` block of your MCP client config:
 
 Requires Node.js ≥ 24.
 
-When iterating on CLI changes, `tsx src/index.ts <url>` and `tsx src/index.ts --help` route through the same argv-discriminated dispatcher as the compiled binary — no rebuild needed between edits.
+When iterating on CLI changes, `tsx src/index.ts <url>` and `tsx src/index.ts --help` route through the same argv-discriminated dispatcher as the built `dist/index.js` — no rebuild needed between edits.
 
 To point an MCP client at a local source build, swap `npx` for `node` + an absolute path to `dist/index.js`:
 
diff --git a/docs/SPEC.md b/docs/SPEC.md
index 4c9ee40..f8c6305 100644
--- a/docs/SPEC.md
+++ b/docs/SPEC.md
@@ -21,7 +21,7 @@ Errors throw `MarkfetchError` uniformly from core; adapters catch once. Codes: `
 
 - **Lazy adapter imports.** The dispatcher uses `await import()` to load exactly one adapter. The only `console.log` in the project lives in `cli.ts`; under MCP, `cli.ts` never loads, so stdout-discipline is enforced by the module graph — not by linter or convention.
 
-- **Core throws, adapters translate.** All 7 error codes throw from `core.ts`; `classifyError` normalizes underlying-API errors (undici TypeErrors, AbortSignal timeouts). New codes need an `ErrorCode` union member + a throw site; adapters don't change.
+- **Core throws, adapters translate.** All 7 error codes surface from `core.ts` — five are thrown explicitly as `MarkfetchError`; `network_error`, `timeout`, and (sometimes) `http_error` are translated by `classifyError` from underlying-API errors (undici TypeErrors, AbortSignal timeouts). New codes need an `ErrorCode` union member + a throw site; adapters don't change.
 
 - **HTTP/2 + coherent Chrome fingerprint.** Wire protocol, headers, and UA must agree — a Chrome UA over HTTP/1.1 or without `Sec-CH-UA-*` is *more* suspicious than curl. `Sec-CH-UA-*` is derived from `MARKFETCH_USER_AGENT` at startup so override-coherence is mechanical.
 
@@ -36,7 +36,7 @@ Errors throw `MarkfetchError` uniformly from core; adapters catch once. Codes: `
 ## Ideas for future
 
 - **Authentication.** `MARKFETCH_AUTH_HEADER` env var (simple), or Chrome-cookie import for sites where the user is already logged in (frictionless, platform-specific, security-sensitive). Trigger: first useful internal / paywalled doc.
-- **JS rendering fallback for SPAs.** Playwright / headless Chrome as a companion package (`markfetch-heavy`) so the lean binary stays lean. Trigger: enough useful sites returning `extraction_failed`.
+- **JS rendering fallback for SPAs.** Playwright / headless Chrome as a companion package (`markfetch-heavy`) so the lean package stays lean. Trigger: enough useful sites returning `extraction_failed`.
 - **CloudFlare `/markdown` fallback.** Gated by `CF_AUTH_TOKEN`; fall back when Readability fails. Trigger: extraction failure rate stays high after Readability tuning.
 - **Cookie reuse across redirects within a single fetch.** Currently none. Trigger: a target serves content only after a session-cookie redirect.
 - **Proxy support** (`MARKFETCH_PROXY_URL`) and **`Accept-Language` control** (`MARKFETCH_ACCEPT_LANGUAGE`). Trigger: corporate proxy / locale-specific content.
diff --git a/package.json b/package.json
index f81c12c..445f75b 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "markfetch",
   "version": "0.5.0",
-  "description": "MCP server: fetch a URL, return clean markdown. Built for AI agents.",
+  "description": "Fetch a URL, return clean markdown. MCP server and CLI for AI agents.",
   "license": "MIT",
   "author": {
     "name": "Serhii Vasylenko",
diff --git a/scripts/postbuild.mjs b/scripts/postbuild.mjs
index 8fd8b9d..ada56b7 100644
--- a/scripts/postbuild.mjs
+++ b/scripts/postbuild.mjs
@@ -1,6 +1,7 @@
-// Sets execute bit on dist/index.js so the npm `bin` entry resolves correctly
-// when invoked via `npx markfetch` or as a direct script. tsc preserves the
-// shebang but doesn't chmod its outputs.
+// Sets execute bit on dist/index.js so the shebang-based launch works —
+// both when npm links the `bin` entry (npm/npx exec the linked target)
+// and when running ./dist/index.js directly. tsc preserves the shebang
+// but doesn't chmod its outputs.
 import { chmodSync } from "node:fs";
 
 chmodSync("dist/index.js", 0o755);
diff --git a/src/core.ts b/src/core.ts
index eff4f29..2c7e1cd 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -83,11 +83,11 @@ function deriveClientHints(ua: string): {
 const clientHints = deriveClientHints(config.userAgent);
 
 // Enable HTTP/2 via TLS ALPN. Modern bot-detection systems and CDNs consider
-// wire protocol alongside header fingerprint; HTTP/2 paired with a Chrome
-// header set is internally consistent, HTTP/1.1 + Chrome headers is not.
-// Servers that don't advertise h2 in ALPN fall back to HTTP/1.1 transparently
-// during the TLS handshake — no manual retry needed. Plain-HTTP connections
-// (port 80) skip ALPN entirely and use HTTP/1.1.
+// wire protocol alongside header fingerprint; HTTP/2 over TLS pairs cleanly
+// with a Chrome header set. Servers that don't advertise h2 in ALPN fall back
+// to HTTP/1.1 transparently during the TLS handshake — no manual retry needed.
+// Plain-HTTP connections (port 80) skip ALPN entirely and use HTTP/1.1,
+// accepting the protocol/fingerprint mismatch in that case.
 setGlobalDispatcher(new Agent({ allowH2: true }));
 
 const TURNDOWN = new TurndownService({
@@ -165,7 +165,8 @@ export function classifyError(err: unknown): { code: ErrorCode; message: string
   if (err instanceof MarkfetchError) {
     return { code: err.code, message: err.message };
   }
-  // AbortSignal.timeout produces DOMException with name "TimeoutError".
+  // AbortSignal.timeout normally produces a DOMException named "TimeoutError";
+  // some undici code paths surface AbortError instead, so accept both.
   if (
     err instanceof Error &&
     (err.name === "TimeoutError" || err.name === "AbortError")
diff --git a/tests/cli.test.ts b/tests/cli.test.ts
index bbac5a0..271e600 100644
--- a/tests/cli.test.ts
+++ b/tests/cli.test.ts
@@ -18,7 +18,7 @@ import { join, resolve as resolvePath } from "node:path";
 const execFileAsync = promisify(execFile);
 
 // Resolved at module load against the test runner's cwd (the project root).
-// Tests that override `cwd` to a tmpdir still need to find the tsx binary
+// Tests that override `cwd` to a tmpdir still need to find the tsx CLI
 // and the source entry — passing relative paths would resolve against the
 // new cwd and produce a confusing ENOENT instead of the behavior under test.
 const TSX_BIN = resolvePath("./node_modules/.bin/tsx");
diff --git a/tests/e2e.test.ts b/tests/e2e.test.ts
index 5f56230..e90f9f9 100644
--- a/tests/e2e.test.ts
+++ b/tests/e2e.test.ts
@@ -1,4 +1,4 @@
-// E2E tests against the COMPILED binary (`node dist/index.js`), not the dev
+// E2E tests against the COMPILED JS output (`node dist/index.js`), not the dev
 // source. server.test.ts already exercises the full surface via tsx; this file
 // verifies that `tsc` output is itself correct and runnable. If server.test.ts
 // passes but this file fails, the bug lives in the build pipeline, not the
@@ -21,7 +21,7 @@ import { join, resolve as resolvePath } from "node:path";
 const execFileAsync = promisify(execFile);
 
 // Resolved absolute paths so a test that overrides cwd still locates the
-// built binary. node is on PATH, so a bare command name is fine for it.
+// built JS entry. node is on PATH, so a bare command name is fine for it.
 const BUILT_BIN = resolvePath("dist/index.js");
 
 before(() => {
@@ -74,7 +74,7 @@ const HAPPY_FIXTURE = `<!DOCTYPE html>
   <main>
     <article>
       <h1>E2E Fixture Heading</h1>
-      <p>This is a deterministic fixture for verifying the compiled binary's full pipeline. The article contains enough prose to pass Readability scoring without depending on any external network resource.</p>
+      <p>This is a deterministic fixture for verifying the compiled output's full pipeline. The article contains enough prose to pass Readability scoring without depending on any external network resource.</p>
       <h2>Sub-section</h2>
       <p>Second paragraph adds more substance so the extracted markdown has multiple structural elements to assert against. Lorem ipsum dolor sit amet.</p>
     </article>
@@ -83,7 +83,7 @@ const HAPPY_FIXTURE = `<!DOCTYPE html>
 </body>
 </html>`;
 
-test("e2e: compiled binary boots, exposes fetch_markdown, pins version", async () => {
+test("e2e: compiled output boots, exposes fetch_markdown, pins version", async () => {
   const client = await spawnCompiled();
   try {
     const info = client.getServerVersion();
@@ -97,7 +97,7 @@ test("e2e: compiled binary boots, exposes fetch_markdown, pins version", async (
   }
 });
 
-test("e2e: compiled binary returns markdown for a mock fixture", async () => {
+test("e2e: compiled output returns markdown for a mock fixture", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
@@ -121,7 +121,7 @@ test("e2e: compiled binary returns markdown for a mock fixture", async () => {
   }
 });
 
-test("e2e: compiled binary returns [network_error] for invalid host", async () => {
+test("e2e: compiled output returns [network_error] for invalid host", async () => {
   const client = await spawnCompiled();
   try {
     const result = await client.callTool({
@@ -135,10 +135,10 @@ test("e2e: compiled binary returns [network_error] for invalid host", async () =
   }
 });
 
-// E1 — savePath against the compiled binary. Pins the build pipeline against
+// E1 — savePath against the compiled JS output. Pins the build pipeline against
 // the new code path. If T1 (server.test) passes but this fails, the bug is
 // in tsc/postbuild, not the runtime logic.
-test("e2e: compiled binary writes markdown to savePath, returns confirmation", async () => {
+test("e2e: compiled output writes markdown to savePath, returns confirmation", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
@@ -166,12 +166,12 @@ test("e2e: compiled binary writes markdown to savePath, returns confirmation", a
   }
 });
 
-// CLI-mode e2e tests. These spawn the compiled binary with arguments so the
+// CLI-mode e2e tests. These spawn the compiled JS output with arguments so the
 // dispatcher in dist/index.js routes to dist/cli.js — exercising the lazy
 // import path that tsc must emit correctly. If the corresponding cli.test
 // passes but these fail, the bug is in the build pipeline, not runtime logic.
 
-test("e2e: compiled binary CLI prints markdown to stdout, exit 0", async () => {
+test("e2e: compiled output CLI prints markdown to stdout, exit 0", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
@@ -189,7 +189,7 @@ test("e2e: compiled binary CLI prints markdown to stdout, exit 0", async () => {
   }
 });
 
-test("e2e: compiled binary --version prints package version, exit 0", async () => {
+test("e2e: compiled output --version prints package version, exit 0", async () => {
   const { stdout, stderr } = await execFileAsync(
     "node",
     [BUILT_BIN, "--version"],

From 2a5faad134378c47395f0518c95c0a47ad590ea1 Mon Sep 17 00:00:00 2001
From: Serhii Vasylenko <github.com@vasylenko.info>
Date: Tue, 12 May 2026 21:35:44 +0200
Subject: [PATCH 2/2] Clean up remaining build vocabulary and tighten technical
 claims
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A follow-up to b162fb2 that addresses items the previous pass kept as
"npm-`bin` jargon" plus several technical inaccuracies surfaced by a
fresh audit.

Build vocabulary (the dist/ artifact is plain JavaScript transpiled by
tsc — not a binary or compiled executable):
- tests/e2e.test.ts: spawnCompiled → spawnBuilt, BUILT_BIN → BUILT_JS,
  "compiled output" → "built output" across 10 test names and comments.
- tests/cli.test.ts: TSX_BIN → TSX_CLI.
- src/index.ts: "node binary" → "path to node" in the argv comment.

Kept legitimate uses intact: Bun's --compile / Node SEA references in
docs/SPEC.md:43 (those tools really do produce binaries), npm's literal
.bin/ folder paths, libuv "native" assertion, "pure Node" idiom.

Technical accuracy in src/core.ts (comment-only — runtime untouched):
- Sec-CH-UA decoy comment: Chrome's GREASE rotation changes BOTH the
  brand token and its version per major (130: "Not?A_Brand";v="99",
  131: "Not_A Brand";v="24"). Previous comment claimed only the version
  rotated.
- CommonMark escape rules: setext underlines are = or - on a line by
  themselves; list markers require -/+/* then whitespace or EOL. The
  previous "===" / "- " restatement was overly specific.
- Sec-Fetch-User: ?1 is always-on; real browsers omit it on
  non-user-activated navigations. Added a short comment noting the
  deliberate simplification.
- decodeEncodedCodeTags: trailing requirement is whitespace, /, or & (the
  start of &gt;), not "end-of-tag".
- "exit code 1" → "sets process.exitCode = 1" — matches cli.ts's actual
  mechanism documented in its own comments.

Overreaching claims softened:
- src/mcp.ts tool description + README:115: only pure client-rendered
  SPAs with no extractable static HTML return extraction_failed. SPAs
  with server-rendered or SEO-prerendered HTML extract whatever they
  ship. Previous wording implied a determinism the pipeline doesn't have.
- README:68 + docs/SPEC.md:28: "Modern MCP clients hide content[]"
  replaced with concrete client names (Claude Code CLI, VS Code/Copilot).
  Behavior varies across clients; concrete examples are honest.
- README:3 lede: "request rate" → "request fingerprint". markfetch has no
  rate-limit logic; the actual contrast is shape (HTTP/2 + headers), not
  pacing.
- README:74 Stdio-clean ANSI clause: "could corrupt protocol framing" →
  "keeping stderr parseable for shell consumers". Stdout-discipline is
  already covered by the prior sentence in the same bullet.

Stale PRD references (the PRD was deleted in commit 52e2139):
- src/core.ts: dropped "PRD §4 calls out that" from the client-hints
  derivation comment.
- tests/server.test.ts:436: "(Principle #4: stderr is fatal-only)" →
  "(stderr-is-fatal-only invariant per SPEC.md)".
- tests/server.test.ts:670: "PRD §5: file at savePath is only ever the
  markdown" → reformulated to cite README and SPEC.md instead.

Text-only changes. `npm run build` and `npm test` (50/50) pass.
---
 README.md            |  8 ++++----
 docs/SPEC.md         |  2 +-
 src/core.ts          | 34 +++++++++++++++++++++-------------
 src/index.ts         |  2 +-
 src/mcp.ts           |  2 +-
 tests/cli.test.ts    |  4 ++--
 tests/e2e.test.ts    | 36 ++++++++++++++++++------------------
 tests/server.test.ts |  4 ++--
 8 files changed, 50 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index e649560..6e5c7fe 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # markfetch
 
-**Reader View for AI agents and your shell. Fetch any URL, get back clean markdown — at a real Chrome's request rate, not curl's.**
+**Reader View for AI agents and your shell. Fetch any URL, get back clean markdown — with a real Chrome's request fingerprint, not curl's.**
 
 [![npm](https://img.shields.io/npm/v/markfetch.svg?color=10b981&label=npm)](https://www.npmjs.com/package/markfetch)
 [![ci](https://github.com/vasylenko/markfetch/actions/workflows/ci.yml/badge.svg)](https://github.com/vasylenko/markfetch/actions/workflows/ci.yml)
@@ -65,13 +65,13 @@ gemini mcp add -s user markfetch npx -y markfetch
 
 - **Reader-View-quality extraction.** [linkedom](https://github.com/WebReflection/linkedom) → [@mozilla/readability](https://github.com/mozilla/readability) → [turndown](https://github.com/mixmark-io/turndown) with GFM tables, strikethrough, and task lists. Code fences preserve `language-X` hints. Sphinx-style bare `<pre>` blocks render as code, not escaped prose. Intraword underscores stay un-escaped — no more `list\_tools`.
 
-- **One tool, one shape (MCP).** `fetch_markdown(url, savePath?)` returns markdown in `content[0].text`. No `structuredContent`, no frontmatter, no metadata fields. Modern MCP clients hide `content[]` when `structuredContent` is present — `markfetch` deliberately stays on the channel your LLM can actually read.
+- **One tool, one shape (MCP).** `fetch_markdown(url, savePath?)` returns markdown in `content[0].text`. No `structuredContent`, no frontmatter, no metadata fields. Several major MCP clients (Claude Code CLI, VS Code/Copilot) forward only `structuredContent` to the model and drop `content[]` when both are present — `markfetch` deliberately stays on the channel your LLM can actually read.
 
 - **`savePath` / `-o` escape valve.** Pass an absolute path (MCP `savePath`) or `-o <path>` (CLI) and the markdown lands on disk instead of the response channel. Use it when your client's inline tool-result cap would truncate large responses, or to redirect output from a shell pipeline. The file is only ever the markdown of the URL — fetch errors return a `[code]` string and never touch the disk.
 
 - **Whole document or honest failure.** No pagination, no truncation. If the document doesn't fit in `MARKFETCH_MAX_BYTES`, you get `too_large` — never a half-truth.
 
-- **Stdio-clean.** Stdout is reserved for MCP frames. Stderr is fatal-only. No log spam, no ANSI escapes that could corrupt protocol framing.
+- **Stdio-clean.** Stdout is reserved for MCP frames. Stderr is fatal-only. No log spam, no ANSI escapes — keeping stderr parseable for shell consumers.
 
 - **Pure Node, no subprocesses.** No Playwright, no headless Chromium, no Python hop. Single Node process — one Node process whether you invoke it as an MCP server or from the shell.
 
@@ -112,7 +112,7 @@ Errors go to stderr with the same `[code] message` shape the MCP tool returns (s
 
 - **Not a crawler.** No recursion, no `robots.txt` parsing, no rate-limit orchestration. One URL in, one document out.
 - **Not authenticated.** Anonymous fetch only — no cookie jar, no auth headers, no session reuse. Pages behind login walls return whatever the public response is, usually surfaced as `http_error`.
-- **Not a JS renderer.** Single-page apps that paint their content client-side return `extraction_failed`. Use this on server-rendered pages.
+- **Not a JS renderer.** Pure client-rendered SPAs with no static content return `extraction_failed`. SPAs with server-rendered or SEO-prerendered HTML will extract whatever static content they ship.
 
 ## Configuration
 
diff --git a/docs/SPEC.md b/docs/SPEC.md
index f8c6305..3429baf 100644
--- a/docs/SPEC.md
+++ b/docs/SPEC.md
@@ -25,7 +25,7 @@ Errors throw `MarkfetchError` uniformly from core; adapters catch once. Codes: `
 
 - **HTTP/2 + coherent Chrome fingerprint.** Wire protocol, headers, and UA must agree — a Chrome UA over HTTP/1.1 or without `Sec-CH-UA-*` is *more* suspicious than curl. `Sec-CH-UA-*` is derived from `MARKFETCH_USER_AGENT` at startup so override-coherence is mechanical.
 
-- **Single-channel MCP response.** `content[0].text` only. Modern MCP clients hide `content[]` when `structuredContent` is present, which would route the response away from the LLM that called the tool.
+- **Single-channel MCP response.** `content[0].text` only. Several major MCP clients (Claude Code CLI, VS Code/Copilot) forward only `structuredContent` to the model and drop `content[]` when both are present — a single-channel response keeps the markdown reachable from those clients.
 
 - **Whole document or `too_large`.** No pagination. Partial content lets the agent reason over truncated bodies without knowing they're truncated. `savePath` / `-o` is the escape valve for genuinely large documents.
 
diff --git a/src/core.ts b/src/core.ts
index 2c7e1cd..f6122bc 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -42,10 +42,10 @@ const config = {
   userAgent: process.env.MARKFETCH_USER_AGENT || DEFAULT_USER_AGENT,
 };
 
-// Derive Sec-CH-UA-* client hints from the User-Agent. PRD §4 calls out that a
-// Chrome UA paired with mismatched (or absent) client hints is a stronger bot
-// signal than a curl UA — the two MUST agree. Deriving from a single source
-// makes that invariant mechanical: override the UA, the hints follow.
+// Derive Sec-CH-UA-* client hints from the User-Agent. A Chrome UA paired
+// with mismatched (or absent) client hints is a stronger bot signal than a
+// curl UA — the two MUST agree. Deriving from a single source makes that
+// invariant mechanical: override the UA, the hints follow.
 function deriveClientHints(ua: string): {
   brands: string;
   mobile: string;
@@ -58,8 +58,12 @@ function deriveClientHints(ua: string): {
     );
   }
   const major = versionMatch[1];
-  // The "Not?A_Brand" decoy rotates per Chrome major (130 ships v="99"). Servers
-  // don't fingerprint the decoy version, so pinning v="99" is acceptable.
+  // Chrome's GREASE rotation changes BOTH the decoy brand token AND its
+  // version per major: Chrome 130 ships "Not?A_Brand";v="99", Chrome 131
+  // ships "Not_A Brand";v="24". We hard-code the Chrome-130 values; if a
+  // caller overrides MARKFETCH_USER_AGENT to a different Chrome major, the
+  // decoy shape will be stale. That is acceptable because bot detectors
+  // don't fingerprint the decoy itself — only the real brand pair.
   const brands = `"Chromium";v="${major}", "Google Chrome";v="${major}", "Not?A_Brand";v="99"`;
   // Chrome's mobile UAs include a literal " Mobile " token; tablets/desktop omit it.
   const mobile = /\bMobile\b/.test(ua) ? "?1" : "?0";
@@ -111,9 +115,10 @@ TURNDOWN.use(gfm);
 //     the start of each text node, not start-of-line. After inline
 //     elements, the next text node often begins with `-suffix` / `=value`,
 //     and gets escaped even though it sits mid-line in the rendered
-//     markdown. CommonMark requires `- ` (dash + space) for an unordered
-//     list and `===` alone for a setext underline, so `\-X` / `\=X` where
-//     X is alphanumeric is never structurally meaningful.
+//     markdown. CommonMark setext underlines are `=` or `-` characters on
+//     a line by themselves; unordered-list markers require `-`/`+`/`*`
+//     followed by whitespace or end-of-line. `\-X` / `\=X` where X is
+//     alphanumeric cannot match either rule, so the escape is pure noise.
 //
 // Drop both. The negative lookbehind `(?<!\\)` on the second replace
 // protects literal-backslash content: source HTML containing `\-X`
@@ -211,6 +216,9 @@ function chromeHeaders(): Record<string, string> {
     "Sec-Fetch-Dest": "document",
     "Sec-Fetch-Mode": "navigate",
     "Sec-Fetch-Site": "none",
+    // Always-on. Real browsers omit this header without user activation;
+    // we model a user-initiated navigation (URL typed into the address
+    // bar), which is also what `Sec-Fetch-Site: "none"` above signals.
     "Sec-Fetch-User": "?1",
     "Sec-CH-UA": clientHints.brands,
     "Sec-CH-UA-Mobile": clientHints.mobile,
@@ -268,9 +276,9 @@ function enforceTooLarge(stage: string, actual: number): MarkfetchError {
 // rather than real `<code>` elements. Decode those specific tag patterns so
 // turndown processes them as real elements and converts to backticks.
 // Pattern accepts `<code>`, `<code class="...">`, `</code>`, `<pre>` etc., but
-// rejects `<codename>`, `<preview>`, `<codeblock>` — the trailing requirement
-// is whitespace, "/", or end-of-tag, so element names with extra characters
-// after `code`/`pre` are not matched.
+// rejects `<codename>`, `<preview>`, `<codeblock>` — the next char after
+// `code`/`pre` must be whitespace, `/`, or `&` (the start of `&gt;`), so
+// element names with extra characters are not matched.
 function decodeEncodedCodeTags(html: string): string {
   return html.replaceAll(
     /&lt;(\/?(?:code|pre)(?:\s[^&]*?)?\/?)&gt;/g,
@@ -390,7 +398,7 @@ function convertToMarkdown(article: {
 //
 // Errors are thrown uniformly as MarkfetchError. Adapters catch and translate:
 //   - mcp.ts catches → errorResult(code, message) → MCP {isError, content}
-//   - cli.ts catches → console.error("[code] message") → exit code 1
+//   - cli.ts catches → console.error("[code] message") → sets process.exitCode = 1
 //
 // The full set of error codes this can throw:
 //   network_error, http_error, timeout, unsupported_content_type,
diff --git a/src/index.ts b/src/index.ts
index 2215e77..f4a0345 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -3,7 +3,7 @@
 // Argv-discriminated dispatcher.
 //
 // `process.argv.length === 2` means the user provided zero arguments
-// (argv[0] is the node binary, argv[1] is this script path). That's the
+// (argv[0] is the path to node, argv[1] is this script path). That's the
 // shape every MCP client uses when spawning a server — so bare invocation
 // routes to the MCP adapter and preserves every existing client config.
 //
diff --git a/src/mcp.ts b/src/mcp.ts
index 62e0518..94f0d7c 100644
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -27,7 +27,7 @@ server.registerTool(
   "fetch_markdown",
   {
     description:
-      "Fetch a single public HTTP/S URL and return its main article content as clean markdown. Best for articles, documentation, blog posts, news, and reference pages. JavaScript-rendered SPAs and non-HTML responses return structured errors instead of partial content. Also supports saving the markdown to a file, e.g., to bypass client tool-result size limits or to reuse later.",
+      "Fetch a single public HTTP/S URL and return its main article content as clean markdown. Best for articles, documentation, blog posts, news, and reference pages. Non-HTML responses return `unsupported_content_type`. Pure client-rendered SPAs with no extractable static HTML return `extraction_failed`; SPAs that ship server-rendered or SEO-prerendered HTML will extract whatever static content they expose. Also supports saving the markdown to a file, e.g., to bypass client tool-result size limits or to reuse later.",
     inputSchema: {
       url: z
         .string()
diff --git a/tests/cli.test.ts b/tests/cli.test.ts
index 271e600..5087374 100644
--- a/tests/cli.test.ts
+++ b/tests/cli.test.ts
@@ -21,7 +21,7 @@ const execFileAsync = promisify(execFile);
 // Tests that override `cwd` to a tmpdir still need to find the tsx CLI
 // and the source entry — passing relative paths would resolve against the
 // new cwd and produce a confusing ENOENT instead of the behavior under test.
-const TSX_BIN = resolvePath("./node_modules/.bin/tsx");
+const TSX_CLI = resolvePath("./node_modules/.bin/tsx");
 const ENTRY = resolvePath("src/index.ts");
 
 const HAPPY_FIXTURE = `<!DOCTYPE html>
@@ -80,7 +80,7 @@ async function runCli(
 ): Promise<RunResult> {
   try {
     const { stdout, stderr } = await execFileAsync(
-      TSX_BIN,
+      TSX_CLI,
       [ENTRY, ...args],
       {
         env: { ...process.env, ...env } as Record<string, string>,
diff --git a/tests/e2e.test.ts b/tests/e2e.test.ts
index e90f9f9..e637447 100644
--- a/tests/e2e.test.ts
+++ b/tests/e2e.test.ts
@@ -1,4 +1,4 @@
-// E2E tests against the COMPILED JS output (`node dist/index.js`), not the dev
+// E2E tests against the BUILT JS output (`node dist/index.js`), not the dev
 // source. server.test.ts already exercises the full surface via tsx; this file
 // verifies that `tsc` output is itself correct and runnable. If server.test.ts
 // passes but this file fails, the bug lives in the build pipeline, not the
@@ -22,14 +22,14 @@ const execFileAsync = promisify(execFile);
 
 // Resolved absolute paths so a test that overrides cwd still locates the
 // built JS entry. node is on PATH, so a bare command name is fine for it.
-const BUILT_BIN = resolvePath("dist/index.js");
+const BUILT_JS = resolvePath("dist/index.js");
 
 before(() => {
   // Always rebuild so e2e tests run against current source, not a stale dist/.
   execSync("npm run build", { stdio: "inherit" });
 });
 
-async function spawnCompiled(env: Record<string, string> = {}) {
+async function spawnBuilt(env: Record<string, string> = {}) {
   const transport = new StdioClientTransport({
     command: "node",
     args: ["dist/index.js"],
@@ -74,7 +74,7 @@ const HAPPY_FIXTURE = `<!DOCTYPE html>
   <main>
     <article>
       <h1>E2E Fixture Heading</h1>
-      <p>This is a deterministic fixture for verifying the compiled output's full pipeline. The article contains enough prose to pass Readability scoring without depending on any external network resource.</p>
+      <p>This is a deterministic fixture for verifying the built output's full pipeline. The article contains enough prose to pass Readability scoring without depending on any external network resource.</p>
       <h2>Sub-section</h2>
       <p>Second paragraph adds more substance so the extracted markdown has multiple structural elements to assert against. Lorem ipsum dolor sit amet.</p>
     </article>
@@ -83,8 +83,8 @@ const HAPPY_FIXTURE = `<!DOCTYPE html>
 </body>
 </html>`;
 
-test("e2e: compiled output boots, exposes fetch_markdown, pins version", async () => {
-  const client = await spawnCompiled();
+test("e2e: built output boots, exposes fetch_markdown, pins version", async () => {
+  const client = await spawnBuilt();
   try {
     const info = client.getServerVersion();
     assert.equal(info?.name, "markfetch");
@@ -97,12 +97,12 @@ test("e2e: compiled output boots, exposes fetch_markdown, pins version", async (
   }
 });
 
-test("e2e: compiled output returns markdown for a mock fixture", async () => {
+test("e2e: built output returns markdown for a mock fixture", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
   });
-  const client = await spawnCompiled();
+  const client = await spawnBuilt();
   try {
     const result = await client.callTool({
       name: "fetch_markdown",
@@ -121,8 +121,8 @@ test("e2e: compiled output returns markdown for a mock fixture", async () => {
   }
 });
 
-test("e2e: compiled output returns [network_error] for invalid host", async () => {
-  const client = await spawnCompiled();
+test("e2e: built output returns [network_error] for invalid host", async () => {
+  const client = await spawnBuilt();
   try {
     const result = await client.callTool({
       name: "fetch_markdown",
@@ -135,17 +135,17 @@ test("e2e: compiled output returns [network_error] for invalid host", async () =
   }
 });
 
-// E1 — savePath against the compiled JS output. Pins the build pipeline against
+// E1 — savePath against the built JS output. Pins the build pipeline against
 // the new code path. If T1 (server.test) passes but this fails, the bug is
 // in tsc/postbuild, not the runtime logic.
-test("e2e: compiled output writes markdown to savePath, returns confirmation", async () => {
+test("e2e: built output writes markdown to savePath, returns confirmation", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
   });
   const dir = await mkdtemp(join(tmpdir(), "mf-e2e-savepath-"));
   const savePath = join(dir, "out.md");
-  const client = await spawnCompiled();
+  const client = await spawnBuilt();
   try {
     const result = await client.callTool({
       name: "fetch_markdown",
@@ -166,12 +166,12 @@ test("e2e: compiled output writes markdown to savePath, returns confirmation", a
   }
 });
 
-// CLI-mode e2e tests. These spawn the compiled JS output with arguments so the
+// CLI-mode e2e tests. These spawn the built JS output with arguments so the
 // dispatcher in dist/index.js routes to dist/cli.js — exercising the lazy
 // import path that tsc must emit correctly. If the corresponding cli.test
 // passes but these fail, the bug is in the build pipeline, not runtime logic.
 
-test("e2e: compiled output CLI prints markdown to stdout, exit 0", async () => {
+test("e2e: built output CLI prints markdown to stdout, exit 0", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
     res.end(HAPPY_FIXTURE);
@@ -179,7 +179,7 @@ test("e2e: compiled output CLI prints markdown to stdout, exit 0", async () => {
   try {
     const { stdout, stderr } = await execFileAsync(
       "node",
-      [BUILT_BIN, mock.url],
+      [BUILT_JS, mock.url],
       { timeout: 10_000, maxBuffer: 5_000_000 },
     );
     assert.equal(stderr, "", "stderr must stay empty on happy path");
@@ -189,10 +189,10 @@ test("e2e: compiled output CLI prints markdown to stdout, exit 0", async () => {
   }
 });
 
-test("e2e: compiled output --version prints package version, exit 0", async () => {
+test("e2e: built output --version prints package version, exit 0", async () => {
   const { stdout, stderr } = await execFileAsync(
     "node",
-    [BUILT_BIN, "--version"],
+    [BUILT_JS, "--version"],
     { timeout: 10_000 },
   );
   assert.equal(stderr, "");
diff --git a/tests/server.test.ts b/tests/server.test.ts
index 4129fa6..0d174ef 100644
--- a/tests/server.test.ts
+++ b/tests/server.test.ts
@@ -433,7 +433,7 @@ test("Sec-CH-UA-* client hints are derived from MARKFETCH_USER_AGENT", async ()
   }
 });
 
-test("per-request errors do not leak to stderr (Principle #4: stderr is fatal-only)", async () => {
+test("per-request errors do not leak to stderr (stderr-is-fatal-only invariant per SPEC.md)", async () => {
   // Connect with stderr: "pipe" so we observe the server's stderr directly
   // while it handles a per-request failure. A network_error from an
   // unresolvable host is the cheapest reliable per-request failure.
@@ -667,7 +667,7 @@ test("savePath: writeFile rejection surfaces as [save_failed] with errno; file i
   }
 });
 
-// T6 — THE Invariant. PRD §5: file at savePath is only ever the markdown.
+// T6 — THE Invariant. The file at savePath is only ever the markdown of the URL (per README and SPEC.md).
 test("savePath INVARIANT: fetch error + savePath → file is NOT written", async () => {
   const mock = await startMock((_req, res) => {
     res.writeHead(404, { "Content-Type": "text/html" });